diff options
Diffstat (limited to 'lib/Target/X86/X86InstrAVX512.td')
-rw-r--r-- | lib/Target/X86/X86InstrAVX512.td | 1457 |
1 files changed, 793 insertions, 664 deletions
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 54eddeacaa17..9b5de59430a5 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -74,6 +74,7 @@ class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc, PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" # VTName); PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT); + PatFrag BroadcastLdFrag = !cast<PatFrag>("X86VBroadcastld" # EltSizeName); ComplexPattern ScalarIntMemCPat = !if (!eq (EltTypeName, "f32"), !cast<ComplexPattern>("sse_load_f32"), @@ -412,6 +413,14 @@ def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "", [(set VR512:$dst, (v16i32 immAllOnesV))]>; } +let Predicates = [HasAVX512] in { +def : Pat<(v64i8 immAllZerosV), (AVX512_512_SET0)>; +def : Pat<(v32i16 immAllZerosV), (AVX512_512_SET0)>; +def : Pat<(v8i64 immAllZerosV), (AVX512_512_SET0)>; +def : Pat<(v16f32 immAllZerosV), (AVX512_512_SET0)>; +def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>; +} + // Alias instructions that allow VPTERNLOG to be used with a mask to create // a mix of all ones and all zeros elements. This is done this way to force // the same register to be used as input for all three sources. @@ -436,6 +445,19 @@ def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "", [(set VR256X:$dst, (v8i32 immAllZerosV))]>; } +let Predicates = [HasAVX512] in { +def : Pat<(v8i16 immAllZerosV), (AVX512_128_SET0)>; +def : Pat<(v16i8 immAllZerosV), (AVX512_128_SET0)>; +def : Pat<(v2i64 immAllZerosV), (AVX512_128_SET0)>; +def : Pat<(v4f32 immAllZerosV), (AVX512_128_SET0)>; +def : Pat<(v2f64 immAllZerosV), (AVX512_128_SET0)>; +def : Pat<(v32i8 immAllZerosV), (AVX512_256_SET0)>; +def : Pat<(v16i16 immAllZerosV), (AVX512_256_SET0)>; +def : Pat<(v4i64 immAllZerosV), (AVX512_256_SET0)>; +def : Pat<(v8f32 immAllZerosV), (AVX512_256_SET0)>; +def : Pat<(v4f64 immAllZerosV), (AVX512_256_SET0)>; +} + // Alias instructions that map fld0 to xorps for sse or vxorps for avx. // This is expanded by ExpandPostRAPseudos. let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, @@ -443,7 +465,9 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "", [(set FR32X:$dst, fp32imm0)]>; def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "", - [(set FR64X:$dst, fpimm0)]>; + [(set FR64X:$dst, fp64imm0)]>; + def AVX512_FsFLD0F128 : I<0, Pseudo, (outs VR128X:$dst), (ins), "", + [(set VR128X:$dst, fp128imm0)]>; } //===----------------------------------------------------------------------===// @@ -730,14 +754,14 @@ let isCommutable = 1 in def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2, u8imm:$src3), "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, imm:$src3))]>, + [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, timm:$src3))]>, EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>; def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst), (ins VR128X:$src1, f32mem:$src2, u8imm:$src3), "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", [(set VR128X:$dst, (X86insertps VR128X:$src1, (v4f32 (scalar_to_vector (loadf32 addr:$src2))), - imm:$src3))]>, + timm:$src3))]>, EVEX_4V, EVEX_CD8<32, CD8VT1>, Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>; } @@ -1100,75 +1124,104 @@ multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr, X86VectorVTInfo MaskInfo, X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo, - SDPatternOperator UnmaskedOp = X86VBroadcast> { - let ExeDomain = DestInfo.ExeDomain, hasSideEffects = 0 in { - defm r : AVX512_maskable_split<opc, MRMSrcReg, MaskInfo, - (outs MaskInfo.RC:$dst), - (ins SrcInfo.RC:$src), OpcodeStr, "$src", "$src", - (MaskInfo.VT - (bitconvert - (DestInfo.VT - (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))), - (MaskInfo.VT - (bitconvert - (DestInfo.VT - (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src)))))>, - T8PD, EVEX, Sched<[SchedRR]>; - let mayLoad = 1 in - defm m : AVX512_maskable_split<opc, MRMSrcMem, MaskInfo, - (outs MaskInfo.RC:$dst), - (ins SrcInfo.ScalarMemOp:$src), OpcodeStr, "$src", "$src", - (MaskInfo.VT - (bitconvert - (DestInfo.VT (UnmaskedOp - (SrcInfo.ScalarLdFrag addr:$src))))), - (MaskInfo.VT - (bitconvert - (DestInfo.VT (X86VBroadcast - (SrcInfo.ScalarLdFrag addr:$src)))))>, - T8PD, EVEX, EVEX_CD8<SrcInfo.EltSize, CD8VT1>, - Sched<[SchedRM]>; - } - - def : Pat<(MaskInfo.VT - (bitconvert - (DestInfo.VT (UnmaskedOp - (SrcInfo.VT (scalar_to_vector - (SrcInfo.ScalarLdFrag addr:$src))))))), - (!cast<Instruction>(Name#MaskInfo.ZSuffix#m) addr:$src)>; - def : Pat<(MaskInfo.VT (vselect MaskInfo.KRCWM:$mask, + bit IsConvertibleToThreeAddress, + SDPatternOperator UnmaskedOp = X86VBroadcast, + SDPatternOperator UnmaskedBcastOp = SrcInfo.BroadcastLdFrag> { + let hasSideEffects = 0 in + def r : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst), (ins SrcInfo.RC:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + [(set MaskInfo.RC:$dst, + (MaskInfo.VT + (bitconvert + (DestInfo.VT + (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))))], + DestInfo.ExeDomain>, T8PD, EVEX, Sched<[SchedRR]>; + def rkz : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst), + (ins MaskInfo.KRCWM:$mask, SrcInfo.RC:$src), + !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|", + "${dst} {${mask}} {z}, $src}"), + [(set MaskInfo.RC:$dst, + (vselect MaskInfo.KRCWM:$mask, + (MaskInfo.VT (bitconvert (DestInfo.VT - (X86VBroadcast - (SrcInfo.VT (scalar_to_vector - (SrcInfo.ScalarLdFrag addr:$src)))))), - MaskInfo.RC:$src0)), - (!cast<Instruction>(Name#DestInfo.ZSuffix#mk) - MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask, addr:$src)>; - def : Pat<(MaskInfo.VT (vselect MaskInfo.KRCWM:$mask, + (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))), + MaskInfo.ImmAllZerosV))], + DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ, Sched<[SchedRR]>; + let Constraints = "$src0 = $dst" in + def rk : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst), + (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask, + SrcInfo.RC:$src), + !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|", + "${dst} {${mask}}, $src}"), + [(set MaskInfo.RC:$dst, + (vselect MaskInfo.KRCWM:$mask, + (MaskInfo.VT + (bitconvert + (DestInfo.VT + (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))), + MaskInfo.RC:$src0))], + DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K, Sched<[SchedRR]>; + + let hasSideEffects = 0, mayLoad = 1 in + def m : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst), + (ins SrcInfo.ScalarMemOp:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + [(set MaskInfo.RC:$dst, + (MaskInfo.VT + (bitconvert + (DestInfo.VT + (UnmaskedBcastOp addr:$src)))))], + DestInfo.ExeDomain>, T8PD, EVEX, + EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>; + + def mkz : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst), + (ins MaskInfo.KRCWM:$mask, SrcInfo.ScalarMemOp:$src), + !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|", + "${dst} {${mask}} {z}, $src}"), + [(set MaskInfo.RC:$dst, + (vselect MaskInfo.KRCWM:$mask, + (MaskInfo.VT (bitconvert (DestInfo.VT - (X86VBroadcast - (SrcInfo.VT (scalar_to_vector - (SrcInfo.ScalarLdFrag addr:$src)))))), - MaskInfo.ImmAllZerosV)), - (!cast<Instruction>(Name#MaskInfo.ZSuffix#mkz) - MaskInfo.KRCWM:$mask, addr:$src)>; + (SrcInfo.BroadcastLdFrag addr:$src)))), + MaskInfo.ImmAllZerosV))], + DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ, + EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>; + + let Constraints = "$src0 = $dst", + isConvertibleToThreeAddress = IsConvertibleToThreeAddress in + def mk : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst), + (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask, + SrcInfo.ScalarMemOp:$src), + !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|", + "${dst} {${mask}}, $src}"), + [(set MaskInfo.RC:$dst, + (vselect MaskInfo.KRCWM:$mask, + (MaskInfo.VT + (bitconvert + (DestInfo.VT + (SrcInfo.BroadcastLdFrag addr:$src)))), + MaskInfo.RC:$src0))], + DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K, + EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>; } // Helper class to force mask and broadcast result to same type. multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr, string Name, SchedWrite SchedRR, SchedWrite SchedRM, X86VectorVTInfo DestInfo, - X86VectorVTInfo SrcInfo> : + X86VectorVTInfo SrcInfo, + bit IsConvertibleToThreeAddress> : avx512_broadcast_rm_split<opc, OpcodeStr, Name, SchedRR, SchedRM, - DestInfo, DestInfo, SrcInfo>; + DestInfo, DestInfo, SrcInfo, + IsConvertibleToThreeAddress>; multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr, AVX512VLVectorVTInfo _> { let Predicates = [HasAVX512] in { defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256, - WriteFShuffle256Ld, _.info512, _.info128>, + WriteFShuffle256Ld, _.info512, _.info128, 1>, avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512, _.info128>, EVEX_V512; @@ -1176,7 +1229,7 @@ multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr, let Predicates = [HasVLX] in { defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256, - WriteFShuffle256Ld, _.info256, _.info128>, + WriteFShuffle256Ld, _.info256, _.info128, 1>, avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256, _.info128>, EVEX_V256; @@ -1187,7 +1240,7 @@ multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr, AVX512VLVectorVTInfo _> { let Predicates = [HasAVX512] in { defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256, - WriteFShuffle256Ld, _.info512, _.info128>, + WriteFShuffle256Ld, _.info512, _.info128, 1>, avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512, _.info128>, EVEX_V512; @@ -1195,12 +1248,12 @@ multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr, let Predicates = [HasVLX] in { defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256, - WriteFShuffle256Ld, _.info256, _.info128>, + WriteFShuffle256Ld, _.info256, _.info128, 1>, avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256, _.info128>, EVEX_V256; defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256, - WriteFShuffle256Ld, _.info128, _.info128>, + WriteFShuffle256Ld, _.info128, _.info128, 1>, avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info128, _.info128>, EVEX_V128; @@ -1284,46 +1337,35 @@ defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info, defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info, X86VBroadcast, GR64, HasAVX512>, VEX_W; -// Provide aliases for broadcast from the same register class that -// automatically does the extract. -multiclass avx512_int_broadcast_rm_lowering<string Name, - X86VectorVTInfo DestInfo, - X86VectorVTInfo SrcInfo, - X86VectorVTInfo ExtInfo> { - def : Pat<(DestInfo.VT (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))), - (!cast<Instruction>(Name#DestInfo.ZSuffix#"r") - (ExtInfo.VT (EXTRACT_SUBREG (SrcInfo.VT SrcInfo.RC:$src), sub_xmm)))>; -} - multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr, - AVX512VLVectorVTInfo _, Predicate prd> { + AVX512VLVectorVTInfo _, Predicate prd, + bit IsConvertibleToThreeAddress> { let Predicates = [prd] in { defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256, - WriteShuffle256Ld, _.info512, _.info128>, - avx512_int_broadcast_rm_lowering<NAME, _.info512, _.info256, _.info128>, + WriteShuffle256Ld, _.info512, _.info128, + IsConvertibleToThreeAddress>, EVEX_V512; - // Defined separately to avoid redefinition. - defm Z_Alt : avx512_int_broadcast_rm_lowering<NAME, _.info512, _.info512, _.info128>; } let Predicates = [prd, HasVLX] in { defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256, - WriteShuffle256Ld, _.info256, _.info128>, - avx512_int_broadcast_rm_lowering<NAME, _.info256, _.info256, _.info128>, + WriteShuffle256Ld, _.info256, _.info128, + IsConvertibleToThreeAddress>, EVEX_V256; defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle, - WriteShuffleXLd, _.info128, _.info128>, + WriteShuffleXLd, _.info128, _.info128, + IsConvertibleToThreeAddress>, EVEX_V128; } } defm VPBROADCASTB : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb", - avx512vl_i8_info, HasBWI>; + avx512vl_i8_info, HasBWI, 0>; defm VPBROADCASTW : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw", - avx512vl_i16_info, HasBWI>; + avx512vl_i16_info, HasBWI, 0>; defm VPBROADCASTD : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd", - avx512vl_i32_info, HasAVX512>; + avx512vl_i32_info, HasAVX512, 1>; defm VPBROADCASTQ : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq", - avx512vl_i64_info, HasAVX512>, VEX_W1X; + avx512vl_i64_info, HasAVX512, 1>, VEX_W1X; multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr, X86VectorVTInfo _Dst, X86VectorVTInfo _Src> { @@ -1354,6 +1396,10 @@ let Predicates = [HasAVX512] in { // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD. def : Pat<(v8i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))), (VPBROADCASTQZm addr:$src)>; + + // FIXME this is to handle aligned extloads from i8. + def : Pat<(v16i32 (X86VBroadcast (loadi32 addr:$src))), + (VPBROADCASTDZm addr:$src)>; } let Predicates = [HasVLX] in { @@ -1362,6 +1408,12 @@ let Predicates = [HasVLX] in { (VPBROADCASTQZ128m addr:$src)>; def : Pat<(v4i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))), (VPBROADCASTQZ256m addr:$src)>; + + // FIXME this is to handle aligned extloads from i8. + def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))), + (VPBROADCASTDZ128m addr:$src)>; + def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))), + (VPBROADCASTDZ256m addr:$src)>; } let Predicates = [HasVLX, HasBWI] in { // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably. @@ -1382,6 +1434,12 @@ let Predicates = [HasVLX, HasBWI] in { def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (zextloadi16 addr:$src)))))), (VPBROADCASTWZ256m addr:$src)>; + + // FIXME this is to handle aligned extloads from i8. + def : Pat<(v8i16 (X86VBroadcast (loadi16 addr:$src))), + (VPBROADCASTWZ128m addr:$src)>; + def : Pat<(v16i16 (X86VBroadcast (loadi16 addr:$src))), + (VPBROADCASTWZ256m addr:$src)>; } let Predicates = [HasBWI] in { // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably. @@ -1394,6 +1452,10 @@ let Predicates = [HasBWI] in { def : Pat<(v32i16 (X86VBroadcast (i16 (trunc (i32 (zextloadi16 addr:$src)))))), (VPBROADCASTWZm addr:$src)>; + + // FIXME this is to handle aligned extloads from i8. + def : Pat<(v32i16 (X86VBroadcast (loadi16 addr:$src))), + (VPBROADCASTWZm addr:$src)>; } //===----------------------------------------------------------------------===// @@ -1629,12 +1691,12 @@ multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr, let Predicates = [HasDQI] in defm Z : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256, WriteShuffle256Ld, _Dst.info512, - _Src.info512, _Src.info128, null_frag>, + _Src.info512, _Src.info128, 0, null_frag, null_frag>, EVEX_V512; let Predicates = [HasDQI, HasVLX] in defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256, WriteShuffle256Ld, _Dst.info256, - _Src.info256, _Src.info128, null_frag>, + _Src.info256, _Src.info128, 0, null_frag, null_frag>, EVEX_V256; } @@ -1645,7 +1707,7 @@ multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr, let Predicates = [HasDQI, HasVLX] in defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle, WriteShuffleXLd, _Dst.info128, - _Src.info128, _Src.info128, null_frag>, + _Src.info128, _Src.info128, 0, null_frag, null_frag>, EVEX_V128; } @@ -1654,23 +1716,6 @@ defm VBROADCASTI32X2 : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2", defm VBROADCASTF32X2 : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2", avx512vl_f32_info, avx512vl_f64_info>; -let Predicates = [HasVLX] in { -def : Pat<(v8f32 (X86VBroadcast (v8f32 VR256X:$src))), - (VBROADCASTSSZ256r (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))>; -def : Pat<(v4f64 (X86VBroadcast (v4f64 VR256X:$src))), - (VBROADCASTSDZ256r (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))>; -} - -def : Pat<(v16f32 (X86VBroadcast (v16f32 VR512:$src))), - (VBROADCASTSSZr (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))>; -def : Pat<(v16f32 (X86VBroadcast (v8f32 VR256X:$src))), - (VBROADCASTSSZr (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))>; - -def : Pat<(v8f64 (X86VBroadcast (v8f64 VR512:$src))), - (VBROADCASTSDZr (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))>; -def : Pat<(v8f64 (X86VBroadcast (v4f64 VR256X:$src))), - (VBROADCASTSDZr (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))>; - //===----------------------------------------------------------------------===// // AVX-512 BROADCAST MASK TO VECTOR REGISTER //--- @@ -1730,7 +1775,7 @@ multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr, OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), !strconcat("$src2, ${src3}", _.BroadcastStr ), (_.VT (X86VPermt2 _.RC:$src2, - IdxVT.RC:$src1,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))), 1>, + IdxVT.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>, AVX5128IBase, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -1807,7 +1852,7 @@ multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _, def : Pat<(_.VT (vselect _.KRCWM:$mask, (X86VPermt2 _.RC:$src2, (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))), - (X86VBroadcast (_.ScalarLdFrag addr:$src3))), + (_.BroadcastLdFrag addr:$src3)), (_.VT (bitconvert (CastVT.VT _.RC:$src1))))), (!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3)>; @@ -1846,7 +1891,7 @@ multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr, OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), !strconcat("$src2, ${src3}", _.BroadcastStr ), (_.VT (X86VPermt2 _.RC:$src1, - IdxVT.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))), 1>, + IdxVT.RC:$src2,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>, AVX5128IBase, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -1947,7 +1992,7 @@ multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr, } multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched, X86VectorVTInfo _> { - let mayLoad = 1, hasSideEffects = 0 in { + let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in { def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2), !strconcat(OpcodeStr, @@ -2031,9 +2076,9 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE, (ins _.RC:$src1, _.RC:$src2, u8imm:$cc), "vcmp"#_.Suffix, "$cc, $src2, $src1", "$src1, $src2, $cc", - (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc), + (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc), (OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), - imm:$cc)>, EVEX_4V, VEX_LIG, Sched<[sched]>; + timm:$cc)>, EVEX_4V, VEX_LIG, Sched<[sched]>; let mayLoad = 1 in defm rm_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _, (outs _.KRC:$dst), @@ -2041,9 +2086,9 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE, "vcmp"#_.Suffix, "$cc, $src2, $src1", "$src1, $src2, $cc", (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2, - imm:$cc), + timm:$cc), (OpNode_su (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2, - imm:$cc)>, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>, + timm:$cc)>, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[sched.Folded, sched.ReadAfterFold]>; defm rrb_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, @@ -2052,9 +2097,9 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE, "vcmp"#_.Suffix, "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc", (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2), - imm:$cc), + timm:$cc), (OpNodeSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), - imm:$cc)>, + timm:$cc)>, EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>; let isCodeGenOnly = 1 in { @@ -2065,7 +2110,7 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE, "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), [(set _.KRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2, - imm:$cc))]>, + timm:$cc))]>, EVEX_4V, VEX_LIG, Sched<[sched]>; def rm : AVX512Ii8<0xC2, MRMSrcMem, (outs _.KRC:$dst), @@ -2074,7 +2119,7 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE, "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), [(set _.KRC:$dst, (OpNode _.FRC:$src1, (_.ScalarLdFrag addr:$src2), - imm:$cc))]>, + timm:$cc))]>, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -2100,94 +2145,82 @@ let Predicates = [HasAVX512] in { SchedWriteFCmp.Scl>, AVX512XDIi8Base, VEX_W; } -multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, PatFrag OpNode, - PatFrag OpNode_su, X86FoldableSchedWrite sched, +multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, + X86FoldableSchedWrite sched, X86VectorVTInfo _, bit IsCommutable> { - let isCommutable = IsCommutable in + let isCommutable = IsCommutable, hasSideEffects = 0 in def rr : AVX512BI<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)))]>, - EVEX_4V, Sched<[sched]>; + []>, EVEX_4V, Sched<[sched]>; + let mayLoad = 1, hasSideEffects = 0 in def rm : AVX512BI<opc, MRMSrcMem, (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), - (_.VT (_.LdFrag addr:$src2))))]>, - EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>; - let isCommutable = IsCommutable in + []>, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>; + let isCommutable = IsCommutable, hasSideEffects = 0 in def rrk : AVX512BI<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|", "$dst {${mask}}, $src1, $src2}"), - [(set _.KRC:$dst, (and _.KRCWM:$mask, - (OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2))))]>, - EVEX_4V, EVEX_K, Sched<[sched]>; + []>, EVEX_4V, EVEX_K, Sched<[sched]>; + let mayLoad = 1, hasSideEffects = 0 in def rmk : AVX512BI<opc, MRMSrcMem, (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|", "$dst {${mask}}, $src1, $src2}"), - [(set _.KRC:$dst, (and _.KRCWM:$mask, - (OpNode_su (_.VT _.RC:$src1), - (_.VT (_.LdFrag addr:$src2)))))]>, - EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>; + []>, EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>; } -multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, PatFrag OpNode, - PatFrag OpNode_su, +multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched, X86VectorVTInfo _, bit IsCommutable> : - avx512_icmp_packed<opc, OpcodeStr, OpNode, OpNode_su, sched, _, IsCommutable> { + avx512_icmp_packed<opc, OpcodeStr, sched, _, IsCommutable> { + let mayLoad = 1, hasSideEffects = 0 in { def rmb : AVX512BI<opc, MRMSrcMem, (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2), !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst", "|$dst, $src1, ${src2}", _.BroadcastStr, "}"), - [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), - (X86VBroadcast (_.ScalarLdFrag addr:$src2))))]>, - EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; + []>, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; def rmbk : AVX512BI<opc, MRMSrcMem, (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2), !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|", "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), - [(set _.KRC:$dst, (and _.KRCWM:$mask, - (OpNode_su (_.VT _.RC:$src1), - (X86VBroadcast - (_.ScalarLdFrag addr:$src2)))))]>, - EVEX_4V, EVEX_K, EVEX_B, + []>, EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; + } } -multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr, PatFrag OpNode, - PatFrag OpNode_su, X86SchedWriteWidths sched, +multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr, + X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo, Predicate prd, bit IsCommutable = 0> { let Predicates = [prd] in - defm Z : avx512_icmp_packed<opc, OpcodeStr, OpNode, OpNode_su, sched.ZMM, + defm Z : avx512_icmp_packed<opc, OpcodeStr, sched.ZMM, VTInfo.info512, IsCommutable>, EVEX_V512; let Predicates = [prd, HasVLX] in { - defm Z256 : avx512_icmp_packed<opc, OpcodeStr, OpNode, OpNode_su, sched.YMM, + defm Z256 : avx512_icmp_packed<opc, OpcodeStr, sched.YMM, VTInfo.info256, IsCommutable>, EVEX_V256; - defm Z128 : avx512_icmp_packed<opc, OpcodeStr, OpNode, OpNode_su, sched.XMM, + defm Z128 : avx512_icmp_packed<opc, OpcodeStr, sched.XMM, VTInfo.info128, IsCommutable>, EVEX_V128; } } multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr, - PatFrag OpNode, PatFrag OpNode_su, X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo, Predicate prd, bit IsCommutable = 0> { let Predicates = [prd] in - defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, OpNode_su, sched.ZMM, + defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.ZMM, VTInfo.info512, IsCommutable>, EVEX_V512; let Predicates = [prd, HasVLX] in { - defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, OpNode_su, sched.YMM, + defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.YMM, VTInfo.info256, IsCommutable>, EVEX_V256; - defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, OpNode_su, sched.XMM, + defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.XMM, VTInfo.info128, IsCommutable>, EVEX_V128; } } @@ -2195,53 +2228,42 @@ multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr, // This fragment treats X86cmpm as commutable to help match loads in both // operands for PCMPEQ. def X86setcc_commute : SDNode<"ISD::SETCC", SDTSetCC, [SDNPCommutative]>; -def X86pcmpeqm_c : PatFrag<(ops node:$src1, node:$src2), - (X86setcc_commute node:$src1, node:$src2, SETEQ)>; def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2), (setcc node:$src1, node:$src2, SETGT)>; -def X86pcmpeqm_c_su : PatFrag<(ops node:$src1, node:$src2), - (X86pcmpeqm_c node:$src1, node:$src2), [{ - return N->hasOneUse(); -}]>; -def X86pcmpgtm_su : PatFrag<(ops node:$src1, node:$src2), - (X86pcmpgtm node:$src1, node:$src2), [{ - return N->hasOneUse(); -}]>; - // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't // increase the pattern complexity the way an immediate would. let AddedComplexity = 2 in { // FIXME: Is there a better scheduler class for VPCMP? -defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", X86pcmpeqm_c, X86pcmpeqm_c_su, +defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>, EVEX_CD8<8, CD8VF>, VEX_WIG; -defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", X86pcmpeqm_c, X86pcmpeqm_c_su, +defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>, EVEX_CD8<16, CD8VF>, VEX_WIG; -defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", X86pcmpeqm_c, X86pcmpeqm_c_su, +defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>, EVEX_CD8<32, CD8VF>; -defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", X86pcmpeqm_c, X86pcmpeqm_c_su, +defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>, T8PD, VEX_W, EVEX_CD8<64, CD8VF>; -defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb", X86pcmpgtm, X86pcmpgtm_su, +defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb", SchedWriteVecALU, avx512vl_i8_info, HasBWI>, EVEX_CD8<8, CD8VF>, VEX_WIG; -defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw", X86pcmpgtm, X86pcmpgtm_su, +defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw", SchedWriteVecALU, avx512vl_i16_info, HasBWI>, EVEX_CD8<16, CD8VF>, VEX_WIG; -defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd", X86pcmpgtm, X86pcmpgtm_su, +defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd", SchedWriteVecALU, avx512vl_i32_info, HasAVX512>, EVEX_CD8<32, CD8VF>; -defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm, X86pcmpgtm_su, +defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", SchedWriteVecALU, avx512vl_i64_info, HasAVX512>, T8PD, VEX_W, EVEX_CD8<64, CD8VF>; } @@ -2322,8 +2344,7 @@ multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag, "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"), [(set _.KRC:$dst, (_.KVT (Frag:$cc (_.VT _.RC:$src1), - (X86VBroadcast - (_.ScalarLdFrag addr:$src2)), + (_.BroadcastLdFrag addr:$src2), cond)))]>, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; def rmibk : AVX512AIi8<opc, MRMSrcMem, @@ -2335,23 +2356,21 @@ multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag, [(set _.KRC:$dst, (and _.KRCWM:$mask, (_.KVT (Frag_su:$cc (_.VT _.RC:$src1), - (X86VBroadcast - (_.ScalarLdFrag addr:$src2)), + (_.BroadcastLdFrag addr:$src2), cond))))]>, EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; - def : Pat<(_.KVT (CommFrag:$cc (X86VBroadcast (_.ScalarLdFrag addr:$src2)), + def : Pat<(_.KVT (CommFrag:$cc (_.BroadcastLdFrag addr:$src2), (_.VT _.RC:$src1), cond)), (!cast<Instruction>(Name#_.ZSuffix#"rmib") _.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>; def : Pat<(and _.KRCWM:$mask, - (_.KVT (CommFrag_su:$cc (X86VBroadcast - (_.ScalarLdFrag addr:$src2)), + (_.KVT (CommFrag_su:$cc (_.BroadcastLdFrag addr:$src2), (_.VT _.RC:$src1), cond))), (!cast<Instruction>(Name#_.ZSuffix#"rmibk") _.KRCWM:$mask, _.RC:$src1, addr:$src2, - (CommFrag.OperandTransform $cc))>; + (CommFrag_su.OperandTransform $cc))>; } multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag, @@ -2496,14 +2515,19 @@ def X86cmpmSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc), return N->hasOneUse(); }]>; +def X86cmpm_imm_commute : SDNodeXForm<timm, [{ + uint8_t Imm = X86::getSwappedVCMPImm(N->getZExtValue() & 0x1f); + return getI8Imm(Imm, SDLoc(N)); +}]>; + multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _, string Name> { defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,u8imm:$cc), "vcmp"#_.Suffix, "$cc, $src2, $src1", "$src1, $src2, $cc", - (X86cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc), - (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc), + (X86cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc), + (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc), 1>, Sched<[sched]>; defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _, @@ -2511,9 +2535,9 @@ multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _, "vcmp"#_.Suffix, "$cc, $src2, $src1", "$src1, $src2, $cc", (X86cmpm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), - imm:$cc), + timm:$cc), (X86cmpm_su (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), - imm:$cc)>, + timm:$cc)>, Sched<[sched.Folded, sched.ReadAfterFold]>; defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _, @@ -2523,38 +2547,37 @@ multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _, "$cc, ${src2}"#_.BroadcastStr#", $src1", "$src1, ${src2}"#_.BroadcastStr#", $cc", (X86cmpm (_.VT _.RC:$src1), - (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))), - imm:$cc), + (_.VT (_.BroadcastLdFrag addr:$src2)), + timm:$cc), (X86cmpm_su (_.VT _.RC:$src1), - (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))), - imm:$cc)>, + (_.VT (_.BroadcastLdFrag addr:$src2)), + timm:$cc)>, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; // Patterns for selecting with loads in other operand. def : Pat<(X86cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1), - CommutableCMPCC:$cc), + timm:$cc), (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2, - imm:$cc)>; + (X86cmpm_imm_commute timm:$cc))>; def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.LdFrag addr:$src2), (_.VT _.RC:$src1), - CommutableCMPCC:$cc)), + timm:$cc)), (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask, _.RC:$src1, addr:$src2, - imm:$cc)>; + (X86cmpm_imm_commute timm:$cc))>; - def : Pat<(X86cmpm (X86VBroadcast (_.ScalarLdFrag addr:$src2)), - (_.VT _.RC:$src1), CommutableCMPCC:$cc), + def : Pat<(X86cmpm (_.BroadcastLdFrag addr:$src2), + (_.VT _.RC:$src1), timm:$cc), (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2, - imm:$cc)>; + (X86cmpm_imm_commute timm:$cc))>; - def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (X86VBroadcast - (_.ScalarLdFrag addr:$src2)), + def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.BroadcastLdFrag addr:$src2), (_.VT _.RC:$src1), - CommutableCMPCC:$cc)), + timm:$cc)), (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask, _.RC:$src1, addr:$src2, - imm:$cc)>; + (X86cmpm_imm_commute timm:$cc))>; } multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> { @@ -2564,9 +2587,9 @@ multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> { "vcmp"#_.Suffix, "$cc, {sae}, $src2, $src1", "$src1, $src2, {sae}, $cc", - (X86cmpmSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc), + (X86cmpmSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc), (X86cmpmSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), - imm:$cc)>, + timm:$cc)>, EVEX_B, Sched<[sched]>; } @@ -2590,12 +2613,12 @@ defm VCMPPS : avx512_vcmp<SchedWriteFCmp, avx512vl_f32_info>, // Patterns to select fp compares with load as first operand. let Predicates = [HasAVX512] in { def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1, - CommutableCMPCC:$cc)), - (VCMPSDZrm FR64X:$src1, addr:$src2, imm:$cc)>; + timm:$cc)), + (VCMPSDZrm FR64X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>; def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1, - CommutableCMPCC:$cc)), - (VCMPSSZrm FR32X:$src1, addr:$src2, imm:$cc)>; + timm:$cc)), + (VCMPSSZrm FR32X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>; } // ---------------------------------------------------------------- @@ -2621,7 +2644,7 @@ multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr, (ins _.RC:$src1, i32u8imm:$src2), OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.KRC:$dst,(X86Vfpclasss (_.VT _.RC:$src1), - (i32 imm:$src2)))]>, + (i32 timm:$src2)))]>, Sched<[sched]>; def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), @@ -2629,7 +2652,7 @@ multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", [(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclasss_su (_.VT _.RC:$src1), - (i32 imm:$src2))))]>, + (i32 timm:$src2))))]>, EVEX_K, Sched<[sched]>; def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), (ins _.IntScalarMemOp:$src1, i32u8imm:$src2), @@ -2637,7 +2660,7 @@ multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.KRC:$dst, (X86Vfpclasss _.ScalarIntMemCPat:$src1, - (i32 imm:$src2)))]>, + (i32 timm:$src2)))]>, Sched<[sched.Folded, sched.ReadAfterFold]>; def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2), @@ -2645,7 +2668,7 @@ multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", [(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclasss_su _.ScalarIntMemCPat:$src1, - (i32 imm:$src2))))]>, + (i32 timm:$src2))))]>, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -2661,7 +2684,7 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, (ins _.RC:$src1, i32u8imm:$src2), OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.KRC:$dst,(X86Vfpclass (_.VT _.RC:$src1), - (i32 imm:$src2)))]>, + (i32 timm:$src2)))]>, Sched<[sched]>; def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), @@ -2669,7 +2692,7 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", [(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclass_su (_.VT _.RC:$src1), - (i32 imm:$src2))))]>, + (i32 timm:$src2))))]>, EVEX_K, Sched<[sched]>; def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), (ins _.MemOp:$src1, i32u8imm:$src2), @@ -2677,7 +2700,7 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.KRC:$dst,(X86Vfpclass (_.VT (_.LdFrag addr:$src1)), - (i32 imm:$src2)))]>, + (i32 timm:$src2)))]>, Sched<[sched.Folded, sched.ReadAfterFold]>; def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2), @@ -2685,7 +2708,7 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", [(set _.KRC:$dst, (and _.KRCWM:$mask, (X86Vfpclass_su (_.VT (_.LdFrag addr:$src1)), - (i32 imm:$src2))))]>, + (i32 timm:$src2))))]>, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>; def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), (ins _.ScalarMemOp:$src1, i32u8imm:$src2), @@ -2693,9 +2716,8 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, _.BroadcastStr##", $dst|$dst, ${src1}" ##_.BroadcastStr##", $src2}", [(set _.KRC:$dst,(X86Vfpclass - (_.VT (X86VBroadcast - (_.ScalarLdFrag addr:$src1))), - (i32 imm:$src2)))]>, + (_.VT (_.BroadcastLdFrag addr:$src1)), + (i32 timm:$src2)))]>, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2), @@ -2703,9 +2725,8 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, _.BroadcastStr##", $dst {${mask}}|$dst {${mask}}, ${src1}"## _.BroadcastStr##", $src2}", [(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclass_su - (_.VT (X86VBroadcast - (_.ScalarLdFrag addr:$src1))), - (i32 imm:$src2))))]>, + (_.VT (_.BroadcastLdFrag addr:$src1)), + (i32 timm:$src2))))]>, EVEX_B, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -2836,13 +2857,21 @@ def : Pat<(i8 (bitconvert (v8i1 VK8:$src))), def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))), (KMOVWrk VK16:$src)>; +def : Pat<(i64 (zext (i16 (bitconvert (v16i1 VK16:$src))))), + (SUBREG_TO_REG (i64 0), (KMOVWrk VK16:$src), sub_32bit)>; def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))), (COPY_TO_REGCLASS VK16:$src, GR32)>; +def : Pat<(i64 (anyext (i16 (bitconvert (v16i1 VK16:$src))))), + (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK16:$src, GR32), sub_32bit)>; def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))), (KMOVBrk VK8:$src)>, Requires<[HasDQI]>; +def : Pat<(i64 (zext (i8 (bitconvert (v8i1 VK8:$src))))), + (SUBREG_TO_REG (i64 0), (KMOVBrk VK8:$src), sub_32bit)>, Requires<[HasDQI]>; def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))), (COPY_TO_REGCLASS VK8:$src, GR32)>; +def : Pat<(i64 (anyext (i8 (bitconvert (v8i1 VK8:$src))))), + (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK8:$src, GR32), sub_32bit)>; def : Pat<(v32i1 (bitconvert (i32 GR32:$src))), (COPY_TO_REGCLASS GR32:$src, VK32)>; @@ -3075,7 +3104,7 @@ multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC, def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm), !strconcat(OpcodeStr, "\t{$imm, $src, $dst|$dst, $src, $imm}"), - [(set KRC:$dst, (OpNode KRC:$src, (i8 imm:$imm)))]>, + [(set KRC:$dst, (OpNode KRC:$src, (i8 timm:$imm)))]>, Sched<[sched]>; } @@ -3098,30 +3127,6 @@ defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, WriteShu defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, WriteShuffle>; // Patterns for comparing 128/256-bit integer vectors using 512-bit instruction. -multiclass axv512_icmp_packed_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su, - string InstStr, - X86VectorVTInfo Narrow, - X86VectorVTInfo Wide> { - def : Pat<(Narrow.KVT (Frag (Narrow.VT Narrow.RC:$src1), - (Narrow.VT Narrow.RC:$src2))), - (COPY_TO_REGCLASS - (!cast<Instruction>(InstStr#"Zrr") - (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), - (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx))), - Narrow.KRC)>; - - def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, - (Frag_su (Narrow.VT Narrow.RC:$src1), - (Narrow.VT Narrow.RC:$src2)))), - (COPY_TO_REGCLASS - (!cast<Instruction>(InstStr#"Zrrk") - (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), - (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), - (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx))), - Narrow.KRC)>; -} - -// Patterns for comparing 128/256-bit integer vectors using 512-bit instruction. multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su, string InstStr, X86VectorVTInfo Narrow, @@ -3129,7 +3134,7 @@ multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su, def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1), (Narrow.VT Narrow.RC:$src2), cond)), (COPY_TO_REGCLASS - (!cast<Instruction>(InstStr##Zrri) + (!cast<Instruction>(InstStr#"Zrri") (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)), (Frag.OperandTransform $cc)), Narrow.KRC)>; @@ -3138,53 +3143,111 @@ def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, (Narrow.KVT (Frag_su:$cc (Narrow.VT Narrow.RC:$src1), (Narrow.VT Narrow.RC:$src2), cond)))), - (COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrrik) + (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik") (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)), - (Frag.OperandTransform $cc)), Narrow.KRC)>; + (Frag_su.OperandTransform $cc)), Narrow.KRC)>; +} + +multiclass axv512_icmp_packed_cc_rmb_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su, + PatFrag CommFrag, PatFrag CommFrag_su, + string InstStr, + X86VectorVTInfo Narrow, + X86VectorVTInfo Wide> { +// Broadcast load. +def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1), + (Narrow.BroadcastLdFrag addr:$src2), cond)), + (COPY_TO_REGCLASS + (!cast<Instruction>(InstStr#"Zrmib") + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + addr:$src2, (Frag.OperandTransform $cc)), Narrow.KRC)>; + +def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, + (Narrow.KVT + (Frag_su:$cc (Narrow.VT Narrow.RC:$src1), + (Narrow.BroadcastLdFrag addr:$src2), + cond)))), + (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk") + (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + addr:$src2, (Frag_su.OperandTransform $cc)), Narrow.KRC)>; + +// Commuted with broadcast load. +def : Pat<(Narrow.KVT (CommFrag:$cc (Narrow.BroadcastLdFrag addr:$src2), + (Narrow.VT Narrow.RC:$src1), + cond)), + (COPY_TO_REGCLASS + (!cast<Instruction>(InstStr#"Zrmib") + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + addr:$src2, (CommFrag.OperandTransform $cc)), Narrow.KRC)>; + +def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, + (Narrow.KVT + (CommFrag_su:$cc (Narrow.BroadcastLdFrag addr:$src2), + (Narrow.VT Narrow.RC:$src1), + cond)))), + (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk") + (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + addr:$src2, (CommFrag_su.OperandTransform $cc)), Narrow.KRC)>; } // Same as above, but for fp types which don't use PatFrags. -multiclass axv512_cmp_packed_cc_no_vlx_lowering<SDNode OpNode, PatFrag OpNode_su, - string InstStr, +multiclass axv512_cmp_packed_cc_no_vlx_lowering<string InstStr, X86VectorVTInfo Narrow, X86VectorVTInfo Wide> { -def : Pat<(Narrow.KVT (OpNode (Narrow.VT Narrow.RC:$src1), - (Narrow.VT Narrow.RC:$src2), imm:$cc)), +def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1), + (Narrow.VT Narrow.RC:$src2), timm:$cc)), (COPY_TO_REGCLASS - (!cast<Instruction>(InstStr##Zrri) + (!cast<Instruction>(InstStr#"Zrri") (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)), - imm:$cc), Narrow.KRC)>; + timm:$cc), Narrow.KRC)>; def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, - (OpNode_su (Narrow.VT Narrow.RC:$src1), - (Narrow.VT Narrow.RC:$src2), imm:$cc))), - (COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrrik) + (X86cmpm_su (Narrow.VT Narrow.RC:$src1), + (Narrow.VT Narrow.RC:$src2), timm:$cc))), + (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik") (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)), - imm:$cc), Narrow.KRC)>; -} + timm:$cc), Narrow.KRC)>; -let Predicates = [HasAVX512, NoVLX] in { - // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't - // increase the pattern complexity the way an immediate would. - let AddedComplexity = 2 in { - defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTD", v8i32x_info, v16i32_info>; - defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQD", v8i32x_info, v16i32_info>; +// Broadcast load. +def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1), + (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc)), + (COPY_TO_REGCLASS + (!cast<Instruction>(InstStr#"Zrmbi") + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + addr:$src2, timm:$cc), Narrow.KRC)>; - defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTD", v4i32x_info, v16i32_info>; - defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQD", v4i32x_info, v16i32_info>; +def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, + (X86cmpm_su (Narrow.VT Narrow.RC:$src1), + (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc))), + (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik") + (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + addr:$src2, timm:$cc), Narrow.KRC)>; - defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTQ", v4i64x_info, v8i64_info>; - defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQQ", v4i64x_info, v8i64_info>; +// Commuted with broadcast load. +def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), + (Narrow.VT Narrow.RC:$src1), timm:$cc)), + (COPY_TO_REGCLASS + (!cast<Instruction>(InstStr#"Zrmbi") + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>; - defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTQ", v2i64x_info, v8i64_info>; - defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQQ", v2i64x_info, v8i64_info>; - } +def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, + (X86cmpm_su (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), + (Narrow.VT Narrow.RC:$src1), timm:$cc))), + (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik") + (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>; +} +let Predicates = [HasAVX512, NoVLX] in { defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>; defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>; @@ -3197,29 +3260,25 @@ let Predicates = [HasAVX512, NoVLX] in { defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>; defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>; - defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, X86cmpm_su, "VCMPPS", v8f32x_info, v16f32_info>; - defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, X86cmpm_su, "VCMPPS", v4f32x_info, v16f32_info>; - defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, X86cmpm_su, "VCMPPD", v4f64x_info, v8f64_info>; - defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, X86cmpm_su, "VCMPPD", v2f64x_info, v8f64_info>; -} + defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPD", v8i32x_info, v16i32_info>; + defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUD", v8i32x_info, v16i32_info>; -let Predicates = [HasBWI, NoVLX] in { - // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't - // increase the pattern complexity the way an immediate would. - let AddedComplexity = 2 in { - defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTB", v32i8x_info, v64i8_info>; - defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQB", v32i8x_info, v64i8_info>; + defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPD", v4i32x_info, v16i32_info>; + defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUD", v4i32x_info, v16i32_info>; - defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTB", v16i8x_info, v64i8_info>; - defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQB", v16i8x_info, v64i8_info>; + defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPQ", v4i64x_info, v8i64_info>; + defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUQ", v4i64x_info, v8i64_info>; - defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTW", v16i16x_info, v32i16_info>; - defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQW", v16i16x_info, v32i16_info>; + defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPQ", v2i64x_info, v8i64_info>; + defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUQ", v2i64x_info, v8i64_info>; - defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTW", v8i16x_info, v32i16_info>; - defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQW", v8i16x_info, v32i16_info>; - } + defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v8f32x_info, v16f32_info>; + defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v4f32x_info, v16f32_info>; + defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v4f64x_info, v8f64_info>; + defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v2f64x_info, v8f64_info>; +} +let Predicates = [HasBWI, NoVLX] in { defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v32i8x_info, v64i8_info>; defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v32i8x_info, v64i8_info>; @@ -4186,16 +4245,32 @@ def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), fp32imm0)), (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrkz VK1WM:$mask, (v4f32 (IMPLICIT_DEF)), (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>; +def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), (f32 FR32X:$src0))), + (COPY_TO_REGCLASS + (v4f32 (VMOVSSZrmk (v4f32 (COPY_TO_REGCLASS FR32X:$src0, VR128X)), + VK1WM:$mask, addr:$src)), + FR32X)>; +def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), fp32imm0)), + (COPY_TO_REGCLASS (v4f32 (VMOVSSZrmkz VK1WM:$mask, addr:$src)), FR32X)>; + def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))), (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrk (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)), VK1WM:$mask, (v2f64 (IMPLICIT_DEF)), (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>; -def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fpimm0)), +def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fp64imm0)), (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrkz VK1WM:$mask, (v2f64 (IMPLICIT_DEF)), (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>; +def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), (f64 FR64X:$src0))), + (COPY_TO_REGCLASS + (v2f64 (VMOVSDZrmk (v2f64 (COPY_TO_REGCLASS FR64X:$src0, VR128X)), + VK1WM:$mask, addr:$src)), + FR64X)>; +def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), fp64imm0)), + (COPY_TO_REGCLASS (v2f64 (VMOVSDZrmkz VK1WM:$mask, addr:$src)), FR64X)>; + let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in { def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2), @@ -4537,8 +4612,7 @@ multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode, "${src2}"##_.BroadcastStr##", $src1", "$src1, ${src2}"##_.BroadcastStr, (_.VT (OpNode _.RC:$src1, - (X86VBroadcast - (_.ScalarLdFrag addr:$src2))))>, + (_.BroadcastLdFrag addr:$src2)))>, AVX512BIBase, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -4664,8 +4738,7 @@ multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr, "${src2}"##_Brdct.BroadcastStr##", $src1", "$src1, ${src2}"##_Brdct.BroadcastStr, (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert - (_Brdct.VT (X86VBroadcast - (_Brdct.ScalarLdFrag addr:$src2))))))>, + (_Brdct.VT (_Brdct.BroadcastLdFrag addr:$src2)))))>, AVX512BIBase, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -4737,8 +4810,7 @@ multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode, "${src2}"##_Src.BroadcastStr##", $src1", "$src1, ${src2}"##_Src.BroadcastStr, (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert - (_Src.VT (X86VBroadcast - (_Src.ScalarLdFrag addr:$src2))))))>, + (_Src.VT (_Src.BroadcastLdFrag addr:$src2)))))>, EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -4874,22 +4946,11 @@ let Predicates = [HasDQI, NoVLX] in { (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm), (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)), sub_ymm)>; - - def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))), - (EXTRACT_SUBREG - (VPMULLQZrr - (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm), - (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)), - sub_xmm)>; -} - -// PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX. -let Predicates = [HasDQI, NoVLX] in { - def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))), + def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))), (EXTRACT_SUBREG - (VPMULLQZrr + (VPMULLQZrmb (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm), - (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)), + addr:$src2), sub_ymm)>; def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))), @@ -4898,29 +4959,47 @@ let Predicates = [HasDQI, NoVLX] in { (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm), (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)), sub_xmm)>; + def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))), + (EXTRACT_SUBREG + (VPMULLQZrmb + (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm), + addr:$src2), + sub_xmm)>; } -multiclass avx512_min_max_lowering<Instruction Instr, SDNode OpNode> { +multiclass avx512_min_max_lowering<string Instr, SDNode OpNode> { def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)), (EXTRACT_SUBREG - (Instr + (!cast<Instruction>(Instr#"rr") (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm), (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)), sub_ymm)>; + def : Pat<(v4i64 (OpNode (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))), + (EXTRACT_SUBREG + (!cast<Instruction>(Instr#"rmb") + (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm), + addr:$src2), + sub_ymm)>; def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)), (EXTRACT_SUBREG - (Instr + (!cast<Instruction>(Instr#"rr") (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm), (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)), sub_xmm)>; + def : Pat<(v2i64 (OpNode (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))), + (EXTRACT_SUBREG + (!cast<Instruction>(Instr#"rmb") + (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm), + addr:$src2), + sub_xmm)>; } let Predicates = [HasAVX512, NoVLX] in { - defm : avx512_min_max_lowering<VPMAXUQZrr, umax>; - defm : avx512_min_max_lowering<VPMINUQZrr, umin>; - defm : avx512_min_max_lowering<VPMAXSQZrr, smax>; - defm : avx512_min_max_lowering<VPMINSQZrr, smin>; + defm : avx512_min_max_lowering<"VPMAXUQZ", umax>; + defm : avx512_min_max_lowering<"VPMINUQZ", umin>; + defm : avx512_min_max_lowering<"VPMAXSQZ", smax>; + defm : avx512_min_max_lowering<"VPMINSQZ", smin>; } //===----------------------------------------------------------------------===// @@ -4977,32 +5056,6 @@ let Predicates = [HasVLX] in { def : Pat<(X86andnp VR128X:$src1, (loadv8i16 addr:$src2)), (VPANDNQZ128rm VR128X:$src1, addr:$src2)>; - def : Pat<(and VR128X:$src1, - (bc_v4i32 (v4f32 (X86VBroadcast (loadf32 addr:$src2))))), - (VPANDDZ128rmb VR128X:$src1, addr:$src2)>; - def : Pat<(or VR128X:$src1, - (bc_v4i32 (v4f32 (X86VBroadcast (loadf32 addr:$src2))))), - (VPORDZ128rmb VR128X:$src1, addr:$src2)>; - def : Pat<(xor VR128X:$src1, - (bc_v4i32 (v4f32 (X86VBroadcast (loadf32 addr:$src2))))), - (VPXORDZ128rmb VR128X:$src1, addr:$src2)>; - def : Pat<(X86andnp VR128X:$src1, - (bc_v4i32 (v4f32 (X86VBroadcast (loadf32 addr:$src2))))), - (VPANDNDZ128rmb VR128X:$src1, addr:$src2)>; - - def : Pat<(and VR128X:$src1, - (bc_v2i64 (v2f64 (X86VBroadcast (loadf64 addr:$src2))))), - (VPANDQZ128rmb VR128X:$src1, addr:$src2)>; - def : Pat<(or VR128X:$src1, - (bc_v2i64 (v2f64 (X86VBroadcast (loadf64 addr:$src2))))), - (VPORQZ128rmb VR128X:$src1, addr:$src2)>; - def : Pat<(xor VR128X:$src1, - (bc_v2i64 (v2f64 (X86VBroadcast (loadf64 addr:$src2))))), - (VPXORQZ128rmb VR128X:$src1, addr:$src2)>; - def : Pat<(X86andnp VR128X:$src1, - (bc_v2i64 (v2f64 (X86VBroadcast (loadf64 addr:$src2))))), - (VPANDNQZ128rmb VR128X:$src1, addr:$src2)>; - def : Pat<(v32i8 (and VR256X:$src1, VR256X:$src2)), (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>; def : Pat<(v16i16 (and VR256X:$src1, VR256X:$src2)), @@ -5042,32 +5095,6 @@ let Predicates = [HasVLX] in { (VPANDNQZ256rm VR256X:$src1, addr:$src2)>; def : Pat<(X86andnp VR256X:$src1, (loadv16i16 addr:$src2)), (VPANDNQZ256rm VR256X:$src1, addr:$src2)>; - - def : Pat<(and VR256X:$src1, - (bc_v8i32 (v8f32 (X86VBroadcast (loadf32 addr:$src2))))), - (VPANDDZ256rmb VR256X:$src1, addr:$src2)>; - def : Pat<(or VR256X:$src1, - (bc_v8i32 (v8f32 (X86VBroadcast (loadf32 addr:$src2))))), - (VPORDZ256rmb VR256X:$src1, addr:$src2)>; - def : Pat<(xor VR256X:$src1, - (bc_v8i32 (v8f32 (X86VBroadcast (loadf32 addr:$src2))))), - (VPXORDZ256rmb VR256X:$src1, addr:$src2)>; - def : Pat<(X86andnp VR256X:$src1, - (bc_v8i32 (v8f32 (X86VBroadcast (loadf32 addr:$src2))))), - (VPANDNDZ256rmb VR256X:$src1, addr:$src2)>; - - def : Pat<(and VR256X:$src1, - (bc_v4i64 (v4f64 (X86VBroadcast (loadf64 addr:$src2))))), - (VPANDQZ256rmb VR256X:$src1, addr:$src2)>; - def : Pat<(or VR256X:$src1, - (bc_v4i64 (v4f64 (X86VBroadcast (loadf64 addr:$src2))))), - (VPORQZ256rmb VR256X:$src1, addr:$src2)>; - def : Pat<(xor VR256X:$src1, - (bc_v4i64 (v4f64 (X86VBroadcast (loadf64 addr:$src2))))), - (VPXORQZ256rmb VR256X:$src1, addr:$src2)>; - def : Pat<(X86andnp VR256X:$src1, - (bc_v4i64 (v4f64 (X86VBroadcast (loadf64 addr:$src2))))), - (VPANDNQZ256rmb VR256X:$src1, addr:$src2)>; } let Predicates = [HasAVX512] in { @@ -5110,32 +5137,6 @@ let Predicates = [HasAVX512] in { (VPANDNQZrm VR512:$src1, addr:$src2)>; def : Pat<(X86andnp VR512:$src1, (loadv32i16 addr:$src2)), (VPANDNQZrm VR512:$src1, addr:$src2)>; - - def : Pat<(and VR512:$src1, - (bc_v16i32 (v16f32 (X86VBroadcast (loadf32 addr:$src2))))), - (VPANDDZrmb VR512:$src1, addr:$src2)>; - def : Pat<(or VR512:$src1, - (bc_v16i32 (v16f32 (X86VBroadcast (loadf32 addr:$src2))))), - (VPORDZrmb VR512:$src1, addr:$src2)>; - def : Pat<(xor VR512:$src1, - (bc_v16i32 (v16f32 (X86VBroadcast (loadf32 addr:$src2))))), - (VPXORDZrmb VR512:$src1, addr:$src2)>; - def : Pat<(X86andnp VR512:$src1, - (bc_v16i32 (v16f32 (X86VBroadcast (loadf32 addr:$src2))))), - (VPANDNDZrmb VR512:$src1, addr:$src2)>; - - def : Pat<(and VR512:$src1, - (bc_v8i64 (v8f64 (X86VBroadcast (loadf64 addr:$src2))))), - (VPANDQZrmb VR512:$src1, addr:$src2)>; - def : Pat<(or VR512:$src1, - (bc_v8i64 (v8f64 (X86VBroadcast (loadf64 addr:$src2))))), - (VPORQZrmb VR512:$src1, addr:$src2)>; - def : Pat<(xor VR512:$src1, - (bc_v8i64 (v8f64 (X86VBroadcast (loadf64 addr:$src2))))), - (VPXORQZrmb VR512:$src1, addr:$src2)>; - def : Pat<(X86andnp VR512:$src1, - (bc_v8i64 (v8f64 (X86VBroadcast (loadf64 addr:$src2))))), - (VPANDNQZrmb VR512:$src1, addr:$src2)>; } // Patterns to catch vselect with different type than logic op. @@ -5174,25 +5175,17 @@ multiclass avx512_logical_lowering_bcast<string InstrStr, SDNode OpNode, X86VectorVTInfo _, X86VectorVTInfo IntInfo> { // Register-broadcast logical operations. - def : Pat<(IntInfo.VT (OpNode _.RC:$src1, - (bitconvert (_.VT (X86VBroadcast - (_.ScalarLdFrag addr:$src2)))))), - (!cast<Instruction>(InstrStr#rmb) _.RC:$src1, addr:$src2)>; def : Pat<(_.VT (vselect _.KRCWM:$mask, (bitconvert (IntInfo.VT (OpNode _.RC:$src1, - (bitconvert (_.VT - (X86VBroadcast - (_.ScalarLdFrag addr:$src2))))))), + (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))), _.RC:$src0)), (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, addr:$src2)>; def : Pat<(_.VT (vselect _.KRCWM:$mask, (bitconvert (IntInfo.VT (OpNode _.RC:$src1, - (bitconvert (_.VT - (X86VBroadcast - (_.ScalarLdFrag addr:$src2))))))), + (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))), _.ImmAllZerosV)), (!cast<Instruction>(InstrStr#rmbkz) _.KRCWM:$mask, _.RC:$src1, addr:$src2)>; @@ -5329,7 +5322,8 @@ multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo } multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _, SDNode OpNode, SDNode VecNode, SDNode SaeNode, - X86FoldableSchedWrite sched, bit IsCommutable> { + X86FoldableSchedWrite sched, bit IsCommutable, + string EVEX2VexOvrd> { let ExeDomain = _.ExeDomain in { defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src1, _.RC:$src2), OpcodeStr, @@ -5349,7 +5343,8 @@ multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _, (ins _.FRC:$src1, _.FRC:$src2), OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>, - Sched<[sched]> { + Sched<[sched]>, + EVEX2VEXOverride<EVEX2VexOvrd#"rr"> { let isCommutable = IsCommutable; } def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst), @@ -5357,7 +5352,8 @@ multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _, OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.FRC:$dst, (OpNode _.FRC:$src1, (_.ScalarLdFrag addr:$src2)))]>, - Sched<[sched.Folded, sched.ReadAfterFold]>; + Sched<[sched.Folded, sched.ReadAfterFold]>, + EVEX2VEXOverride<EVEX2VexOvrd#"rm">; } defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), @@ -5387,10 +5383,12 @@ multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode VecNode, SDNode SaeNode, X86SchedWriteSizes sched, bit IsCommutable> { defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode, - VecNode, SaeNode, sched.PS.Scl, IsCommutable>, + VecNode, SaeNode, sched.PS.Scl, IsCommutable, + NAME#"SS">, XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode, - VecNode, SaeNode, sched.PD.Scl, IsCommutable>, + VecNode, SaeNode, sched.PD.Scl, IsCommutable, + NAME#"SD">, XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>; } defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86fadds, X86faddRnds, @@ -5410,13 +5408,14 @@ defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxSAEs, // X86fminc and X86fmaxc instead of X86fmin and X86fmax multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, SDNode OpNode, - X86FoldableSchedWrite sched> { + X86FoldableSchedWrite sched, + string EVEX2VEXOvrd> { let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in { def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst), (ins _.FRC:$src1, _.FRC:$src2), OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>, - Sched<[sched]> { + Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr"> { let isCommutable = 1; } def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst), @@ -5424,24 +5423,27 @@ multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr, OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.FRC:$dst, (OpNode _.FRC:$src1, (_.ScalarLdFrag addr:$src2)))]>, - Sched<[sched.Folded, sched.ReadAfterFold]>; + Sched<[sched.Folded, sched.ReadAfterFold]>, + EVEX2VEXOverride<EVEX2VEXOvrd#"rm">; } } defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc, - SchedWriteFCmp.Scl>, XS, EVEX_4V, - VEX_LIG, EVEX_CD8<32, CD8VT1>; + SchedWriteFCmp.Scl, "VMINCSS">, XS, + EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc, - SchedWriteFCmp.Scl>, XD, VEX_W, EVEX_4V, - VEX_LIG, EVEX_CD8<64, CD8VT1>; + SchedWriteFCmp.Scl, "VMINCSD">, XD, + VEX_W, EVEX_4V, VEX_LIG, + EVEX_CD8<64, CD8VT1>; defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc, - SchedWriteFCmp.Scl>, XS, EVEX_4V, - VEX_LIG, EVEX_CD8<32, CD8VT1>; + SchedWriteFCmp.Scl, "VMAXCSS">, XS, + EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc, - SchedWriteFCmp.Scl>, XD, VEX_W, EVEX_4V, - VEX_LIG, EVEX_CD8<64, CD8VT1>; + SchedWriteFCmp.Scl, "VMAXCSD">, XD, + VEX_W, EVEX_4V, VEX_LIG, + EVEX_CD8<64, CD8VT1>; multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, X86VectorVTInfo _, X86FoldableSchedWrite sched, @@ -5464,8 +5466,7 @@ multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpN (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix, "${src2}"##_.BroadcastStr##", $src1", "$src1, ${src2}"##_.BroadcastStr, - (OpNode _.RC:$src1, (_.VT (X86VBroadcast - (_.ScalarLdFrag addr:$src2))))>, + (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -5595,8 +5596,7 @@ multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode, (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix, "${src2}"##_.BroadcastStr##", $src1", "$src1, ${src2}"##_.BroadcastStr, - (OpNode _.RC:$src1, (_.VT (X86VBroadcast - (_.ScalarLdFrag addr:$src2))))>, + (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -5751,13 +5751,13 @@ multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM, defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2), OpcodeStr, "$src2, $src1", "$src1, $src2", - (_.VT (OpNode _.RC:$src1, (i8 imm:$src2)))>, + (_.VT (OpNode _.RC:$src1, (i8 timm:$src2)))>, Sched<[sched]>; defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr, "$src2, $src1", "$src1, $src2", (_.VT (OpNode (_.VT (_.LdFrag addr:$src1)), - (i8 imm:$src2)))>, + (i8 timm:$src2)))>, Sched<[sched.Folded]>; } } @@ -5769,7 +5769,7 @@ multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM, defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst), (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr, "$src2, ${src1}"##_.BroadcastStr, "${src1}"##_.BroadcastStr##", $src2", - (_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src1)), (i8 imm:$src2)))>, + (_.VT (OpNode (_.BroadcastLdFrag addr:$src1), (i8 timm:$src2)))>, EVEX_B, Sched<[sched.Folded]>; } @@ -5911,17 +5911,17 @@ let Predicates = [HasAVX512, NoVLX] in { (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), VR128X:$src2)), sub_xmm)>; - def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 imm:$src2))), + def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 timm:$src2))), (EXTRACT_SUBREG (v8i64 (VPSRAQZri (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), - imm:$src2)), sub_ymm)>; + timm:$src2)), sub_ymm)>; - def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 imm:$src2))), + def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 timm:$src2))), (EXTRACT_SUBREG (v8i64 (VPSRAQZri (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), - imm:$src2)), sub_xmm)>; + timm:$src2)), sub_xmm)>; } //===-------------------------------------------------------------------===// @@ -5953,8 +5953,7 @@ multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode, (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr, "${src2}"##_.BroadcastStr##", $src1", "$src1, ${src2}"##_.BroadcastStr, - (_.VT (OpNode _.RC:$src1, (_.VT (X86VBroadcast - (_.ScalarLdFrag addr:$src2)))))>, + (_.VT (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))>, AVX5128IBase, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -6062,27 +6061,27 @@ let Predicates = [HasAVX512, NoVLX] in { (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))), sub_ymm)>; - def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 imm:$src2))), + def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 timm:$src2))), (EXTRACT_SUBREG (v8i64 (VPROLQZri (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), - imm:$src2)), sub_xmm)>; - def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 imm:$src2))), + timm:$src2)), sub_xmm)>; + def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 timm:$src2))), (EXTRACT_SUBREG (v8i64 (VPROLQZri (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), - imm:$src2)), sub_ymm)>; + timm:$src2)), sub_ymm)>; - def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 imm:$src2))), + def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 timm:$src2))), (EXTRACT_SUBREG (v16i32 (VPROLDZri (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), - imm:$src2)), sub_xmm)>; - def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 imm:$src2))), + timm:$src2)), sub_xmm)>; + def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 timm:$src2))), (EXTRACT_SUBREG (v16i32 (VPROLDZri (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), - imm:$src2)), sub_ymm)>; + timm:$src2)), sub_ymm)>; } // Use 512bit VPROR/VPRORI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX. @@ -6113,27 +6112,27 @@ let Predicates = [HasAVX512, NoVLX] in { (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))), sub_ymm)>; - def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 imm:$src2))), + def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 timm:$src2))), (EXTRACT_SUBREG (v8i64 (VPRORQZri (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), - imm:$src2)), sub_xmm)>; - def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 imm:$src2))), + timm:$src2)), sub_xmm)>; + def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 timm:$src2))), (EXTRACT_SUBREG (v8i64 (VPRORQZri (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), - imm:$src2)), sub_ymm)>; + timm:$src2)), sub_ymm)>; - def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 imm:$src2))), + def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 timm:$src2))), (EXTRACT_SUBREG (v16i32 (VPRORDZri (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), - imm:$src2)), sub_xmm)>; - def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 imm:$src2))), + timm:$src2)), sub_xmm)>; + def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 timm:$src2))), (EXTRACT_SUBREG (v16i32 (VPRORDZri (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), - imm:$src2)), sub_ymm)>; + timm:$src2)), sub_ymm)>; } //===-------------------------------------------------------------------===// @@ -6228,8 +6227,7 @@ multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode, "$src1, ${src2}"##_.BroadcastStr, (_.VT (OpNode _.RC:$src1, - (Ctrl.VT (X86VBroadcast - (Ctrl.ScalarLdFrag addr:$src2)))))>, + (Ctrl.VT (Ctrl.BroadcastLdFrag addr:$src2))))>, T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -6419,7 +6417,7 @@ multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), !strconcat("$src2, ${src3}", _.BroadcastStr ), (OpNode _.RC:$src2, - _.RC:$src1,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))), 1, 0>, + _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))), 1, 0>, AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -6493,7 +6491,7 @@ multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, OpcodeStr, "${src3}"##_.BroadcastStr##", $src2", "$src2, ${src3}"##_.BroadcastStr, (_.VT (OpNode _.RC:$src2, - (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))), + (_.VT (_.BroadcastLdFrag addr:$src3)), _.RC:$src1)), 1, 0>, AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -6571,7 +6569,7 @@ multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, (ins _.RC:$src2, _.ScalarMemOp:$src3), OpcodeStr, "${src3}"##_.BroadcastStr##", $src2", "$src2, ${src3}"##_.BroadcastStr, - (_.VT (OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))), + (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src3)), _.RC:$src1, _.RC:$src2)), 1, 0>, AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -6964,7 +6962,7 @@ multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), !strconcat("$src2, ${src3}", _.BroadcastStr ), (OpNode _.RC:$src2, - (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))), + (_.VT (_.BroadcastLdFrag addr:$src3)), _.RC:$src1)>, AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -7504,14 +7502,13 @@ multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, OpcodeStr, "${src}"##Broadcast, "${src}"##Broadcast, (_.VT (OpNode (_Src.VT - (X86VBroadcast (_Src.ScalarLdFrag addr:$src))) + (_Src.BroadcastLdFrag addr:$src)) )), (vselect MaskRC:$mask, (_.VT (OpNode (_Src.VT - (X86VBroadcast - (_Src.ScalarLdFrag addr:$src))))), + (_Src.BroadcastLdFrag addr:$src)))), _.RC:$src0), vselect, "$src0 = $dst">, EVEX, EVEX_B, Sched<[sched.Folded]>; @@ -7646,14 +7643,14 @@ let Predicates = [HasAVX512] in { v8f32x_info.ImmAllZerosV), (VCVTPD2PSZrmkz VK8WM:$mask, addr:$src)>; - def : Pat<(v8f32 (fpround (v8f64 (X86VBroadcast (loadf64 addr:$src))))), + def : Pat<(v8f32 (fpround (v8f64 (X86VBroadcastld64 addr:$src)))), (VCVTPD2PSZrmb addr:$src)>; def : Pat<(vselect VK8WM:$mask, - (fpround (v8f64 (X86VBroadcast (loadf64 addr:$src)))), + (fpround (v8f64 (X86VBroadcastld64 addr:$src))), (v8f32 VR256X:$src0)), (VCVTPD2PSZrmbk VR256X:$src0, VK8WM:$mask, addr:$src)>; def : Pat<(vselect VK8WM:$mask, - (fpround (v8f64 (X86VBroadcast (loadf64 addr:$src)))), + (fpround (v8f64 (X86VBroadcastld64 addr:$src))), v8f32x_info.ImmAllZerosV), (VCVTPD2PSZrmbkz VK8WM:$mask, addr:$src)>; } @@ -7677,14 +7674,14 @@ let Predicates = [HasVLX] in { v4f32x_info.ImmAllZerosV), (VCVTPD2PSZ256rmkz VK4WM:$mask, addr:$src)>; - def : Pat<(v4f32 (fpround (v4f64 (X86VBroadcast (loadf64 addr:$src))))), + def : Pat<(v4f32 (fpround (v4f64 (X86VBroadcastld64 addr:$src)))), (VCVTPD2PSZ256rmb addr:$src)>; def : Pat<(vselect VK4WM:$mask, - (v4f32 (fpround (v4f64 (X86VBroadcast (loadf64 addr:$src))))), + (v4f32 (fpround (v4f64 (X86VBroadcastld64 addr:$src)))), VR128X:$src0), (VCVTPD2PSZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>; def : Pat<(vselect VK4WM:$mask, - (v4f32 (fpround (v4f64 (X86VBroadcast (loadf64 addr:$src))))), + (v4f32 (fpround (v4f64 (X86VBroadcastld64 addr:$src)))), v4f32x_info.ImmAllZerosV), (VCVTPD2PSZ256rmbkz VK4WM:$mask, addr:$src)>; @@ -7708,12 +7705,12 @@ let Predicates = [HasVLX] in { VK2WM:$mask), (VCVTPD2PSZ128rmkz VK2WM:$mask, addr:$src)>; - def : Pat<(X86vfpround (v2f64 (X86VBroadcast (loadf64 addr:$src)))), + def : Pat<(X86vfpround (v2f64 (X86VBroadcastld64 addr:$src))), (VCVTPD2PSZ128rmb addr:$src)>; - def : Pat<(X86vmfpround (v2f64 (X86VBroadcast (loadf64 addr:$src))), + def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)), (v4f32 VR128X:$src0), VK2WM:$mask), (VCVTPD2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>; - def : Pat<(X86vmfpround (v2f64 (X86VBroadcast (loadf64 addr:$src))), + def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)), v4f32x_info.ImmAllZerosV, VK2WM:$mask), (VCVTPD2PSZ128rmbkz VK2WM:$mask, addr:$src)>; } @@ -8194,12 +8191,12 @@ let Predicates = [HasVLX] in { VK2WM:$mask), (VCVTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>; - def : Pat<(v4i32 (X86cvtp2Int (v2f64 (X86VBroadcast (loadf64 addr:$src))))), + def : Pat<(v4i32 (X86cvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)))), (VCVTPD2DQZ128rmb addr:$src)>; - def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcast (loadf64 addr:$src))), + def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)), (v4i32 VR128X:$src0), VK2WM:$mask), (VCVTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>; - def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcast (loadf64 addr:$src))), + def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)), v4i32x_info.ImmAllZerosV, VK2WM:$mask), (VCVTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>; @@ -8223,12 +8220,12 @@ let Predicates = [HasVLX] in { VK2WM:$mask), (VCVTTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>; - def : Pat<(v4i32 (X86cvttp2si (v2f64 (X86VBroadcast (loadf64 addr:$src))))), + def : Pat<(v4i32 (X86cvttp2si (v2f64 (X86VBroadcastld64 addr:$src)))), (VCVTTPD2DQZ128rmb addr:$src)>; - def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcast (loadf64 addr:$src))), + def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)), (v4i32 VR128X:$src0), VK2WM:$mask), (VCVTTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>; - def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcast (loadf64 addr:$src))), + def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)), v4i32x_info.ImmAllZerosV, VK2WM:$mask), (VCVTTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>; @@ -8252,12 +8249,12 @@ let Predicates = [HasVLX] in { VK2WM:$mask), (VCVTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>; - def : Pat<(v4i32 (X86cvtp2UInt (v2f64 (X86VBroadcast (loadf64 addr:$src))))), + def : Pat<(v4i32 (X86cvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)))), (VCVTPD2UDQZ128rmb addr:$src)>; - def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcast (loadf64 addr:$src))), + def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)), (v4i32 VR128X:$src0), VK2WM:$mask), (VCVTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>; - def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcast (loadf64 addr:$src))), + def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)), v4i32x_info.ImmAllZerosV, VK2WM:$mask), (VCVTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>; @@ -8281,12 +8278,12 @@ let Predicates = [HasVLX] in { VK2WM:$mask), (VCVTTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>; - def : Pat<(v4i32 (X86cvttp2ui (v2f64 (X86VBroadcast (loadf64 addr:$src))))), + def : Pat<(v4i32 (X86cvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)))), (VCVTTPD2UDQZ128rmb addr:$src)>; - def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcast (loadf64 addr:$src))), + def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)), (v4i32 VR128X:$src0), VK2WM:$mask), (VCVTTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>; - def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcast (loadf64 addr:$src))), + def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)), v4i32x_info.ImmAllZerosV, VK2WM:$mask), (VCVTTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>; } @@ -8419,12 +8416,12 @@ let Predicates = [HasDQI, HasVLX] in { VK2WM:$mask), (VCVTQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>; - def : Pat<(v4f32 (X86VSintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))))), + def : Pat<(v4f32 (X86VSintToFP (v2i64 (X86VBroadcastld64 addr:$src)))), (VCVTQQ2PSZ128rmb addr:$src)>; - def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))), + def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)), (v4f32 VR128X:$src0), VK2WM:$mask), (VCVTQQ2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>; - def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))), + def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)), v4f32x_info.ImmAllZerosV, VK2WM:$mask), (VCVTQQ2PSZ128rmbkz VK2WM:$mask, addr:$src)>; @@ -8448,12 +8445,12 @@ let Predicates = [HasDQI, HasVLX] in { VK2WM:$mask), (VCVTUQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>; - def : Pat<(v4f32 (X86VUintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))))), + def : Pat<(v4f32 (X86VUintToFP (v2i64 (X86VBroadcastld64 addr:$src)))), (VCVTUQQ2PSZ128rmb addr:$src)>; - def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))), + def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)), (v4f32 VR128X:$src0), VK2WM:$mask), (VCVTUQQ2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>; - def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))), + def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)), v4f32x_info.ImmAllZerosV, VK2WM:$mask), (VCVTUQQ2PSZ128rmbkz VK2WM:$mask, addr:$src)>; } @@ -8576,21 +8573,21 @@ let ExeDomain = GenericDomain in { (ins _src.RC:$src1, i32u8imm:$src2), "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _dest.RC:$dst, - (X86cvtps2ph (_src.VT _src.RC:$src1), (i32 imm:$src2)))]>, + (X86cvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>, Sched<[RR]>; let Constraints = "$src0 = $dst" in def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst), (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2), "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", [(set _dest.RC:$dst, - (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 imm:$src2), + (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2), _dest.RC:$src0, _src.KRCWM:$mask))]>, Sched<[RR]>, EVEX_K; def rrkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst), (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2), "vcvtps2ph\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}", [(set _dest.RC:$dst, - (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 imm:$src2), + (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2), _dest.ImmAllZerosV, _src.KRCWM:$mask))]>, Sched<[RR]>, EVEX_KZ; let hasSideEffects = 0, mayStore = 1 in { @@ -8631,17 +8628,17 @@ let Predicates = [HasAVX512] in { } def : Pat<(store (f64 (extractelt - (bc_v2f64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))), + (bc_v2f64 (v8i16 (X86cvtps2ph VR128X:$src1, timm:$src2))), (iPTR 0))), addr:$dst), - (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>; + (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>; def : Pat<(store (i64 (extractelt - (bc_v2i64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))), + (bc_v2i64 (v8i16 (X86cvtps2ph VR128X:$src1, timm:$src2))), (iPTR 0))), addr:$dst), - (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>; - def : Pat<(store (v8i16 (X86cvtps2ph VR256X:$src1, i32:$src2)), addr:$dst), - (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, imm:$src2)>; - def : Pat<(store (v16i16 (X86cvtps2ph VR512:$src1, i32:$src2)), addr:$dst), - (VCVTPS2PHZmr addr:$dst, VR512:$src1, imm:$src2)>; + (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>; + def : Pat<(store (v8i16 (X86cvtps2ph VR256X:$src1, timm:$src2)), addr:$dst), + (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, timm:$src2)>; + def : Pat<(store (v16i16 (X86cvtps2ph VR512:$src1, timm:$src2)), addr:$dst), + (VCVTPS2PHZmr addr:$dst, VR512:$src1, timm:$src2)>; } // Patterns for matching conversions from float to half-float and vice versa. @@ -8765,7 +8762,7 @@ multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode, (ins _.ScalarMemOp:$src), OpcodeStr, "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr, (OpNode (_.VT - (X86VBroadcast (_.ScalarLdFrag addr:$src))))>, + (_.BroadcastLdFrag addr:$src)))>, EVEX, T8PD, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -8859,7 +8856,7 @@ multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, (ins _.ScalarMemOp:$src), OpcodeStr, "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr, (OpNode (_.VT - (X86VBroadcast (_.ScalarLdFrag addr:$src))))>, + (_.BroadcastLdFrag addr:$src)))>, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -8940,7 +8937,7 @@ multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr, (ins _.ScalarMemOp:$src), OpcodeStr, "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr, (fsqrt (_.VT - (X86VBroadcast (_.ScalarLdFrag addr:$src))))>, + (_.BroadcastLdFrag addr:$src)))>, EVEX, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -9049,14 +9046,14 @@ multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr, (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2), - (i32 imm:$src3)))>, + (i32 timm:$src3)))>, Sched<[sched]>; defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr, "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3", (_.VT (X86RndScalesSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2), - (i32 imm:$src3)))>, EVEX_B, + (i32 timm:$src3)))>, EVEX_B, Sched<[sched]>; defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), @@ -9064,7 +9061,7 @@ multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr, OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", (_.VT (X86RndScales _.RC:$src1, - _.ScalarIntMemCPat:$src2, (i32 imm:$src3)))>, + _.ScalarIntMemCPat:$src2, (i32 timm:$src3)))>, Sched<[sched.Folded, sched.ReadAfterFold]>; let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in { @@ -9082,15 +9079,15 @@ multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr, } let Predicates = [HasAVX512] in { - def : Pat<(X86VRndScale _.FRC:$src1, imm:$src2), + def : Pat<(X86VRndScale _.FRC:$src1, timm:$src2), (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)), - _.FRC:$src1, imm:$src2))>; + _.FRC:$src1, timm:$src2))>; } let Predicates = [HasAVX512, OptForSize] in { - def : Pat<(X86VRndScale (_.ScalarLdFrag addr:$src1), imm:$src2), + def : Pat<(X86VRndScale (_.ScalarLdFrag addr:$src1), timm:$src2), (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)), - addr:$src1, imm:$src2))>; + addr:$src1, timm:$src2))>; } } @@ -10109,19 +10106,19 @@ multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNo (ins _.RC:$src1, i32u8imm:$src2), OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2", (OpNode (_.VT _.RC:$src1), - (i32 imm:$src2))>, Sched<[sched]>; + (i32 timm:$src2))>, Sched<[sched]>; defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.MemOp:$src1, i32u8imm:$src2), OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2", (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))), - (i32 imm:$src2))>, + (i32 timm:$src2))>, Sched<[sched.Folded, sched.ReadAfterFold]>; defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.ScalarMemOp:$src1, i32u8imm:$src2), OpcodeStr##_.Suffix, "$src2, ${src1}"##_.BroadcastStr, "${src1}"##_.BroadcastStr##", $src2", - (OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src1))), - (i32 imm:$src2))>, EVEX_B, + (OpNode (_.VT (_.BroadcastLdFrag addr:$src1)), + (i32 timm:$src2))>, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -10136,7 +10133,7 @@ multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr, OpcodeStr##_.Suffix, "$src2, {sae}, $src1", "$src1, {sae}, $src2", (OpNode (_.VT _.RC:$src1), - (i32 imm:$src2))>, + (i32 timm:$src2))>, EVEX_B, Sched<[sched]>; } @@ -10169,22 +10166,22 @@ multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode, OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), - (i32 imm:$src3))>, + (i32 timm:$src3))>, Sched<[sched]>; defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3), OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", (OpNode (_.VT _.RC:$src1), (_.VT (bitconvert (_.LdFrag addr:$src2))), - (i32 imm:$src3))>, + (i32 timm:$src3))>, Sched<[sched.Folded, sched.ReadAfterFold]>; defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3), OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1", "$src1, ${src2}"##_.BroadcastStr##", $src3", (OpNode (_.VT _.RC:$src1), - (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))), - (i32 imm:$src3))>, EVEX_B, + (_.VT (_.BroadcastLdFrag addr:$src2)), + (i32 timm:$src3))>, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -10200,7 +10197,7 @@ multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode, OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1), (SrcInfo.VT SrcInfo.RC:$src2), - (i8 imm:$src3)))>, + (i8 timm:$src3)))>, Sched<[sched]>; defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst), (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3), @@ -10208,7 +10205,7 @@ multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode, (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1), (SrcInfo.VT (bitconvert (SrcInfo.LdFrag addr:$src2))), - (i8 imm:$src3)))>, + (i8 timm:$src3)))>, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -10226,8 +10223,8 @@ multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode, OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1", "$src1, ${src2}"##_.BroadcastStr##", $src3", (OpNode (_.VT _.RC:$src1), - (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))), - (i8 imm:$src3))>, EVEX_B, + (_.VT (_.BroadcastLdFrag addr:$src2)), + (i8 timm:$src3))>, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -10241,15 +10238,14 @@ multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode, OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), - (i32 imm:$src3))>, + (i32 timm:$src3))>, Sched<[sched]>; defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), - (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3), + (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3), OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", (OpNode (_.VT _.RC:$src1), - (_.VT (scalar_to_vector - (_.ScalarLdFrag addr:$src2))), - (i32 imm:$src3))>, + (_.VT _.ScalarIntMemCPat:$src2), + (i32 timm:$src3))>, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -10265,7 +10261,7 @@ multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr, "$src1, $src2, {sae}, $src3", (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), - (i32 imm:$src3))>, + (i32 timm:$src3))>, EVEX_B, Sched<[sched]>; } @@ -10279,7 +10275,7 @@ multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode "$src1, $src2, {sae}, $src3", (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), - (i32 imm:$src3))>, + (i32 timm:$src3))>, EVEX_B, Sched<[sched]>; } @@ -10401,7 +10397,7 @@ multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr, OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", (_.VT (bitconvert (CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2, - (i8 imm:$src3)))))>, + (i8 timm:$src3)))))>, Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr">; defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3), @@ -10410,7 +10406,7 @@ multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr, (bitconvert (CastInfo.VT (X86Shuf128 _.RC:$src1, (CastInfo.LdFrag addr:$src2), - (i8 imm:$src3)))))>, + (i8 timm:$src3)))))>, Sched<[sched.Folded, sched.ReadAfterFold]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rm">; defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), @@ -10421,8 +10417,8 @@ multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr, (bitconvert (CastInfo.VT (X86Shuf128 _.RC:$src1, - (X86VBroadcast (_.ScalarLdFrag addr:$src2)), - (i8 imm:$src3)))))>, EVEX_B, + (_.BroadcastLdFrag addr:$src2), + (i8 timm:$src3)))))>, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -10491,14 +10487,14 @@ multiclass avx512_valign<bits<8> opc, string OpcodeStr, defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$src3), OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", - (_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 imm:$src3)))>, + (_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 timm:$src3)))>, Sched<[sched]>, EVEX2VEXOverride<"VPALIGNRrri">; defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3), OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", (_.VT (X86VAlign _.RC:$src1, (bitconvert (_.LdFrag addr:$src2)), - (i8 imm:$src3)))>, + (i8 timm:$src3)))>, Sched<[sched.Folded, sched.ReadAfterFold]>, EVEX2VEXOverride<"VPALIGNRrmi">; @@ -10507,8 +10503,8 @@ multiclass avx512_valign<bits<8> opc, string OpcodeStr, OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1", "$src1, ${src2}"##_.BroadcastStr##", $src3", (X86VAlign _.RC:$src1, - (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))), - (i8 imm:$src3))>, EVEX_B, + (_.VT (_.BroadcastLdFrag addr:$src2)), + (i8 timm:$src3))>, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -10541,13 +10537,13 @@ defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr", // Fragments to help convert valignq into masked valignd. Or valignq/valignd // into vpalignr. -def ValignqImm32XForm : SDNodeXForm<imm, [{ +def ValignqImm32XForm : SDNodeXForm<timm, [{ return getI8Imm(N->getZExtValue() * 2, SDLoc(N)); }]>; -def ValignqImm8XForm : SDNodeXForm<imm, [{ +def ValignqImm8XForm : SDNodeXForm<timm, [{ return getI8Imm(N->getZExtValue() * 8, SDLoc(N)); }]>; -def ValigndImm8XForm : SDNodeXForm<imm, [{ +def ValigndImm8XForm : SDNodeXForm<timm, [{ return getI8Imm(N->getZExtValue() * 4, SDLoc(N)); }]>; @@ -10557,40 +10553,40 @@ multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode, def : Pat<(To.VT (vselect To.KRCWM:$mask, (bitconvert (From.VT (OpNode From.RC:$src1, From.RC:$src2, - imm:$src3))), + timm:$src3))), To.RC:$src0)), (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask, To.RC:$src1, To.RC:$src2, - (ImmXForm imm:$src3))>; + (ImmXForm timm:$src3))>; def : Pat<(To.VT (vselect To.KRCWM:$mask, (bitconvert (From.VT (OpNode From.RC:$src1, From.RC:$src2, - imm:$src3))), + timm:$src3))), To.ImmAllZerosV)), (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask, To.RC:$src1, To.RC:$src2, - (ImmXForm imm:$src3))>; + (ImmXForm timm:$src3))>; def : Pat<(To.VT (vselect To.KRCWM:$mask, (bitconvert (From.VT (OpNode From.RC:$src1, (From.LdFrag addr:$src2), - imm:$src3))), + timm:$src3))), To.RC:$src0)), (!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask, To.RC:$src1, addr:$src2, - (ImmXForm imm:$src3))>; + (ImmXForm timm:$src3))>; def : Pat<(To.VT (vselect To.KRCWM:$mask, (bitconvert (From.VT (OpNode From.RC:$src1, (From.LdFrag addr:$src2), - imm:$src3))), + timm:$src3))), To.ImmAllZerosV)), (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask, To.RC:$src1, addr:$src2, - (ImmXForm imm:$src3))>; + (ImmXForm timm:$src3))>; } multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode, @@ -10599,35 +10595,32 @@ multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode, SDNodeXForm ImmXForm> : avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> { def : Pat<(From.VT (OpNode From.RC:$src1, - (bitconvert (To.VT (X86VBroadcast - (To.ScalarLdFrag addr:$src2)))), - imm:$src3)), + (bitconvert (To.VT (To.BroadcastLdFrag addr:$src2))), + timm:$src3)), (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2, - (ImmXForm imm:$src3))>; + (ImmXForm timm:$src3))>; def : Pat<(To.VT (vselect To.KRCWM:$mask, (bitconvert (From.VT (OpNode From.RC:$src1, (bitconvert - (To.VT (X86VBroadcast - (To.ScalarLdFrag addr:$src2)))), - imm:$src3))), + (To.VT (To.BroadcastLdFrag addr:$src2))), + timm:$src3))), To.RC:$src0)), (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask, To.RC:$src1, addr:$src2, - (ImmXForm imm:$src3))>; + (ImmXForm timm:$src3))>; def : Pat<(To.VT (vselect To.KRCWM:$mask, (bitconvert (From.VT (OpNode From.RC:$src1, (bitconvert - (To.VT (X86VBroadcast - (To.ScalarLdFrag addr:$src2)))), - imm:$src3))), + (To.VT (To.BroadcastLdFrag addr:$src2))), + timm:$src3))), To.ImmAllZerosV)), (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask, To.RC:$src1, addr:$src2, - (ImmXForm imm:$src3))>; + (ImmXForm timm:$src3))>; } let Predicates = [HasAVX512] in { @@ -10666,13 +10659,13 @@ multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src1), OpcodeStr, "$src1", "$src1", - (_.VT (OpNode _.RC:$src1))>, EVEX, AVX5128IBase, + (_.VT (OpNode (_.VT _.RC:$src1)))>, EVEX, AVX5128IBase, Sched<[sched]>; defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1", - (_.VT (OpNode (bitconvert (_.LdFrag addr:$src1))))>, + (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1)))))>, EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>, Sched<[sched.Folded]>; } @@ -10685,8 +10678,7 @@ multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode, (ins _.ScalarMemOp:$src1), OpcodeStr, "${src1}"##_.BroadcastStr, "${src1}"##_.BroadcastStr, - (_.VT (OpNode (X86VBroadcast - (_.ScalarLdFrag addr:$src1))))>, + (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src1))))>, EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, Sched<[sched.Folded]>; } @@ -10770,7 +10762,7 @@ let Predicates = [HasAVX512, NoVLX] in { multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode, AVX512VLVectorVTInfo _, Predicate prd> { let Predicates = [prd, NoVLX] in { - def : Pat<(_.info256.VT(OpNode _.info256.RC:$src1)), + def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1))), (EXTRACT_SUBREG (!cast<Instruction>(InstrStr # "Zrr") (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)), @@ -10778,7 +10770,7 @@ multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode, _.info256.SubRegIdx)), _.info256.SubRegIdx)>; - def : Pat<(_.info128.VT(OpNode _.info128.RC:$src1)), + def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1))), (EXTRACT_SUBREG (!cast<Instruction>(InstrStr # "Zrr") (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)), @@ -10829,17 +10821,16 @@ defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup, // AVX-512 - MOVDDUP //===----------------------------------------------------------------------===// -multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr, SDNode OpNode, +multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in { defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src), OpcodeStr, "$src", "$src", - (_.VT (OpNode (_.VT _.RC:$src)))>, EVEX, + (_.VT (X86VBroadcast (_.VT _.RC:$src)))>, EVEX, Sched<[sched]>; defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src", - (_.VT (OpNode (_.VT (scalar_to_vector - (_.ScalarLdFrag addr:$src)))))>, + (_.VT (_.BroadcastLdFrag addr:$src))>, EVEX, EVEX_CD8<_.EltSize, CD8VH>, Sched<[sched.Folded]>; } @@ -10853,7 +10844,7 @@ multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr, SDNode OpNode, let Predicates = [HasAVX512, HasVLX] in { defm Z256 : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.YMM, VTInfo.info256>, EVEX_V256; - defm Z128 : avx512_movddup_128<opc, OpcodeStr, X86VBroadcast, sched.XMM, + defm Z128 : avx512_movddup_128<opc, OpcodeStr, sched.XMM, VTInfo.info128>, EVEX_V128; } } @@ -10867,11 +10858,9 @@ multiclass avx512_movddup<bits<8> opc, string OpcodeStr, SDNode OpNode, defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup, SchedWriteFShuffle>; let Predicates = [HasVLX] in { -def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))), - (VMOVDDUPZ128rm addr:$src)>; def : Pat<(v2f64 (X86VBroadcast f64:$src)), (VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>; -def : Pat<(v2f64 (X86VBroadcast (v2f64 (nonvolatile_load addr:$src)))), +def : Pat<(v2f64 (X86VBroadcast (v2f64 (simple_load addr:$src)))), (VMOVDDUPZ128rm addr:$src)>; def : Pat<(v2f64 (X86VBroadcast (v2f64 (X86vzload64 addr:$src)))), (VMOVDDUPZ128rm addr:$src)>; @@ -10884,17 +10873,17 @@ def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)), immAllZerosV), (VMOVDDUPZ128rrkz VK2WM:$mask, (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>; -def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))), +def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcastld64 addr:$src)), (v2f64 VR128X:$src0)), (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; -def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))), +def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcastld64 addr:$src)), immAllZerosV), (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>; -def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (v2f64 (nonvolatile_load addr:$src)))), +def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (v2f64 (simple_load addr:$src)))), (v2f64 VR128X:$src0)), (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; -def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (v2f64 (nonvolatile_load addr:$src)))), +def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (v2f64 (simple_load addr:$src)))), immAllZerosV), (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>; } @@ -11070,14 +11059,14 @@ multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr, def rr : AVX512<opc, MRMr, (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 imm:$src2))))]>, + [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 timm:$src2))))]>, Sched<[sched]>; def rm : AVX512<opc, MRMm, (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set _.RC:$dst,(_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))), - (i8 imm:$src2))))]>, + (i8 timm:$src2))))]>, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -11104,6 +11093,7 @@ defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq", multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode, string OpcodeStr, X86FoldableSchedWrite sched, X86VectorVTInfo _dst, X86VectorVTInfo _src> { + let isCommutable = 1 in def rr : AVX512BI<opc, MRMSrcReg, (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), @@ -11140,7 +11130,7 @@ defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw", // Transforms to swizzle an immediate to enable better matching when // memory operand isn't in the right place. -def VPTERNLOG321_imm8 : SDNodeXForm<imm, [{ +def VPTERNLOG321_imm8 : SDNodeXForm<timm, [{ // Convert a VPTERNLOG immediate by swapping operand 0 and operand 2. uint8_t Imm = N->getZExtValue(); // Swap bits 1/4 and 3/6. @@ -11151,7 +11141,7 @@ def VPTERNLOG321_imm8 : SDNodeXForm<imm, [{ if (Imm & 0x40) NewImm |= 0x08; return getI8Imm(NewImm, SDLoc(N)); }]>; -def VPTERNLOG213_imm8 : SDNodeXForm<imm, [{ +def VPTERNLOG213_imm8 : SDNodeXForm<timm, [{ // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2. uint8_t Imm = N->getZExtValue(); // Swap bits 2/4 and 3/5. @@ -11162,7 +11152,7 @@ def VPTERNLOG213_imm8 : SDNodeXForm<imm, [{ if (Imm & 0x20) NewImm |= 0x08; return getI8Imm(NewImm, SDLoc(N)); }]>; -def VPTERNLOG132_imm8 : SDNodeXForm<imm, [{ +def VPTERNLOG132_imm8 : SDNodeXForm<timm, [{ // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2. uint8_t Imm = N->getZExtValue(); // Swap bits 1/2 and 5/6. @@ -11173,7 +11163,7 @@ def VPTERNLOG132_imm8 : SDNodeXForm<imm, [{ if (Imm & 0x40) NewImm |= 0x20; return getI8Imm(NewImm, SDLoc(N)); }]>; -def VPTERNLOG231_imm8 : SDNodeXForm<imm, [{ +def VPTERNLOG231_imm8 : SDNodeXForm<timm, [{ // Convert a VPTERNLOG immediate by moving operand 1 to the end. uint8_t Imm = N->getZExtValue(); // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5 @@ -11186,7 +11176,7 @@ def VPTERNLOG231_imm8 : SDNodeXForm<imm, [{ if (Imm & 0x40) NewImm |= 0x20; return getI8Imm(NewImm, SDLoc(N)); }]>; -def VPTERNLOG312_imm8 : SDNodeXForm<imm, [{ +def VPTERNLOG312_imm8 : SDNodeXForm<timm, [{ // Convert a VPTERNLOG immediate by moving operand 2 to the beginning. uint8_t Imm = N->getZExtValue(); // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3 @@ -11210,7 +11200,7 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), (_.VT _.RC:$src3), - (i8 imm:$src4)), 1, 1>, + (i8 timm:$src4)), 1, 1>, AVX512AIi8Base, EVEX_4V, Sched<[sched]>; defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4), @@ -11218,7 +11208,7 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), (_.VT (bitconvert (_.LdFrag addr:$src3))), - (i8 imm:$src4)), 1, 0>, + (i8 timm:$src4)), 1, 0>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, Sched<[sched.Folded, sched.ReadAfterFold]>; defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), @@ -11227,146 +11217,145 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode, "$src2, ${src3}"##_.BroadcastStr##", $src4", (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), - (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))), - (i8 imm:$src4)), 1, 0>, EVEX_B, + (_.VT (_.BroadcastLdFrag addr:$src3)), + (i8 timm:$src4)), 1, 0>, EVEX_B, AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, Sched<[sched.Folded, sched.ReadAfterFold]>; }// Constraints = "$src1 = $dst" // Additional patterns for matching passthru operand in other positions. def : Pat<(_.VT (vselect _.KRCWM:$mask, - (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 imm:$src4)), + (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 timm:$src4)), _.RC:$src1)), (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask, - _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 imm:$src4))>; + _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 timm:$src4))>; def : Pat<(_.VT (vselect _.KRCWM:$mask, - (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 imm:$src4)), + (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 timm:$src4)), _.RC:$src1)), (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask, - _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 imm:$src4))>; + _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 timm:$src4))>; // Additional patterns for matching loads in other positions. def : Pat<(_.VT (OpNode (bitconvert (_.LdFrag addr:$src3)), - _.RC:$src2, _.RC:$src1, (i8 imm:$src4))), + _.RC:$src2, _.RC:$src1, (i8 timm:$src4))), (!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2, - addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>; + addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>; def : Pat<(_.VT (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)), - _.RC:$src2, (i8 imm:$src4))), + _.RC:$src2, (i8 timm:$src4))), (!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2, - addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>; + addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>; // Additional patterns for matching zero masking with loads in other // positions. def : Pat<(_.VT (vselect _.KRCWM:$mask, (OpNode (bitconvert (_.LdFrag addr:$src3)), - _.RC:$src2, _.RC:$src1, (i8 imm:$src4)), + _.RC:$src2, _.RC:$src1, (i8 timm:$src4)), _.ImmAllZerosV)), (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask, - _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>; + _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>; def : Pat<(_.VT (vselect _.KRCWM:$mask, (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)), - _.RC:$src2, (i8 imm:$src4)), + _.RC:$src2, (i8 timm:$src4)), _.ImmAllZerosV)), (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask, - _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>; + _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>; // Additional patterns for matching masked loads with different // operand orders. def : Pat<(_.VT (vselect _.KRCWM:$mask, (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)), - _.RC:$src2, (i8 imm:$src4)), + _.RC:$src2, (i8 timm:$src4)), _.RC:$src1)), (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, - _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>; + _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>; def : Pat<(_.VT (vselect _.KRCWM:$mask, (OpNode (bitconvert (_.LdFrag addr:$src3)), - _.RC:$src2, _.RC:$src1, (i8 imm:$src4)), + _.RC:$src2, _.RC:$src1, (i8 timm:$src4)), _.RC:$src1)), (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, - _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>; + _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>; def : Pat<(_.VT (vselect _.KRCWM:$mask, (OpNode _.RC:$src2, _.RC:$src1, - (bitconvert (_.LdFrag addr:$src3)), (i8 imm:$src4)), + (bitconvert (_.LdFrag addr:$src3)), (i8 timm:$src4)), _.RC:$src1)), (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, - _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>; + _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>; def : Pat<(_.VT (vselect _.KRCWM:$mask, (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)), - _.RC:$src1, (i8 imm:$src4)), + _.RC:$src1, (i8 timm:$src4)), _.RC:$src1)), (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, - _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>; + _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>; def : Pat<(_.VT (vselect _.KRCWM:$mask, (OpNode (bitconvert (_.LdFrag addr:$src3)), - _.RC:$src1, _.RC:$src2, (i8 imm:$src4)), + _.RC:$src1, _.RC:$src2, (i8 timm:$src4)), _.RC:$src1)), (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, - _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>; + _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>; // Additional patterns for matching broadcasts in other positions. - def : Pat<(_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)), - _.RC:$src2, _.RC:$src1, (i8 imm:$src4))), + def : Pat<(_.VT (OpNode (_.BroadcastLdFrag addr:$src3), + _.RC:$src2, _.RC:$src1, (i8 timm:$src4))), (!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2, - addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>; + addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>; def : Pat<(_.VT (OpNode _.RC:$src1, - (X86VBroadcast (_.ScalarLdFrag addr:$src3)), - _.RC:$src2, (i8 imm:$src4))), + (_.BroadcastLdFrag addr:$src3), + _.RC:$src2, (i8 timm:$src4))), (!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2, - addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>; + addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>; // Additional patterns for matching zero masking with broadcasts in other // positions. def : Pat<(_.VT (vselect _.KRCWM:$mask, - (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)), - _.RC:$src2, _.RC:$src1, (i8 imm:$src4)), + (OpNode (_.BroadcastLdFrag addr:$src3), + _.RC:$src2, _.RC:$src1, (i8 timm:$src4)), _.ImmAllZerosV)), (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3, - (VPTERNLOG321_imm8 imm:$src4))>; + (VPTERNLOG321_imm8 timm:$src4))>; def : Pat<(_.VT (vselect _.KRCWM:$mask, (OpNode _.RC:$src1, - (X86VBroadcast (_.ScalarLdFrag addr:$src3)), - _.RC:$src2, (i8 imm:$src4)), + (_.BroadcastLdFrag addr:$src3), + _.RC:$src2, (i8 timm:$src4)), _.ImmAllZerosV)), (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3, - (VPTERNLOG132_imm8 imm:$src4))>; + (VPTERNLOG132_imm8 timm:$src4))>; // Additional patterns for matching masked broadcasts with different // operand orders. def : Pat<(_.VT (vselect _.KRCWM:$mask, - (OpNode _.RC:$src1, - (X86VBroadcast (_.ScalarLdFrag addr:$src3)), - _.RC:$src2, (i8 imm:$src4)), + (OpNode _.RC:$src1, (_.BroadcastLdFrag addr:$src3), + _.RC:$src2, (i8 timm:$src4)), _.RC:$src1)), (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, - _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>; + _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>; def : Pat<(_.VT (vselect _.KRCWM:$mask, - (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)), - _.RC:$src2, _.RC:$src1, (i8 imm:$src4)), + (OpNode (_.BroadcastLdFrag addr:$src3), + _.RC:$src2, _.RC:$src1, (i8 timm:$src4)), _.RC:$src1)), (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, - _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>; + _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>; def : Pat<(_.VT (vselect _.KRCWM:$mask, (OpNode _.RC:$src2, _.RC:$src1, - (X86VBroadcast (_.ScalarLdFrag addr:$src3)), - (i8 imm:$src4)), _.RC:$src1)), + (_.BroadcastLdFrag addr:$src3), + (i8 timm:$src4)), _.RC:$src1)), (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, - _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>; + _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>; def : Pat<(_.VT (vselect _.KRCWM:$mask, (OpNode _.RC:$src2, - (X86VBroadcast (_.ScalarLdFrag addr:$src3)), - _.RC:$src1, (i8 imm:$src4)), + (_.BroadcastLdFrag addr:$src3), + _.RC:$src1, (i8 timm:$src4)), _.RC:$src1)), (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, - _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>; + _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>; def : Pat<(_.VT (vselect _.KRCWM:$mask, - (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)), - _.RC:$src1, _.RC:$src2, (i8 imm:$src4)), + (OpNode (_.BroadcastLdFrag addr:$src3), + _.RC:$src1, _.RC:$src2, (i8 timm:$src4)), _.RC:$src1)), (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, - _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>; + _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>; } multiclass avx512_common_ternlog<string OpcodeStr, X86SchedWriteWidths sched, @@ -11387,6 +11376,113 @@ defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU, defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU, avx512vl_i64_info>, VEX_W; +// Patterns to use VPTERNLOG for vXi16/vXi8 vectors. +let Predicates = [HasVLX] in { + def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2, VR128X:$src3, + (i8 timm:$src4))), + (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3, + timm:$src4)>; + def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2, + (loadv16i8 addr:$src3), (i8 timm:$src4))), + (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3, + timm:$src4)>; + def : Pat<(v16i8 (X86vpternlog (loadv16i8 addr:$src3), VR128X:$src2, + VR128X:$src1, (i8 timm:$src4))), + (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3, + (VPTERNLOG321_imm8 timm:$src4))>; + def : Pat<(v16i8 (X86vpternlog VR128X:$src1, (loadv16i8 addr:$src3), + VR128X:$src2, (i8 timm:$src4))), + (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3, + (VPTERNLOG132_imm8 timm:$src4))>; + + def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2, VR128X:$src3, + (i8 timm:$src4))), + (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3, + timm:$src4)>; + def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2, + (loadv8i16 addr:$src3), (i8 timm:$src4))), + (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3, + timm:$src4)>; + def : Pat<(v8i16 (X86vpternlog (loadv8i16 addr:$src3), VR128X:$src2, + VR128X:$src1, (i8 timm:$src4))), + (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3, + (VPTERNLOG321_imm8 timm:$src4))>; + def : Pat<(v8i16 (X86vpternlog VR128X:$src1, (loadv8i16 addr:$src3), + VR128X:$src2, (i8 timm:$src4))), + (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3, + (VPTERNLOG132_imm8 timm:$src4))>; + + def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2, VR256X:$src3, + (i8 timm:$src4))), + (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3, + timm:$src4)>; + def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2, + (loadv32i8 addr:$src3), (i8 timm:$src4))), + (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3, + timm:$src4)>; + def : Pat<(v32i8 (X86vpternlog (loadv32i8 addr:$src3), VR256X:$src2, + VR256X:$src1, (i8 timm:$src4))), + (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3, + (VPTERNLOG321_imm8 timm:$src4))>; + def : Pat<(v32i8 (X86vpternlog VR256X:$src1, (loadv32i8 addr:$src3), + VR256X:$src2, (i8 timm:$src4))), + (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3, + (VPTERNLOG132_imm8 timm:$src4))>; + + def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2, VR256X:$src3, + (i8 timm:$src4))), + (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3, + timm:$src4)>; + def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2, + (loadv16i16 addr:$src3), (i8 timm:$src4))), + (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3, + timm:$src4)>; + def : Pat<(v16i16 (X86vpternlog (loadv16i16 addr:$src3), VR256X:$src2, + VR256X:$src1, (i8 timm:$src4))), + (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3, + (VPTERNLOG321_imm8 timm:$src4))>; + def : Pat<(v16i16 (X86vpternlog VR256X:$src1, (loadv16i16 addr:$src3), + VR256X:$src2, (i8 timm:$src4))), + (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3, + (VPTERNLOG132_imm8 timm:$src4))>; +} + +let Predicates = [HasAVX512] in { + def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2, VR512:$src3, + (i8 timm:$src4))), + (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3, + timm:$src4)>; + def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2, + (loadv64i8 addr:$src3), (i8 timm:$src4))), + (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3, + timm:$src4)>; + def : Pat<(v64i8 (X86vpternlog (loadv64i8 addr:$src3), VR512:$src2, + VR512:$src1, (i8 timm:$src4))), + (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3, + (VPTERNLOG321_imm8 timm:$src4))>; + def : Pat<(v64i8 (X86vpternlog VR512:$src1, (loadv64i8 addr:$src3), + VR512:$src2, (i8 timm:$src4))), + (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3, + (VPTERNLOG132_imm8 timm:$src4))>; + + def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2, VR512:$src3, + (i8 timm:$src4))), + (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3, + timm:$src4)>; + def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2, + (loadv32i16 addr:$src3), (i8 timm:$src4))), + (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3, + timm:$src4)>; + def : Pat<(v32i16 (X86vpternlog (loadv32i16 addr:$src3), VR512:$src2, + VR512:$src1, (i8 timm:$src4))), + (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3, + (VPTERNLOG321_imm8 timm:$src4))>; + def : Pat<(v32i16 (X86vpternlog VR512:$src1, (loadv32i16 addr:$src3), + VR512:$src2, (i8 timm:$src4))), + (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3, + (VPTERNLOG132_imm8 timm:$src4))>; +} + // Patterns to implement vnot using vpternlog instead of creating all ones // using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen // so that the result is only dependent on src0. But we use the same source @@ -11498,14 +11594,14 @@ multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr, (X86VFixupimm (_.VT _.RC:$src1), (_.VT _.RC:$src2), (TblVT.VT _.RC:$src3), - (i32 imm:$src4))>, Sched<[sched]>; + (i32 timm:$src4))>, Sched<[sched]>; defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4), OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4", (X86VFixupimm (_.VT _.RC:$src1), (_.VT _.RC:$src2), (TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))), - (i32 imm:$src4))>, + (i32 timm:$src4))>, Sched<[sched.Folded, sched.ReadAfterFold]>; defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4), @@ -11513,8 +11609,8 @@ multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr, "$src2, ${src3}"##_.BroadcastStr##", $src4", (X86VFixupimm (_.VT _.RC:$src1), (_.VT _.RC:$src2), - (TblVT.VT (X86VBroadcast(TblVT.ScalarLdFrag addr:$src3))), - (i32 imm:$src4))>, + (TblVT.VT (TblVT.BroadcastLdFrag addr:$src3)), + (i32 timm:$src4))>, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } // Constraints = "$src1 = $dst" } @@ -11531,7 +11627,7 @@ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in { (X86VFixupimmSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2), (TblVT.VT _.RC:$src3), - (i32 imm:$src4))>, + (i32 timm:$src4))>, EVEX_B, Sched<[sched]>; } } @@ -11547,7 +11643,7 @@ multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr, (X86VFixupimms (_.VT _.RC:$src1), (_.VT _.RC:$src2), (_src3VT.VT _src3VT.RC:$src3), - (i32 imm:$src4))>, Sched<[sched]>; + (i32 timm:$src4))>, Sched<[sched]>; defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4), OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2", @@ -11555,7 +11651,7 @@ multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr, (X86VFixupimmSAEs (_.VT _.RC:$src1), (_.VT _.RC:$src2), (_src3VT.VT _src3VT.RC:$src3), - (i32 imm:$src4))>, + (i32 timm:$src4))>, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4), @@ -11564,13 +11660,13 @@ multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr, (_.VT _.RC:$src2), (_src3VT.VT (scalar_to_vector (_src3VT.ScalarLdFrag addr:$src3))), - (i32 imm:$src4))>, + (i32 timm:$src4))>, Sched<[sched.Folded, sched.ReadAfterFold]>; } } multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched, - AVX512VLVectorVTInfo _Vec, + AVX512VLVectorVTInfo _Vec, AVX512VLVectorVTInfo _Tbl> { let Predicates = [HasAVX512] in defm Z : avx512_fixupimm_packed_sae<0x54, "vfixupimm", sched.ZMM, @@ -11804,7 +11900,7 @@ multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode, "${src3}"##VTI.BroadcastStr##", $src2", "$src2, ${src3}"##VTI.BroadcastStr, (OpNode VTI.RC:$src1, VTI.RC:$src2, - (VTI.VT (X86VBroadcast (VTI.ScalarLdFrag addr:$src3))))>, + (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>, AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -11880,12 +11976,14 @@ defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", WriteVarShuffle256, let Constraints = "$src1 = $dst" in multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode, - X86FoldableSchedWrite sched, X86VectorVTInfo VTI> { + X86FoldableSchedWrite sched, X86VectorVTInfo VTI, + bit IsCommutable> { defm r : AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst), (ins VTI.RC:$src2, VTI.RC:$src3), OpStr, "$src3, $src2", "$src2, $src3", (VTI.VT (OpNode VTI.RC:$src1, - VTI.RC:$src2, VTI.RC:$src3))>, + VTI.RC:$src2, VTI.RC:$src3)), + IsCommutable, IsCommutable>, EVEX_4V, T8PD, Sched<[sched]>; defm m : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst), (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr, @@ -11899,27 +11997,58 @@ multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode, OpStr, "${src3}"##VTI.BroadcastStr##", $src2", "$src2, ${src3}"##VTI.BroadcastStr, (OpNode VTI.RC:$src1, VTI.RC:$src2, - (VTI.VT (X86VBroadcast - (VTI.ScalarLdFrag addr:$src3))))>, + (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>, EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B, T8PD, Sched<[sched.Folded, sched.ReadAfterFold]>; } multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode, - X86SchedWriteWidths sched> { + X86SchedWriteWidths sched, bit IsCommutable> { let Predicates = [HasVNNI] in - defm Z : VNNI_rmb<Op, OpStr, OpNode, sched.ZMM, v16i32_info>, EVEX_V512; + defm Z : VNNI_rmb<Op, OpStr, OpNode, sched.ZMM, v16i32_info, + IsCommutable>, EVEX_V512; let Predicates = [HasVNNI, HasVLX] in { - defm Z256 : VNNI_rmb<Op, OpStr, OpNode, sched.YMM, v8i32x_info>, EVEX_V256; - defm Z128 : VNNI_rmb<Op, OpStr, OpNode, sched.XMM, v4i32x_info>, EVEX_V128; + defm Z256 : VNNI_rmb<Op, OpStr, OpNode, sched.YMM, v8i32x_info, + IsCommutable>, EVEX_V256; + defm Z128 : VNNI_rmb<Op, OpStr, OpNode, sched.XMM, v4i32x_info, + IsCommutable>, EVEX_V128; } } // FIXME: Is there a better scheduler class for VPDP? -defm VPDPBUSD : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SchedWriteVecIMul>; -defm VPDPBUSDS : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SchedWriteVecIMul>; -defm VPDPWSSD : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SchedWriteVecIMul>; -defm VPDPWSSDS : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul>; +defm VPDPBUSD : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SchedWriteVecIMul, 0>; +defm VPDPBUSDS : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SchedWriteVecIMul, 0>; +defm VPDPWSSD : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SchedWriteVecIMul, 1>; +defm VPDPWSSDS : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul, 1>; + +def X86vpmaddwd_su : PatFrag<(ops node:$lhs, node:$rhs), + (X86vpmaddwd node:$lhs, node:$rhs), [{ + return N->hasOneUse(); +}]>; + +// Patterns to match VPDPWSSD from existing instructions/intrinsics. +let Predicates = [HasVNNI] in { + def : Pat<(v16i32 (add VR512:$src1, + (X86vpmaddwd_su VR512:$src2, VR512:$src3))), + (VPDPWSSDZr VR512:$src1, VR512:$src2, VR512:$src3)>; + def : Pat<(v16i32 (add VR512:$src1, + (X86vpmaddwd_su VR512:$src2, (load addr:$src3)))), + (VPDPWSSDZm VR512:$src1, VR512:$src2, addr:$src3)>; +} +let Predicates = [HasVNNI,HasVLX] in { + def : Pat<(v8i32 (add VR256X:$src1, + (X86vpmaddwd_su VR256X:$src2, VR256X:$src3))), + (VPDPWSSDZ256r VR256X:$src1, VR256X:$src2, VR256X:$src3)>; + def : Pat<(v8i32 (add VR256X:$src1, + (X86vpmaddwd_su VR256X:$src2, (load addr:$src3)))), + (VPDPWSSDZ256m VR256X:$src1, VR256X:$src2, addr:$src3)>; + def : Pat<(v4i32 (add VR128X:$src1, + (X86vpmaddwd_su VR128X:$src2, VR128X:$src3))), + (VPDPWSSDZ128r VR128X:$src1, VR128X:$src2, VR128X:$src3)>; + def : Pat<(v4i32 (add VR128X:$src1, + (X86vpmaddwd_su VR128X:$src2, (load addr:$src3)))), + (VPDPWSSDZ128m VR128X:$src1, VR128X:$src2, addr:$src3)>; +} //===----------------------------------------------------------------------===// // Bit Algorithms @@ -12004,8 +12133,8 @@ multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode, OpStr, "$src3, ${src2}"##BcstVTI.BroadcastStr##", $src1", "$src1, ${src2}"##BcstVTI.BroadcastStr##", $src3", (OpNode (VTI.VT VTI.RC:$src1), - (bitconvert (BcstVTI.VT (X86VBroadcast (loadi64 addr:$src2)))), - (i8 imm:$src3))>, EVEX_B, + (bitconvert (BcstVTI.VT (X86VBroadcastld64 addr:$src2))), + (i8 timm:$src3))>, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -12116,7 +12245,7 @@ multiclass avx512_vp2intersect_modes<X86VectorVTInfo _> { !strconcat("vp2intersect", _.Suffix, "\t{${src2}", _.BroadcastStr, ", $src1, $dst|$dst, $src1, ${src2}", _.BroadcastStr ,"}"), [(set _.KRPC:$dst, (X86vp2intersect - _.RC:$src1, (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src2)))))]>, + _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))]>, EVEX_4V, T8XD, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>; } @@ -12217,12 +12346,12 @@ let Predicates = [HasBF16, HasVLX] in { (VCVTNEPS2BF16Z128rmkz VK4WM:$mask, addr:$src)>; def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32 - (X86VBroadcast (loadf32 addr:$src))))), + (X86VBroadcastld32 addr:$src)))), (VCVTNEPS2BF16Z128rmb addr:$src)>; - def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcast (loadf32 addr:$src))), + def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)), (v8i16 VR128X:$src0), VK4WM:$mask), (VCVTNEPS2BF16Z128rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>; - def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcast (loadf32 addr:$src))), + def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)), v8i16x_info.ImmAllZerosV, VK4WM:$mask), (VCVTNEPS2BF16Z128rmbkz VK4WM:$mask, addr:$src)>; } @@ -12249,7 +12378,7 @@ multiclass avx512_dpbf16ps_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, !strconcat("${src3}", _.BroadcastStr,", $src2"), !strconcat("$src2, ${src3}", _.BroadcastStr), (_.VT (OpNode _.RC:$src1, _.RC:$src2, - (src_v.VT (X86VBroadcast(src_v.ScalarLdFrag addr:$src3)))))>, + (src_v.VT (src_v.BroadcastLdFrag addr:$src3))))>, EVEX_B, EVEX_4V; } |