diff options
Diffstat (limited to 'lib/Target')
23 files changed, 536 insertions, 227 deletions
diff --git a/lib/Target/AArch64/AArch64.td b/lib/Target/AArch64/AArch64.td index e39c6995e367..f54db0aa03b2 100644 --- a/lib/Target/AArch64/AArch64.td +++ b/lib/Target/AArch64/AArch64.td @@ -115,7 +115,7 @@ def FeatureSVE2SM4 : SubtargetFeature<"sve2-sm4", "HasSVE2SM4", "true", def FeatureSVE2SHA3 : SubtargetFeature<"sve2-sha3", "HasSVE2SHA3", "true", "Enable SHA3 SVE2 instructions", [FeatureSVE2, FeatureSHA3]>; -def FeatureSVE2BitPerm : SubtargetFeature<"bitperm", "HasSVE2BitPerm", "true", +def FeatureSVE2BitPerm : SubtargetFeature<"sve2-bitperm", "HasSVE2BitPerm", "true", "Enable bit permutation SVE2 instructions", [FeatureSVE2]>; def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true", diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 7becc99fb5c7..6c250aea39f0 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -606,6 +606,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, MaxStoresPerMemmoveOptSize = MaxStoresPerMemmove = 4; + MaxLoadsPerMemcmpOptSize = 4; + MaxLoadsPerMemcmp = Subtarget->requiresStrictAlign() + ? MaxLoadsPerMemcmpOptSize : 8; + setStackPointerRegisterToSaveRestore(AArch64::SP); setSchedulingPreference(Sched::Hybrid); @@ -5661,8 +5665,6 @@ AArch64TargetLowering::getConstraintType(StringRef Constraint) const { switch (Constraint[0]) { default: break; - case 'z': - return C_Other; case 'x': case 'w': return C_RegisterClass; @@ -5670,6 +5672,16 @@ AArch64TargetLowering::getConstraintType(StringRef Constraint) const { // currently handle addresses it is the same as 'r'. case 'Q': return C_Memory; + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'Y': + case 'Z': + return C_Immediate; + case 'z': case 'S': // A symbolic address return C_Other; } diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td index eed53f36d574..020035c7f6c3 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.td +++ b/lib/Target/AArch64/AArch64InstrInfo.td @@ -116,7 +116,7 @@ def HasSVE2SM4 : Predicate<"Subtarget->hasSVE2SM4()">, def HasSVE2SHA3 : Predicate<"Subtarget->hasSVE2SHA3()">, AssemblerPredicate<"FeatureSVE2SHA3", "sve2-sha3">; def HasSVE2BitPerm : Predicate<"Subtarget->hasSVE2BitPerm()">, - AssemblerPredicate<"FeatureSVE2BitPerm", "bitperm">; + AssemblerPredicate<"FeatureSVE2BitPerm", "sve2-bitperm">; def HasRCPC : Predicate<"Subtarget->hasRCPC()">, AssemblerPredicate<"FeatureRCPC", "rcpc">; def HasAltNZCV : Predicate<"Subtarget->hasAlternativeNZCV()">, diff --git a/lib/Target/AArch64/AArch64SVEInstrInfo.td b/lib/Target/AArch64/AArch64SVEInstrInfo.td index 79ab42f4c080..8e1ff999bd57 100644 --- a/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -1164,6 +1164,13 @@ let Predicates = [HasSVE2] in { defm SQRSHLR_ZPmZ : sve2_int_arith_pred<0b011100, "sqrshlr">; defm UQRSHLR_ZPmZ : sve2_int_arith_pred<0b011110, "uqrshlr">; + // SVE2 predicated shifts + defm SQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0110, "sqshl">; + defm UQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0111, "uqshl">; + defm SRSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1100, "srshr">; + defm URSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1101, "urshr">; + defm SQSHLU_ZPmI : sve_int_bin_pred_shift_imm_left< 0b1111, "sqshlu">; + // SVE2 integer add/subtract long defm SADDLB_ZZZ : sve2_wide_int_arith_long<0b00000, "saddlb">; defm SADDLT_ZZZ : sve2_wide_int_arith_long<0b00001, "saddlt">; @@ -1199,14 +1206,14 @@ let Predicates = [HasSVE2] in { defm PMULLT_ZZZ : sve2_pmul_long<0b1, "pmullt">; // SVE2 bitwise shift and insert - defm SRI_ZZI : sve2_int_bin_cons_shift_imm_right<0b0, "sri">; - defm SLI_ZZI : sve2_int_bin_cons_shift_imm_left< 0b1, "sli">; + defm SRI_ZZI : sve2_int_bin_shift_imm_right<0b0, "sri">; + defm SLI_ZZI : sve2_int_bin_shift_imm_left< 0b1, "sli">; // SVE2 bitwise shift right and accumulate - defm SSRA_ZZI : sve2_int_bin_accum_cons_shift_imm_right<0b00, "ssra">; - defm USRA_ZZI : sve2_int_bin_accum_cons_shift_imm_right<0b01, "usra">; - defm SRSRA_ZZI : sve2_int_bin_accum_cons_shift_imm_right<0b10, "srsra">; - defm URSRA_ZZI : sve2_int_bin_accum_cons_shift_imm_right<0b11, "ursra">; + defm SSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b00, "ssra">; + defm USRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b01, "usra">; + defm SRSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b10, "srsra">; + defm URSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b11, "ursra">; // SVE2 complex integer add defm CADD_ZZI : sve2_int_cadd<0b0, "cadd">; @@ -1228,41 +1235,47 @@ let Predicates = [HasSVE2] in { defm SBCLB_ZZZ : sve2_int_addsub_long_carry<0b10, "sbclb">; defm SBCLT_ZZZ : sve2_int_addsub_long_carry<0b11, "sbclt">; - // SVE2 bitwise shift right narrow - defm SQSHRUNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0000, "sqshrunb">; - defm SQSHRUNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0001, "sqshrunt">; - defm SQRSHRUNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0010, "sqrshrunb">; - defm SQRSHRUNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0011, "sqrshrunt">; - defm SHRNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0100, "shrnb">; - defm SHRNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0101, "shrnt">; - defm RSHRNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0110, "rshrnb">; - defm RSHRNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0111, "rshrnt">; - defm SQSHRNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1000, "sqshrnb">; - defm SQSHRNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1001, "sqshrnt">; - defm SQRSHRNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1010, "sqrshrnb">; - defm SQRSHRNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1011, "sqrshrnt">; - defm UQSHRNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1100, "uqshrnb">; - defm UQSHRNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1101, "uqshrnt">; - defm UQRSHRNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1110, "uqrshrnb">; - defm UQRSHRNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1111, "uqrshrnt">; - - // SVE2 integer add/subtract narrow high part - defm ADDHNB_ZZZ : sve2_int_addsub_narrow_high<0b000, "addhnb">; - defm ADDHNT_ZZZ : sve2_int_addsub_narrow_high<0b001, "addhnt">; - defm RADDHNB_ZZZ : sve2_int_addsub_narrow_high<0b010, "raddhnb">; - defm RADDHNT_ZZZ : sve2_int_addsub_narrow_high<0b011, "raddhnt">; - defm SUBHNB_ZZZ : sve2_int_addsub_narrow_high<0b100, "subhnb">; - defm SUBHNT_ZZZ : sve2_int_addsub_narrow_high<0b101, "subhnt">; - defm RSUBHNB_ZZZ : sve2_int_addsub_narrow_high<0b110, "rsubhnb">; - defm RSUBHNT_ZZZ : sve2_int_addsub_narrow_high<0b111, "rsubhnt">; - - // SVE2 saturating extract narrow - defm SQXTNB_ZZ : sve2_int_sat_extract_narrow<0b000, "sqxtnb">; - defm SQXTNT_ZZ : sve2_int_sat_extract_narrow<0b001, "sqxtnt">; - defm UQXTNB_ZZ : sve2_int_sat_extract_narrow<0b010, "uqxtnb">; - defm UQXTNT_ZZ : sve2_int_sat_extract_narrow<0b011, "uqxtnt">; - defm SQXTUNB_ZZ : sve2_int_sat_extract_narrow<0b100, "sqxtunb">; - defm SQXTUNT_ZZ : sve2_int_sat_extract_narrow<0b101, "sqxtunt">; + // SVE2 bitwise shift right narrow (bottom) + defm SQSHRUNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b000, "sqshrunb">; + defm SQRSHRUNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b001, "sqrshrunb">; + defm SHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b010, "shrnb">; + defm RSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b011, "rshrnb">; + defm SQSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b100, "sqshrnb">; + defm SQRSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b101, "sqrshrnb">; + defm UQSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b110, "uqshrnb">; + defm UQRSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b111, "uqrshrnb">; + + // SVE2 bitwise shift right narrow (top) + defm SQSHRUNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b000, "sqshrunt">; + defm SQRSHRUNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b001, "sqrshrunt">; + defm SHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b010, "shrnt">; + defm RSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b011, "rshrnt">; + defm SQSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b100, "sqshrnt">; + defm SQRSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b101, "sqrshrnt">; + defm UQSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b110, "uqshrnt">; + defm UQRSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b111, "uqrshrnt">; + + // SVE2 integer add/subtract narrow high part (bottom) + defm ADDHNB_ZZZ : sve2_int_addsub_narrow_high_bottom<0b00, "addhnb">; + defm RADDHNB_ZZZ : sve2_int_addsub_narrow_high_bottom<0b01, "raddhnb">; + defm SUBHNB_ZZZ : sve2_int_addsub_narrow_high_bottom<0b10, "subhnb">; + defm RSUBHNB_ZZZ : sve2_int_addsub_narrow_high_bottom<0b11, "rsubhnb">; + + // SVE2 integer add/subtract narrow high part (top) + defm ADDHNT_ZZZ : sve2_int_addsub_narrow_high_top<0b00, "addhnt">; + defm RADDHNT_ZZZ : sve2_int_addsub_narrow_high_top<0b01, "raddhnt">; + defm SUBHNT_ZZZ : sve2_int_addsub_narrow_high_top<0b10, "subhnt">; + defm RSUBHNT_ZZZ : sve2_int_addsub_narrow_high_top<0b11, "rsubhnt">; + + // SVE2 saturating extract narrow (bottom) + defm SQXTNB_ZZ : sve2_int_sat_extract_narrow_bottom<0b00, "sqxtnb">; + defm UQXTNB_ZZ : sve2_int_sat_extract_narrow_bottom<0b01, "uqxtnb">; + defm SQXTUNB_ZZ : sve2_int_sat_extract_narrow_bottom<0b10, "sqxtunb">; + + // SVE2 saturating extract narrow (top) + defm SQXTNT_ZZ : sve2_int_sat_extract_narrow_top<0b00, "sqxtnt">; + defm UQXTNT_ZZ : sve2_int_sat_extract_narrow_top<0b01, "uqxtnt">; + defm SQXTUNT_ZZ : sve2_int_sat_extract_narrow_top<0b10, "sqxtunt">; // SVE2 character match defm MATCH_PPzZZ : sve2_char_match<0b0, "match">; @@ -1289,10 +1302,14 @@ let Predicates = [HasSVE2] in { // SVE2 histogram generation (vector) defm HISTCNT_ZPzZZ : sve2_hist_gen_vector<"histcnt">; + // SVE2 floating-point base 2 logarithm as integer + defm FLOGB_ZPmZ : sve2_fp_flogb<"flogb">; + // SVE2 floating-point convert precision defm FCVTXNT_ZPmZ : sve2_fp_convert_down_odd_rounding<"fcvtxnt">; defm FCVTNT_ZPmZ : sve2_fp_convert_down_narrow<"fcvtnt">; defm FCVTLT_ZPmZ : sve2_fp_convert_up_long<"fcvtlt">; + def FCVTX_ZPmZ_DtoS : sve_fp_2op_p_zd<0b0001010, "fcvtx", ZPR64, ZPR32, ElementSizeD>; // SVE2 floating-point pairwise operations defm FADDP_ZPmZZ : sve2_fp_pairwise_pred<0b000, "faddp">; @@ -1321,58 +1338,45 @@ let Predicates = [HasSVE2] in { def BSL2N_ZZZZ_D : sve2_int_bitwise_ternary_op_d<0b101, "bsl2n">; def NBSL_ZZZZ_D : sve2_int_bitwise_ternary_op_d<0b111, "nbsl">; - // sve_int_rotate_imm + // SVE2 bitwise xor and rotate right by immediate defm XAR_ZZZI : sve2_int_rotate_right_imm<"xar">; // SVE2 extract vector (immediate offset, constructive) def EXT_ZZI_B : sve2_int_perm_extract_i_cons<"ext">; - // SVE floating-point convert precision - def FCVTX_ZPmZ_DtoS : sve_fp_2op_p_zd<0b0001010, "fcvtx", ZPR64, ZPR32, ElementSizeD>; - - // SVE floating-point convert to integer - defm FLOGB_ZPmZ : sve2_fp_flogb<"flogb">; - - // Non-temporal contiguous loads (vector + register) - defm LDNT1SB_ZZR_S : sve2_mem_cldnt_vs<0b00000, "ldnt1sb", Z_s, ZPR32>; - defm LDNT1B_ZZR_S : sve2_mem_cldnt_vs<0b00001, "ldnt1b", Z_s, ZPR32>; - defm LDNT1SH_ZZR_S : sve2_mem_cldnt_vs<0b00100, "ldnt1sh", Z_s, ZPR32>; - defm LDNT1H_ZZR_S : sve2_mem_cldnt_vs<0b00101, "ldnt1h", Z_s, ZPR32>; - defm LDNT1W_ZZR_S : sve2_mem_cldnt_vs<0b01001, "ldnt1w", Z_s, ZPR32>; - - defm LDNT1SB_ZZR_D : sve2_mem_cldnt_vs<0b10000, "ldnt1sb", Z_d, ZPR64>; - defm LDNT1B_ZZR_D : sve2_mem_cldnt_vs<0b10010, "ldnt1b", Z_d, ZPR64>; - defm LDNT1SH_ZZR_D : sve2_mem_cldnt_vs<0b10100, "ldnt1sh", Z_d, ZPR64>; - defm LDNT1H_ZZR_D : sve2_mem_cldnt_vs<0b10110, "ldnt1h", Z_d, ZPR64>; - defm LDNT1SW_ZZR_D : sve2_mem_cldnt_vs<0b11000, "ldnt1sw", Z_d, ZPR64>; - defm LDNT1W_ZZR_D : sve2_mem_cldnt_vs<0b11010, "ldnt1w", Z_d, ZPR64>; - defm LDNT1D_ZZR_D : sve2_mem_cldnt_vs<0b11110, "ldnt1d", Z_d, ZPR64>; + // SVE2 non-temporal gather loads + defm LDNT1SB_ZZR_S : sve2_mem_gldnt_vs<0b00000, "ldnt1sb", Z_s, ZPR32>; + defm LDNT1B_ZZR_S : sve2_mem_gldnt_vs<0b00001, "ldnt1b", Z_s, ZPR32>; + defm LDNT1SH_ZZR_S : sve2_mem_gldnt_vs<0b00100, "ldnt1sh", Z_s, ZPR32>; + defm LDNT1H_ZZR_S : sve2_mem_gldnt_vs<0b00101, "ldnt1h", Z_s, ZPR32>; + defm LDNT1W_ZZR_S : sve2_mem_gldnt_vs<0b01001, "ldnt1w", Z_s, ZPR32>; + + defm LDNT1SB_ZZR_D : sve2_mem_gldnt_vs<0b10000, "ldnt1sb", Z_d, ZPR64>; + defm LDNT1B_ZZR_D : sve2_mem_gldnt_vs<0b10010, "ldnt1b", Z_d, ZPR64>; + defm LDNT1SH_ZZR_D : sve2_mem_gldnt_vs<0b10100, "ldnt1sh", Z_d, ZPR64>; + defm LDNT1H_ZZR_D : sve2_mem_gldnt_vs<0b10110, "ldnt1h", Z_d, ZPR64>; + defm LDNT1SW_ZZR_D : sve2_mem_gldnt_vs<0b11000, "ldnt1sw", Z_d, ZPR64>; + defm LDNT1W_ZZR_D : sve2_mem_gldnt_vs<0b11010, "ldnt1w", Z_d, ZPR64>; + defm LDNT1D_ZZR_D : sve2_mem_gldnt_vs<0b11110, "ldnt1d", Z_d, ZPR64>; // SVE2 vector splice (constructive) defm SPLICE_ZPZZ : sve2_int_perm_splice_cons<"splice">; - // Predicated shifts - defm SQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0110, "sqshl">; - defm UQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0111, "uqshl">; - defm SRSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1100, "srshr">; - defm URSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1101, "urshr">; - defm SQSHLU_ZPmI : sve_int_bin_pred_shift_imm_left< 0b1111, "sqshlu">; - - // Non-temporal contiguous stores (vector + register) - defm STNT1B_ZZR_S : sve2_mem_cstnt_vs<0b001, "stnt1b", Z_s, ZPR32>; - defm STNT1H_ZZR_S : sve2_mem_cstnt_vs<0b011, "stnt1h", Z_s, ZPR32>; - defm STNT1W_ZZR_S : sve2_mem_cstnt_vs<0b101, "stnt1w", Z_s, ZPR32>; + // SVE2 non-temporal scatter stores + defm STNT1B_ZZR_S : sve2_mem_sstnt_vs<0b001, "stnt1b", Z_s, ZPR32>; + defm STNT1H_ZZR_S : sve2_mem_sstnt_vs<0b011, "stnt1h", Z_s, ZPR32>; + defm STNT1W_ZZR_S : sve2_mem_sstnt_vs<0b101, "stnt1w", Z_s, ZPR32>; - defm STNT1B_ZZR_D : sve2_mem_cstnt_vs<0b000, "stnt1b", Z_d, ZPR64>; - defm STNT1H_ZZR_D : sve2_mem_cstnt_vs<0b010, "stnt1h", Z_d, ZPR64>; - defm STNT1W_ZZR_D : sve2_mem_cstnt_vs<0b100, "stnt1w", Z_d, ZPR64>; - defm STNT1D_ZZR_D : sve2_mem_cstnt_vs<0b110, "stnt1d", Z_d, ZPR64>; + defm STNT1B_ZZR_D : sve2_mem_sstnt_vs<0b000, "stnt1b", Z_d, ZPR64>; + defm STNT1H_ZZR_D : sve2_mem_sstnt_vs<0b010, "stnt1h", Z_d, ZPR64>; + defm STNT1W_ZZR_D : sve2_mem_sstnt_vs<0b100, "stnt1w", Z_d, ZPR64>; + defm STNT1D_ZZR_D : sve2_mem_sstnt_vs<0b110, "stnt1d", Z_d, ZPR64>; - // SVE table lookup (three sources) + // SVE2 table lookup (three sources) defm TBL_ZZZZ : sve2_int_perm_tbl<"tbl">; defm TBX_ZZZ : sve2_int_perm_tbx<"tbx">; - // SVE integer compare scalar count and limit + // SVE2 integer compare scalar count and limit defm WHILEGE_PWW : sve_int_while4_rr<0b000, "whilege">; defm WHILEGT_PWW : sve_int_while4_rr<0b001, "whilegt">; defm WHILEHS_PWW : sve_int_while4_rr<0b100, "whilehs">; @@ -1383,7 +1387,7 @@ let Predicates = [HasSVE2] in { defm WHILEHS_PXX : sve_int_while8_rr<0b100, "whilehs">; defm WHILEHI_PXX : sve_int_while8_rr<0b101, "whilehi">; - // SVE pointer conflict compare + // SVE2 pointer conflict compare defm WHILEWR_PXX : sve2_int_while_rr<0b0, "whilewr">; defm WHILERW_PXX : sve2_int_while_rr<0b1, "whilerw">; } diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index a4b78f2a7d6b..301bf72d5239 100644 --- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -618,6 +618,19 @@ int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I); } +AArch64TTIImpl::TTI::MemCmpExpansionOptions +AArch64TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const { + TTI::MemCmpExpansionOptions Options; + Options.AllowOverlappingLoads = !ST->requiresStrictAlign(); + Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize); + Options.NumLoadsPerBlock = Options.MaxNumLoads; + // TODO: Though vector loads usually perform well on AArch64, in some targets + // they may wake up the FP unit, which raises the power consumption. Perhaps + // they could be used with no holds barred (-O3). + Options.LoadSizes = {8, 4, 2, 1}; + return Options; +} + int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty, unsigned Alignment, unsigned AddressSpace, const Instruction *I) { diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.h b/lib/Target/AArch64/AArch64TargetTransformInfo.h index 10c15a139b4c..95cda63b0174 100644 --- a/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -130,6 +130,9 @@ public: int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, const Instruction *I = nullptr); + TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, + bool IsZeroCmp) const; + int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace, const Instruction *I = nullptr); diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index f4c55d48d215..09b42811f786 100644 --- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -2840,7 +2840,7 @@ static const struct Extension { {"sve2-aes", {AArch64::FeatureSVE2AES}}, {"sve2-sm4", {AArch64::FeatureSVE2SM4}}, {"sve2-sha3", {AArch64::FeatureSVE2SHA3}}, - {"bitperm", {AArch64::FeatureSVE2BitPerm}}, + {"sve2-bitperm", {AArch64::FeatureSVE2BitPerm}}, // FIXME: Unsupported extensions {"pan", {}}, {"lor", {}}, diff --git a/lib/Target/AArch64/SVEInstrFormats.td b/lib/Target/AArch64/SVEInstrFormats.td index 808e59467081..dfd6c576e99b 100644 --- a/lib/Target/AArch64/SVEInstrFormats.td +++ b/lib/Target/AArch64/SVEInstrFormats.td @@ -403,12 +403,12 @@ multiclass sve_int_count_r_x64<bits<5> opc, string asm> { } class sve_int_count_v<bits<2> sz8_64, bits<5> opc, string asm, - ZPRRegOp zprty> -: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, PPRAny:$Pg), - asm, "\t$Zdn, $Pg", + ZPRRegOp zprty, PPRRegOp pprty> +: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, pprty:$Pm), + asm, "\t$Zdn, $Pm", "", []>, Sched<[]> { - bits<4> Pg; + bits<4> Pm; bits<5> Zdn; let Inst{31-24} = 0b00100101; let Inst{23-22} = sz8_64; @@ -416,7 +416,7 @@ class sve_int_count_v<bits<2> sz8_64, bits<5> opc, string asm, let Inst{18-16} = opc{4-2}; let Inst{15-11} = 0b10000; let Inst{10-9} = opc{1-0}; - let Inst{8-5} = Pg; + let Inst{8-5} = Pm; let Inst{4-0} = Zdn; let Constraints = "$Zdn = $_Zdn"; @@ -425,9 +425,16 @@ class sve_int_count_v<bits<2> sz8_64, bits<5> opc, string asm, } multiclass sve_int_count_v<bits<5> opc, string asm> { - def _H : sve_int_count_v<0b01, opc, asm, ZPR16>; - def _S : sve_int_count_v<0b10, opc, asm, ZPR32>; - def _D : sve_int_count_v<0b11, opc, asm, ZPR64>; + def _H : sve_int_count_v<0b01, opc, asm, ZPR16, PPR16>; + def _S : sve_int_count_v<0b10, opc, asm, ZPR32, PPR32>; + def _D : sve_int_count_v<0b11, opc, asm, ZPR64, PPR64>; + + def : InstAlias<asm # "\t$Zdn, $Pm", + (!cast<Instruction>(NAME # "_H") ZPR16:$Zdn, PPRAny:$Pm), 0>; + def : InstAlias<asm # "\t$Zdn, $Pm", + (!cast<Instruction>(NAME # "_S") ZPR32:$Zdn, PPRAny:$Pm), 0>; + def : InstAlias<asm # "\t$Zdn, $Pm", + (!cast<Instruction>(NAME # "_D") ZPR64:$Zdn, PPRAny:$Pm), 0>; } class sve_int_pcount_pred<bits<2> sz8_64, bits<4> opc, string asm, @@ -744,7 +751,7 @@ multiclass sve2_int_perm_tbl<string asm> { } class sve2_int_perm_tbx<bits<2> sz8_64, string asm, ZPRRegOp zprty> -: I<(outs zprty:$Zd), (ins zprty:$Zn, zprty:$Zm), +: I<(outs zprty:$Zd), (ins zprty:$_Zd, zprty:$Zn, zprty:$Zm), asm, "\t$Zd, $Zn, $Zm", "", []>, Sched<[]> { @@ -758,6 +765,8 @@ class sve2_int_perm_tbx<bits<2> sz8_64, string asm, ZPRRegOp zprty> let Inst{15-10} = 0b001011; let Inst{9-5} = Zn; let Inst{4-0} = Zd; + + let Constraints = "$Zd = $_Zd"; } multiclass sve2_int_perm_tbx<string asm> { @@ -1489,7 +1498,7 @@ multiclass sve_fp_fcadd<string asm> { class sve2_fp_convert_precision<bits<4> opc, string asm, ZPRRegOp zprty1, ZPRRegOp zprty2> -: I<(outs zprty1:$Zd), (ins PPR3bAny:$Pg, zprty2:$Zn), +: I<(outs zprty1:$Zd), (ins zprty1:$_Zd, PPR3bAny:$Pg, zprty2:$Zn), asm, "\t$Zd, $Pg/m, $Zn", "", []>, Sched<[]> { @@ -1504,6 +1513,8 @@ class sve2_fp_convert_precision<bits<4> opc, string asm, let Inst{12-10} = Pg; let Inst{9-5} = Zn; let Inst{4-0} = Zd; + + let Constraints = "$Zd = $_Zd"; } multiclass sve2_fp_convert_down_narrow<string asm> { @@ -2399,21 +2410,40 @@ multiclass sve2_misc_bitwise<bits<4> opc, string asm> { def _D : sve2_misc<0b11, opc, asm, ZPR64, ZPR64>; } -multiclass sve2_bitwise_xor_interleaved<bit opc, string asm> { - let DestructiveInstType = Destructive, ElementSize = ElementSizeNone in { - def _B : sve2_misc<0b00, { 0b010, opc }, asm, ZPR8, ZPR8>; - def _H : sve2_misc<0b01, { 0b010, opc }, asm, ZPR16, ZPR16>; - def _S : sve2_misc<0b10, { 0b010, opc }, asm, ZPR32, ZPR32>; - def _D : sve2_misc<0b11, { 0b010, opc }, asm, ZPR64, ZPR64>; - } -} - multiclass sve2_misc_int_addsub_long_interleaved<bits<2> opc, string asm> { def _H : sve2_misc<0b01, { 0b00, opc }, asm, ZPR16, ZPR8>; def _S : sve2_misc<0b10, { 0b00, opc }, asm, ZPR32, ZPR16>; def _D : sve2_misc<0b11, { 0b00, opc }, asm, ZPR64, ZPR32>; } +class sve2_bitwise_xor_interleaved<bits<2> sz, bits<1> opc, string asm, + ZPRRegOp zprty1, ZPRRegOp zprty2> +: I<(outs zprty1:$Zd), (ins zprty1:$_Zd, zprty2:$Zn, zprty2:$Zm), + asm, "\t$Zd, $Zn, $Zm", "", []>, Sched<[]> { + bits<5> Zd; + bits<5> Zn; + bits<5> Zm; + let Inst{31-24} = 0b01000101; + let Inst{23-22} = sz; + let Inst{21} = 0b0; + let Inst{20-16} = Zm; + let Inst{15-11} = 0b10010; + let Inst{10} = opc; + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; + + let Constraints = "$Zd = $_Zd"; + let DestructiveInstType = Destructive; + let ElementSize = ElementSizeNone; +} + +multiclass sve2_bitwise_xor_interleaved<bit opc, string asm> { + def _B : sve2_bitwise_xor_interleaved<0b00, opc, asm, ZPR8, ZPR8>; + def _H : sve2_bitwise_xor_interleaved<0b01, opc, asm, ZPR16, ZPR16>; + def _S : sve2_bitwise_xor_interleaved<0b10, opc, asm, ZPR32, ZPR32>; + def _D : sve2_bitwise_xor_interleaved<0b11, opc, asm, ZPR64, ZPR64>; +} + class sve2_bitwise_shift_left_long<bits<3> tsz8_64, bits<2> opc, string asm, ZPRRegOp zprty1, ZPRRegOp zprty2, Operand immtype> @@ -2451,9 +2481,9 @@ multiclass sve2_bitwise_shift_left_long<bits<2> opc, string asm> { // SVE2 Accumulate Group //===----------------------------------------------------------------------===// -class sve2_int_bin_cons_shift_imm<bits<4> tsz8_64, bit opc, string asm, - ZPRRegOp zprty, Operand immtype> -: I<(outs zprty:$Zd), (ins zprty:$Zn, immtype:$imm), +class sve2_int_bin_shift_imm<bits<4> tsz8_64, bit opc, string asm, + ZPRRegOp zprty, Operand immtype> +: I<(outs zprty:$Zd), (ins zprty:$_Zd, zprty:$Zn, immtype:$imm), asm, "\t$Zd, $Zn, $imm", "", []>, Sched<[]> { bits<5> Zd; @@ -2468,38 +2498,40 @@ class sve2_int_bin_cons_shift_imm<bits<4> tsz8_64, bit opc, string asm, let Inst{10} = opc; let Inst{9-5} = Zn; let Inst{4-0} = Zd; + + let Constraints = "$Zd = $_Zd"; } -multiclass sve2_int_bin_cons_shift_imm_left<bit opc, string asm> { - def _B : sve2_int_bin_cons_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8>; - def _H : sve2_int_bin_cons_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16> { +multiclass sve2_int_bin_shift_imm_left<bit opc, string asm> { + def _B : sve2_int_bin_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8>; + def _H : sve2_int_bin_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16> { let Inst{19} = imm{3}; } - def _S : sve2_int_bin_cons_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftL32> { + def _S : sve2_int_bin_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftL32> { let Inst{20-19} = imm{4-3}; } - def _D : sve2_int_bin_cons_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftL64> { + def _D : sve2_int_bin_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftL64> { let Inst{22} = imm{5}; let Inst{20-19} = imm{4-3}; } } -multiclass sve2_int_bin_cons_shift_imm_right<bit opc, string asm> { - def _B : sve2_int_bin_cons_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>; - def _H : sve2_int_bin_cons_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> { +multiclass sve2_int_bin_shift_imm_right<bit opc, string asm> { + def _B : sve2_int_bin_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>; + def _H : sve2_int_bin_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> { let Inst{19} = imm{3}; } - def _S : sve2_int_bin_cons_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32> { + def _S : sve2_int_bin_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32> { let Inst{20-19} = imm{4-3}; } - def _D : sve2_int_bin_cons_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64> { + def _D : sve2_int_bin_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64> { let Inst{22} = imm{5}; let Inst{20-19} = imm{4-3}; } } -class sve2_int_bin_accum_cons_shift_imm<bits<4> tsz8_64, bits<2> opc, string asm, - ZPRRegOp zprty, Operand immtype> +class sve2_int_bin_accum_shift_imm<bits<4> tsz8_64, bits<2> opc, string asm, + ZPRRegOp zprty, Operand immtype> : I<(outs zprty:$Zda), (ins zprty:$_Zda, zprty:$Zn, immtype:$imm), asm, "\t$Zda, $Zn, $imm", "", []>, Sched<[]> { @@ -2521,15 +2553,15 @@ class sve2_int_bin_accum_cons_shift_imm<bits<4> tsz8_64, bits<2> opc, string asm let ElementSize = ElementSizeNone; } -multiclass sve2_int_bin_accum_cons_shift_imm_right<bits<2> opc, string asm> { - def _B : sve2_int_bin_accum_cons_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>; - def _H : sve2_int_bin_accum_cons_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> { +multiclass sve2_int_bin_accum_shift_imm_right<bits<2> opc, string asm> { + def _B : sve2_int_bin_accum_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>; + def _H : sve2_int_bin_accum_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> { let Inst{19} = imm{3}; } - def _S : sve2_int_bin_accum_cons_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32> { + def _S : sve2_int_bin_accum_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32> { let Inst{20-19} = imm{4-3}; } - def _D : sve2_int_bin_accum_cons_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64> { + def _D : sve2_int_bin_accum_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64> { let Inst{22} = imm{5}; let Inst{20-19} = imm{4-3}; } @@ -2607,9 +2639,9 @@ multiclass sve2_int_addsub_long_carry<bits<2> opc, string asm> { // SVE2 Narrowing Group //===----------------------------------------------------------------------===// -class sve2_int_bin_cons_shift_imm_narrow<bits<3> tsz8_64, bits<4> opc, - string asm, ZPRRegOp zprty1, - ZPRRegOp zprty2, Operand immtype> +class sve2_int_bin_shift_imm_narrow_bottom<bits<3> tsz8_64, bits<3> opc, + string asm, ZPRRegOp zprty1, + ZPRRegOp zprty2, Operand immtype> : I<(outs zprty1:$Zd), (ins zprty2:$Zn, immtype:$imm), asm, "\t$Zd, $Zn, $imm", "", []>, Sched<[]> { @@ -2622,26 +2654,63 @@ class sve2_int_bin_cons_shift_imm_narrow<bits<3> tsz8_64, bits<4> opc, let Inst{20-19} = tsz8_64{1-0}; let Inst{18-16} = imm{2-0}; // imm3 let Inst{15-14} = 0b00; - let Inst{13-10} = opc; + let Inst{13-11} = opc; + let Inst{10} = 0b0; + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; +} + +multiclass sve2_int_bin_shift_imm_right_narrow_bottom<bits<3> opc, string asm> { + def _B : sve2_int_bin_shift_imm_narrow_bottom<{0,0,1}, opc, asm, ZPR8, ZPR16, + vecshiftR8>; + def _H : sve2_int_bin_shift_imm_narrow_bottom<{0,1,?}, opc, asm, ZPR16, ZPR32, + vecshiftR16> { + let Inst{19} = imm{3}; + } + def _S : sve2_int_bin_shift_imm_narrow_bottom<{1,?,?}, opc, asm, ZPR32, ZPR64, + vecshiftR32> { + let Inst{20-19} = imm{4-3}; + } +} + +class sve2_int_bin_shift_imm_narrow_top<bits<3> tsz8_64, bits<3> opc, + string asm, ZPRRegOp zprty1, + ZPRRegOp zprty2, Operand immtype> +: I<(outs zprty1:$Zd), (ins zprty1:$_Zd, zprty2:$Zn, immtype:$imm), + asm, "\t$Zd, $Zn, $imm", + "", []>, Sched<[]> { + bits<5> Zd; + bits<5> Zn; + bits<5> imm; + let Inst{31-23} = 0b010001010; + let Inst{22} = tsz8_64{2}; + let Inst{21} = 0b1; + let Inst{20-19} = tsz8_64{1-0}; + let Inst{18-16} = imm{2-0}; // imm3 + let Inst{15-14} = 0b00; + let Inst{13-11} = opc; + let Inst{10} = 0b1; let Inst{9-5} = Zn; let Inst{4-0} = Zd; + + let Constraints = "$Zd = $_Zd"; } -multiclass sve2_int_bin_cons_shift_imm_right_narrow<bits<4> opc, string asm> { - def _B : sve2_int_bin_cons_shift_imm_narrow<{0,0,1}, opc, asm, ZPR8, ZPR16, - vecshiftR8>; - def _H : sve2_int_bin_cons_shift_imm_narrow<{0,1,?}, opc, asm, ZPR16, ZPR32, - vecshiftR16> { +multiclass sve2_int_bin_shift_imm_right_narrow_top<bits<3> opc, string asm> { + def _B : sve2_int_bin_shift_imm_narrow_top<{0,0,1}, opc, asm, ZPR8, ZPR16, + vecshiftR8>; + def _H : sve2_int_bin_shift_imm_narrow_top<{0,1,?}, opc, asm, ZPR16, ZPR32, + vecshiftR16> { let Inst{19} = imm{3}; } - def _S : sve2_int_bin_cons_shift_imm_narrow<{1,?,?}, opc, asm, ZPR32, ZPR64, - vecshiftR32> { + def _S : sve2_int_bin_shift_imm_narrow_top<{1,?,?}, opc, asm, ZPR32, ZPR64, + vecshiftR32> { let Inst{20-19} = imm{4-3}; } } -class sve2_int_addsub_narrow_high<bits<2> sz, bits<3> opc, string asm, - ZPRRegOp zprty1, ZPRRegOp zprty2> +class sve2_int_addsub_narrow_high_bottom<bits<2> sz, bits<2> opc, string asm, + ZPRRegOp zprty1, ZPRRegOp zprty2> : I<(outs zprty1:$Zd), (ins zprty2:$Zn, zprty2:$Zm), asm, "\t$Zd, $Zn, $Zm", "", []>, Sched<[]> { bits<5> Zd; @@ -2652,19 +2721,46 @@ class sve2_int_addsub_narrow_high<bits<2> sz, bits<3> opc, string asm, let Inst{21} = 0b1; let Inst{20-16} = Zm; let Inst{15-13} = 0b011; - let Inst{12-10} = opc; // S, R, T + let Inst{12-11} = opc; // S, R + let Inst{10} = 0b0; // Top + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; +} + +multiclass sve2_int_addsub_narrow_high_bottom<bits<2> opc, string asm> { + def _B : sve2_int_addsub_narrow_high_bottom<0b01, opc, asm, ZPR8, ZPR16>; + def _H : sve2_int_addsub_narrow_high_bottom<0b10, opc, asm, ZPR16, ZPR32>; + def _S : sve2_int_addsub_narrow_high_bottom<0b11, opc, asm, ZPR32, ZPR64>; +} + +class sve2_int_addsub_narrow_high_top<bits<2> sz, bits<2> opc, string asm, + ZPRRegOp zprty1, ZPRRegOp zprty2> +: I<(outs zprty1:$Zd), (ins zprty1:$_Zd, zprty2:$Zn, zprty2:$Zm), + asm, "\t$Zd, $Zn, $Zm", "", []>, Sched<[]> { + bits<5> Zd; + bits<5> Zn; + bits<5> Zm; + let Inst{31-24} = 0b01000101; + let Inst{23-22} = sz; + let Inst{21} = 0b1; + let Inst{20-16} = Zm; + let Inst{15-13} = 0b011; + let Inst{12-11} = opc; // S, R + let Inst{10} = 0b1; // Top let Inst{9-5} = Zn; let Inst{4-0} = Zd; + + let Constraints = "$Zd = $_Zd"; } -multiclass sve2_int_addsub_narrow_high<bits<3> opc, string asm> { - def _B : sve2_int_addsub_narrow_high<0b01, opc, asm, ZPR8, ZPR16>; - def _H : sve2_int_addsub_narrow_high<0b10, opc, asm, ZPR16, ZPR32>; - def _S : sve2_int_addsub_narrow_high<0b11, opc, asm, ZPR32, ZPR64>; +multiclass sve2_int_addsub_narrow_high_top<bits<2> opc, string asm> { + def _B : sve2_int_addsub_narrow_high_top<0b01, opc, asm, ZPR8, ZPR16>; + def _H : sve2_int_addsub_narrow_high_top<0b10, opc, asm, ZPR16, ZPR32>; + def _S : sve2_int_addsub_narrow_high_top<0b11, opc, asm, ZPR32, ZPR64>; } -class sve2_int_sat_extract_narrow<bits<3> tsz8_64, bits<3> opc, string asm, - ZPRRegOp zprty1, ZPRRegOp zprty2> +class sve2_int_sat_extract_narrow_bottom<bits<3> tsz8_64, bits<2> opc, string asm, + ZPRRegOp zprty1, ZPRRegOp zprty2> : I<(outs zprty1:$Zd), (ins zprty2:$Zn), asm, "\t$Zd, $Zn", "", []>, Sched<[]> { bits<5> Zd; @@ -2674,15 +2770,41 @@ class sve2_int_sat_extract_narrow<bits<3> tsz8_64, bits<3> opc, string asm, let Inst{21} = 0b1; let Inst{20-19} = tsz8_64{1-0}; let Inst{18-13} = 0b000010; - let Inst{12-10} = opc; + let Inst{12-11} = opc; + let Inst{10} = 0b0; + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; +} + +multiclass sve2_int_sat_extract_narrow_bottom<bits<2> opc, string asm> { + def _B : sve2_int_sat_extract_narrow_bottom<0b001, opc, asm, ZPR8, ZPR16>; + def _H : sve2_int_sat_extract_narrow_bottom<0b010, opc, asm, ZPR16, ZPR32>; + def _S : sve2_int_sat_extract_narrow_bottom<0b100, opc, asm, ZPR32, ZPR64>; +} + +class sve2_int_sat_extract_narrow_top<bits<3> tsz8_64, bits<2> opc, string asm, + ZPRRegOp zprty1, ZPRRegOp zprty2> +: I<(outs zprty1:$Zd), (ins zprty1:$_Zd, zprty2:$Zn), + asm, "\t$Zd, $Zn", "", []>, Sched<[]> { + bits<5> Zd; + bits<5> Zn; + let Inst{31-23} = 0b010001010; + let Inst{22} = tsz8_64{2}; + let Inst{21} = 0b1; + let Inst{20-19} = tsz8_64{1-0}; + let Inst{18-13} = 0b000010; + let Inst{12-11} = opc; + let Inst{10} = 0b1; let Inst{9-5} = Zn; let Inst{4-0} = Zd; + + let Constraints = "$Zd = $_Zd"; } -multiclass sve2_int_sat_extract_narrow<bits<3> opc, string asm> { - def _B : sve2_int_sat_extract_narrow<0b001, opc, asm, ZPR8, ZPR16>; - def _H : sve2_int_sat_extract_narrow<0b010, opc, asm, ZPR16, ZPR32>; - def _S : sve2_int_sat_extract_narrow<0b100, opc, asm, ZPR32, ZPR64>; +multiclass sve2_int_sat_extract_narrow_top<bits<2> opc, string asm> { + def _B : sve2_int_sat_extract_narrow_top<0b001, opc, asm, ZPR8, ZPR16>; + def _H : sve2_int_sat_extract_narrow_top<0b010, opc, asm, ZPR16, ZPR32>; + def _S : sve2_int_sat_extract_narrow_top<0b100, opc, asm, ZPR32, ZPR64>; } //===----------------------------------------------------------------------===// @@ -3886,9 +4008,9 @@ multiclass sve_mem_cstnt_ss<bits<2> msz, string asm, RegisterOperand listty, (!cast<Instruction>(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), 0>; } -class sve2_mem_cstnt_vs_base<bits<3> opc, dag iops, string asm, - RegisterOperand VecList> -: I<(outs VecList:$Zt), iops, +class sve2_mem_sstnt_vs_base<bits<3> opc, string asm, + RegisterOperand listty, ZPRRegOp zprty> +: I<(outs), (ins listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm), asm, "\t$Zt, $Pg, [$Zn, $Rm]", "", []>, Sched<[]> { @@ -3908,17 +4030,14 @@ class sve2_mem_cstnt_vs_base<bits<3> opc, dag iops, string asm, let mayStore = 1; } -multiclass sve2_mem_cstnt_vs<bits<3> opc, string asm, +multiclass sve2_mem_sstnt_vs<bits<3> opc, string asm, RegisterOperand listty, ZPRRegOp zprty> { - def _REAL : sve2_mem_cstnt_vs_base<opc, (ins PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm), - asm, listty>; + def _REAL : sve2_mem_sstnt_vs_base<opc, asm, listty, zprty>; def : InstAlias<asm # "\t$Zt, $Pg, [$Zn, $Rm]", (!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm), 0>; def : InstAlias<asm # "\t$Zt, $Pg, [$Zn]", (!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, XZR), 0>; - def : InstAlias<asm # "\t$Zt, $Pg, [$Zn, $Rm]", - (!cast<Instruction>(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm), 0>; def : InstAlias<asm # "\t$Zt, $Pg, [$Zn]", (!cast<Instruction>(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, XZR), 1>; } @@ -5094,7 +5213,7 @@ multiclass sve_mem_p_fill<string asm> { (!cast<Instruction>(NAME) PPRAny:$Pt, GPR64sp:$Rn, 0), 1>; } -class sve2_mem_cldnt_vs_base<bits<5> opc, dag iops, string asm, +class sve2_mem_gldnt_vs_base<bits<5> opc, dag iops, string asm, RegisterOperand VecList> : I<(outs VecList:$Zt), iops, asm, "\t$Zt, $Pg/z, [$Zn, $Rm]", @@ -5119,17 +5238,15 @@ class sve2_mem_cldnt_vs_base<bits<5> opc, dag iops, string asm, let mayLoad = 1; } -multiclass sve2_mem_cldnt_vs<bits<5> opc, string asm, +multiclass sve2_mem_gldnt_vs<bits<5> opc, string asm, RegisterOperand listty, ZPRRegOp zprty> { - def _REAL : sve2_mem_cldnt_vs_base<opc, (ins PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm), + def _REAL : sve2_mem_gldnt_vs_base<opc, (ins PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm), asm, listty>; def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn, $Rm]", (!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm), 0>; def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn]", (!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, XZR), 0>; - def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn, $Rm]", - (!cast<Instruction>(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm), 0>; def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn]", (!cast<Instruction>(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, XZR), 1>; } diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 18bb9bf3eccc..d390c9e237e6 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -14369,7 +14369,8 @@ const char *ARMTargetLowering::LowerXConstraint(EVT ConstraintVT) const { /// constraint it is for this target. ARMTargetLowering::ConstraintType ARMTargetLowering::getConstraintType(StringRef Constraint) const { - if (Constraint.size() == 1) { + unsigned S = Constraint.size(); + if (S == 1) { switch (Constraint[0]) { default: break; case 'l': return C_RegisterClass; @@ -14377,12 +14378,12 @@ ARMTargetLowering::getConstraintType(StringRef Constraint) const { case 'h': return C_RegisterClass; case 'x': return C_RegisterClass; case 't': return C_RegisterClass; - case 'j': return C_Other; // Constant for movw. - // An address with a single base register. Due to the way we - // currently handle addresses it is the same as an 'r' memory constraint. + case 'j': return C_Immediate; // Constant for movw. + // An address with a single base register. Due to the way we + // currently handle addresses it is the same as an 'r' memory constraint. case 'Q': return C_Memory; } - } else if (Constraint.size() == 2) { + } else if (S == 2) { switch (Constraint[0]) { default: break; case 'T': return C_RegisterClass; diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index cfeb13c6acb6..fa266c41080c 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -592,6 +592,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1 in { [(ARMbrjt tGPR:$target, tjumptable:$jt)]>, Sched<[WriteBrTbl]> { let Size = 2; + let isNotDuplicable = 1; list<Predicate> Predicates = [IsThumb, IsThumb1Only]; } } @@ -1465,7 +1466,7 @@ def tLEApcrelJT : tPseudoInst<(outs tGPR:$Rd), // Thumb-1 doesn't have the TBB or TBH instructions, but we can synthesize them // and make use of the same compressed jump table format as Thumb-2. let Size = 2, isBranch = 1, isTerminator = 1, isBarrier = 1, - isIndirectBranch = 1 in { + isIndirectBranch = 1, isNotDuplicable = 1 in { def tTBB_JT : tPseudoInst<(outs), (ins tGPRwithpc:$base, tGPR:$index, i32imm:$jt, i32imm:$pclbl), 0, IIC_Br, []>, Sched<[WriteBr]>; diff --git a/lib/Target/AVR/AVRISelLowering.cpp b/lib/Target/AVR/AVRISelLowering.cpp index b6ba5f22fafb..f159beee9730 100644 --- a/lib/Target/AVR/AVRISelLowering.cpp +++ b/lib/Target/AVR/AVRISelLowering.cpp @@ -1689,6 +1689,8 @@ AVRTargetLowering::getConstraintType(StringRef Constraint) const { if (Constraint.size() == 1) { // See http://www.nongnu.org/avr-libc/user-manual/inline_asm.html switch (Constraint[0]) { + default: + break; case 'a': // Simple upper registers case 'b': // Base pointer registers pairs case 'd': // Upper register @@ -1715,9 +1717,7 @@ AVRTargetLowering::getConstraintType(StringRef Constraint) const { case 'O': // Integer constant (Range: 8, 16, 24) case 'P': // Integer constant (Range: 1) case 'R': // Integer constant (Range: -6 to 5)x - return C_Other; - default: - break; + return C_Immediate; } } diff --git a/lib/Target/BPF/BPFAbstractMemberAccess.cpp b/lib/Target/BPF/BPFAbstractMemberAccess.cpp index 51d4cbc8a429..509484b71544 100644 --- a/lib/Target/BPF/BPFAbstractMemberAccess.cpp +++ b/lib/Target/BPF/BPFAbstractMemberAccess.cpp @@ -116,9 +116,8 @@ private: void replaceWithGEP(std::vector<CallInst *> &CallList, uint32_t NumOfZerosIndex, uint32_t DIIndex); - Value *computeBaseAndAccessStr(CallInst *Call, std::string &AccessStr, - std::string &AccessKey, uint32_t Kind, - MDNode *&TypeMeta); + Value *computeBaseAndAccessKey(CallInst *Call, std::string &AccessKey, + uint32_t Kind, MDNode *&TypeMeta); bool getAccessIndex(const Value *IndexValue, uint64_t &AccessIndex); bool transformGEPChain(Module &M, CallInst *Call, uint32_t Kind); }; @@ -340,8 +339,7 @@ bool BPFAbstractMemberAccess::getAccessIndex(const Value *IndexValue, /// Compute the base of the whole preserve_*_access_index chains, i.e., the base /// pointer of the first preserve_*_access_index call, and construct the access /// string, which will be the name of a global variable. -Value *BPFAbstractMemberAccess::computeBaseAndAccessStr(CallInst *Call, - std::string &AccessStr, +Value *BPFAbstractMemberAccess::computeBaseAndAccessKey(CallInst *Call, std::string &AccessKey, uint32_t Kind, MDNode *&TypeMeta) { @@ -392,16 +390,16 @@ Value *BPFAbstractMemberAccess::computeBaseAndAccessStr(CallInst *Call, if (!LastTypeName.size() || AccessIndices.size() > TypeNameIndex + 2) return nullptr; - // Construct the type string AccessStr. + // Construct the type string AccessKey. for (unsigned I = 0; I < AccessIndices.size(); ++I) - AccessStr = std::to_string(AccessIndices[I]) + ":" + AccessStr; + AccessKey = std::to_string(AccessIndices[I]) + ":" + AccessKey; if (TypeNameIndex == AccessIndices.size() - 1) - AccessStr = "0:" + AccessStr; + AccessKey = "0:" + AccessKey; // Access key is the type name + access string, uniquely identifying // one kernel memory access. - AccessKey = LastTypeName + ":" + AccessStr; + AccessKey = LastTypeName + ":" + AccessKey; return Base; } @@ -410,10 +408,10 @@ Value *BPFAbstractMemberAccess::computeBaseAndAccessStr(CallInst *Call, /// transformation to a chain of relocable GEPs. bool BPFAbstractMemberAccess::transformGEPChain(Module &M, CallInst *Call, uint32_t Kind) { - std::string AccessStr, AccessKey; + std::string AccessKey; MDNode *TypeMeta = nullptr; Value *Base = - computeBaseAndAccessStr(Call, AccessStr, AccessKey, Kind, TypeMeta); + computeBaseAndAccessKey(Call, AccessKey, Kind, TypeMeta); if (!Base) return false; @@ -432,7 +430,7 @@ bool BPFAbstractMemberAccess::transformGEPChain(Module &M, CallInst *Call, if (GEPGlobals.find(AccessKey) == GEPGlobals.end()) { GV = new GlobalVariable(M, Type::getInt64Ty(BB->getContext()), false, - GlobalVariable::ExternalLinkage, NULL, AccessStr); + GlobalVariable::ExternalLinkage, NULL, AccessKey); GV->addAttribute(BPFCoreSharedInfo::AmaAttr); // Set the metadata (debuginfo types) for the global. if (TypeMeta) diff --git a/lib/Target/BPF/BTFDebug.cpp b/lib/Target/BPF/BTFDebug.cpp index fa35c6619e21..5c542e739088 100644 --- a/lib/Target/BPF/BTFDebug.cpp +++ b/lib/Target/BPF/BTFDebug.cpp @@ -30,6 +30,18 @@ static const char *BTFKindStr[] = { #include "BTF.def" }; +static const DIType * stripQualifiers(const DIType *Ty) { + while (const auto *DTy = dyn_cast<DIDerivedType>(Ty)) { + unsigned Tag = DTy->getTag(); + if (Tag != dwarf::DW_TAG_typedef && Tag != dwarf::DW_TAG_const_type && + Tag != dwarf::DW_TAG_volatile_type && Tag != dwarf::DW_TAG_restrict_type) + break; + Ty = DTy->getBaseType(); + } + + return Ty; +} + /// Emit a BTF common type. void BTFTypeBase::emitType(MCStreamer &OS) { OS.AddComment(std::string(BTFKindStr[Kind]) + "(id = " + std::to_string(Id) + @@ -184,9 +196,9 @@ void BTFTypeEnum::emitType(MCStreamer &OS) { } } -BTFTypeArray::BTFTypeArray(uint32_t ElemTypeId, uint32_t ElemSize, - uint32_t NumElems) - : ElemSize(ElemSize) { +BTFTypeArray::BTFTypeArray(const DIType *Ty, uint32_t ElemTypeId, + uint32_t ElemSize, uint32_t NumElems) + : ElemTyNoQual(Ty), ElemSize(ElemSize) { Kind = BTF::BTF_KIND_ARRAY; BTFType.NameOff = 0; BTFType.Info = Kind << 24; @@ -207,6 +219,9 @@ void BTFTypeArray::completeType(BTFDebug &BDebug) { // created during initial type traversal. Just // retrieve that type id. ArrayInfo.IndexType = BDebug.getArrayIndexTypeId(); + + ElemTypeNoQual = ElemTyNoQual ? BDebug.getTypeId(ElemTyNoQual) + : ArrayInfo.ElemType; } void BTFTypeArray::emitType(MCStreamer &OS) { @@ -218,7 +233,7 @@ void BTFTypeArray::emitType(MCStreamer &OS) { void BTFTypeArray::getLocInfo(uint32_t Loc, uint32_t &LocOffset, uint32_t &ElementTypeId) { - ElementTypeId = ArrayInfo.ElemType; + ElementTypeId = ElemTypeNoQual; LocOffset = Loc * ElemSize; } @@ -251,7 +266,9 @@ void BTFTypeStruct::completeType(BTFDebug &BDebug) { } else { BTFMember.Offset = DDTy->getOffsetInBits(); } - BTFMember.Type = BDebug.getTypeId(DDTy->getBaseType()); + const auto *BaseTy = DDTy->getBaseType(); + BTFMember.Type = BDebug.getTypeId(BaseTy); + MemberTypeNoQual.push_back(BDebug.getTypeId(stripQualifiers(BaseTy))); Members.push_back(BTFMember); } } @@ -270,7 +287,7 @@ std::string BTFTypeStruct::getName() { return STy->getName(); } void BTFTypeStruct::getMemberInfo(uint32_t Loc, uint32_t &MemberOffset, uint32_t &MemberType) { - MemberType = Members[Loc].Type; + MemberType = MemberTypeNoQual[Loc]; MemberOffset = HasBitField ? Members[Loc].Offset & 0xffffff : Members[Loc].Offset; } @@ -492,10 +509,13 @@ void BTFDebug::visitArrayType(const DICompositeType *CTy, uint32_t &TypeId) { uint32_t ElemTypeId, ElemSize; const DIType *ElemType = CTy->getBaseType(); visitTypeEntry(ElemType, ElemTypeId, false, false); + + // Strip qualifiers from element type to get accurate element size. + ElemType = stripQualifiers(ElemType); ElemSize = ElemType->getSizeInBits() >> 3; if (!CTy->getSizeInBits()) { - auto TypeEntry = llvm::make_unique<BTFTypeArray>(ElemTypeId, 0, 0); + auto TypeEntry = llvm::make_unique<BTFTypeArray>(ElemType, ElemTypeId, 0, 0); ArrayTypes.push_back(TypeEntry.get()); ElemTypeId = addType(std::move(TypeEntry), CTy); } else { @@ -507,9 +527,11 @@ void BTFDebug::visitArrayType(const DICompositeType *CTy, uint32_t &TypeId) { const DISubrange *SR = cast<DISubrange>(Element); auto *CI = SR->getCount().dyn_cast<ConstantInt *>(); int64_t Count = CI->getSExtValue(); + const DIType *ArrayElemTy = (I == 0) ? ElemType : nullptr; auto TypeEntry = - llvm::make_unique<BTFTypeArray>(ElemTypeId, ElemSize, Count); + llvm::make_unique<BTFTypeArray>(ArrayElemTy, ElemTypeId, + ElemSize, Count); ArrayTypes.push_back(TypeEntry.get()); if (I == 0) ElemTypeId = addType(std::move(TypeEntry), CTy); @@ -1006,19 +1028,20 @@ void BTFDebug::generateOffsetReloc(const MachineInstr *MI, unsigned RootId = populateStructType(RootTy); setTypeFromId(RootId, &PrevStructType, &PrevArrayType); unsigned RootTySize = PrevStructType->getStructSize(); + StringRef IndexPattern = AccessPattern.substr(AccessPattern.find_first_of(':') + 1); BTFOffsetReloc OffsetReloc; OffsetReloc.Label = ORSym; - OffsetReloc.OffsetNameOff = addString(AccessPattern.drop_back()); + OffsetReloc.OffsetNameOff = addString(IndexPattern.drop_back()); OffsetReloc.TypeID = RootId; uint32_t Start = 0, End = 0, Offset = 0; bool FirstAccess = true; - for (auto C : AccessPattern) { + for (auto C : IndexPattern) { if (C != ':') { End++; } else { - std::string SubStr = AccessPattern.substr(Start, End - Start); + std::string SubStr = IndexPattern.substr(Start, End - Start); int Loc = std::stoi(SubStr); if (FirstAccess) { @@ -1038,12 +1061,15 @@ void BTFDebug::generateOffsetReloc(const MachineInstr *MI, Offset += LocOffset; PrevArrayType = nullptr; setTypeFromId(ElementTypeId, &PrevStructType, &PrevArrayType); + } else { + llvm_unreachable("Internal Error: BTF offset relocation type traversal error"); } + Start = End + 1; End = Start; } } - AccessOffsets[RootTy->getName().str() + ":" + AccessPattern.str()] = Offset; + AccessOffsets[AccessPattern.str()] = Offset; OffsetRelocTable[SecNameOff].push_back(OffsetReloc); } @@ -1227,7 +1253,7 @@ bool BTFDebug::InstLower(const MachineInstr *MI, MCInst &OutMI) { MDNode *MDN = GVar->getMetadata(LLVMContext::MD_preserve_access_index); DIType *Ty = dyn_cast<DIType>(MDN); std::string TypeName = Ty->getName(); - int64_t Imm = AccessOffsets[TypeName + ":" + GVar->getName().str()]; + int64_t Imm = AccessOffsets[GVar->getName().str()]; // Emit "mov ri, <imm>" for abstract member accesses. OutMI.setOpcode(BPF::MOV_ri); diff --git a/lib/Target/BPF/BTFDebug.h b/lib/Target/BPF/BTFDebug.h index 6c0cdde17d9b..e210d18f941e 100644 --- a/lib/Target/BPF/BTFDebug.h +++ b/lib/Target/BPF/BTFDebug.h @@ -104,11 +104,14 @@ public: /// Handle array type. class BTFTypeArray : public BTFTypeBase { + const DIType *ElemTyNoQual; uint32_t ElemSize; struct BTF::BTFArray ArrayInfo; + uint32_t ElemTypeNoQual; public: - BTFTypeArray(uint32_t ElemTypeId, uint32_t ElemSize, uint32_t NumElems); + BTFTypeArray(const DIType *Ty, uint32_t ElemTypeId, + uint32_t ElemSize, uint32_t NumElems); uint32_t getSize() { return BTFTypeBase::getSize() + BTF::BTFArraySize; } void completeType(BTFDebug &BDebug); void emitType(MCStreamer &OS); @@ -120,6 +123,7 @@ class BTFTypeStruct : public BTFTypeBase { const DICompositeType *STy; bool HasBitField; std::vector<struct BTF::BTFMember> Members; + std::vector<uint32_t> MemberTypeNoQual; public: BTFTypeStruct(const DICompositeType *STy, bool IsStruct, bool HasBitField, diff --git a/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp index 0172c6298772..f10f7a2b77d6 100644 --- a/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ b/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -1208,6 +1208,24 @@ OperandMatchResultTy RISCVAsmParser::parseBareSymbol(OperandVector &Operands) { Res = V; } else Res = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext()); + + MCBinaryExpr::Opcode Opcode; + switch (getLexer().getKind()) { + default: + Operands.push_back(RISCVOperand::createImm(Res, S, E, isRV64())); + return MatchOperand_Success; + case AsmToken::Plus: + Opcode = MCBinaryExpr::Add; + break; + case AsmToken::Minus: + Opcode = MCBinaryExpr::Sub; + break; + } + + const MCExpr *Expr; + if (getParser().parseExpression(Expr)) + return MatchOperand_ParseFail; + Res = MCBinaryExpr::create(Opcode, Res, Expr, getContext()); Operands.push_back(RISCVOperand::createImm(Res, S, E, isRV64())); return MatchOperand_Success; } diff --git a/lib/Target/RISCV/RISCVFrameLowering.cpp b/lib/Target/RISCV/RISCVFrameLowering.cpp index 32c3b9684d2c..bbaa16c08634 100644 --- a/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -40,8 +40,16 @@ void RISCVFrameLowering::determineFrameLayout(MachineFunction &MF) const { uint64_t FrameSize = MFI.getStackSize(); // Get the alignment. - uint64_t StackAlign = RI->needsStackRealignment(MF) ? MFI.getMaxAlignment() - : getStackAlignment(); + unsigned StackAlign = getStackAlignment(); + if (RI->needsStackRealignment(MF)) { + unsigned MaxStackAlign = std::max(StackAlign, MFI.getMaxAlignment()); + FrameSize += (MaxStackAlign - StackAlign); + StackAlign = MaxStackAlign; + } + + // Set Max Call Frame Size + uint64_t MaxCallSize = alignTo(MFI.getMaxCallFrameSize(), StackAlign); + MFI.setMaxCallFrameSize(MaxCallSize); // Make sure the frame is aligned. FrameSize = alignTo(FrameSize, StackAlign); @@ -101,6 +109,12 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF, const RISCVInstrInfo *TII = STI.getInstrInfo(); MachineBasicBlock::iterator MBBI = MBB.begin(); + if (RI->needsStackRealignment(MF) && MFI.hasVarSizedObjects()) { + report_fatal_error( + "RISC-V backend can't currently handle functions that need stack " + "realignment and have variable sized objects"); + } + unsigned FPReg = getFPReg(STI); unsigned SPReg = getSPReg(STI); @@ -158,6 +172,29 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF, nullptr, RI->getDwarfRegNum(FPReg, true), 0)); BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); + + // Realign Stack + const RISCVRegisterInfo *RI = STI.getRegisterInfo(); + if (RI->needsStackRealignment(MF)) { + unsigned MaxAlignment = MFI.getMaxAlignment(); + + const RISCVInstrInfo *TII = STI.getInstrInfo(); + if (isInt<12>(-(int)MaxAlignment)) { + BuildMI(MBB, MBBI, DL, TII->get(RISCV::ANDI), SPReg) + .addReg(SPReg) + .addImm(-(int)MaxAlignment); + } else { + unsigned ShiftAmount = countTrailingZeros(MaxAlignment); + unsigned VR = + MF.getRegInfo().createVirtualRegister(&RISCV::GPRRegClass); + BuildMI(MBB, MBBI, DL, TII->get(RISCV::SRLI), VR) + .addReg(SPReg) + .addImm(ShiftAmount); + BuildMI(MBB, MBBI, DL, TII->get(RISCV::SLLI), SPReg) + .addReg(VR) + .addImm(ShiftAmount); + } + } } } @@ -257,6 +294,13 @@ int RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, if (FI >= MinCSFI && FI <= MaxCSFI) { FrameReg = RISCV::X2; Offset += MF.getFrameInfo().getStackSize(); + } else if (RI->needsStackRealignment(MF)) { + assert(!MFI.hasVarSizedObjects() && + "Unexpected combination of stack realignment and varsized objects"); + // If the stack was realigned, the frame pointer is set in order to allow + // SP to be restored, but we still access stack objects using SP. + FrameReg = RISCV::X2; + Offset += MF.getFrameInfo().getStackSize(); } else { FrameReg = RI->getFrameRegister(MF); if (hasFP(MF)) diff --git a/lib/Target/RISCV/RISCVISelLowering.cpp b/lib/Target/RISCV/RISCVISelLowering.cpp index ce7b85911ab6..e695f79f5cf4 100644 --- a/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/lib/Target/RISCV/RISCVISelLowering.cpp @@ -1007,12 +1007,14 @@ bool RISCVTargetLowering::isDesirableToCommuteWithShift( // We can materialise `c1 << c2` into an add immediate, so it's "free", // and the combine should happen, to potentially allow further combines // later. - if (isLegalAddImmediate(ShiftedC1Int.getSExtValue())) + if (ShiftedC1Int.getMinSignedBits() <= 64 && + isLegalAddImmediate(ShiftedC1Int.getSExtValue())) return true; // We can materialise `c1` in an add immediate, so it's "free", and the // combine should be prevented. - if (isLegalAddImmediate(C1Int.getSExtValue())) + if (C1Int.getMinSignedBits() <= 64 && + isLegalAddImmediate(C1Int.getSExtValue())) return false; // Neither constant will fit into an immediate, so find materialisation @@ -2397,6 +2399,25 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const { return nullptr; } +/// getConstraintType - Given a constraint letter, return the type of +/// constraint it is for this target. +RISCVTargetLowering::ConstraintType +RISCVTargetLowering::getConstraintType(StringRef Constraint) const { + if (Constraint.size() == 1) { + switch (Constraint[0]) { + default: + break; + case 'f': + return C_RegisterClass; + case 'I': + case 'J': + case 'K': + return C_Immediate; + } + } + return TargetLowering::getConstraintType(Constraint); +} + std::pair<unsigned, const TargetRegisterClass *> RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, @@ -2407,6 +2428,12 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, switch (Constraint[0]) { case 'r': return std::make_pair(0U, &RISCV::GPRRegClass); + case 'f': + if (Subtarget.hasStdExtF() && VT == MVT::f32) + return std::make_pair(0U, &RISCV::FPR32RegClass); + if (Subtarget.hasStdExtD() && VT == MVT::f64) + return std::make_pair(0U, &RISCV::FPR64RegClass); + break; default: break; } diff --git a/lib/Target/RISCV/RISCVISelLowering.h b/lib/Target/RISCV/RISCVISelLowering.h index 17db03bbb69e..f28c4753c1d9 100644 --- a/lib/Target/RISCV/RISCVISelLowering.h +++ b/lib/Target/RISCV/RISCVISelLowering.h @@ -92,6 +92,7 @@ public: // This method returns the name of a target specific DAG node. const char *getTargetNodeName(unsigned Opcode) const override; + ConstraintType getConstraintType(StringRef Constraint) const override; std::pair<unsigned, const TargetRegisterClass *> getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override; diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp index a6d440fa8aa2..804f7ba74edf 100644 --- a/lib/Target/Sparc/SparcISelLowering.cpp +++ b/lib/Target/Sparc/SparcISelLowering.cpp @@ -3183,7 +3183,7 @@ SparcTargetLowering::getConstraintType(StringRef Constraint) const { case 'e': return C_RegisterClass; case 'I': // SIMM13 - return C_Other; + return C_Immediate; } } diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index 78820f511ab4..e7b7a5b0cd53 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -956,7 +956,7 @@ SystemZTargetLowering::getConstraintType(StringRef Constraint) const { case 'K': // Signed 16-bit constant case 'L': // Signed 20-bit displacement (on all targets we support) case 'M': // 0x7fffffff - return C_Other; + return C_Immediate; default: break; diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index 3112f00c91f2..e20315da55a5 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -95,7 +95,8 @@ def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA", def Feature64Bit : SubtargetFeature<"64bit", "HasX86_64", "true", "Support 64-bit instructions">; def FeatureCMPXCHG16B : SubtargetFeature<"cx16", "HasCmpxchg16b", "true", - "64-bit with cmpxchg16b">; + "64-bit with cmpxchg16b", + [FeatureCMPXCHG8B]>; def FeatureSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true", "SHLD instruction is slow">; def FeatureSlowPMULLD : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true", diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 95d31e62cafc..34ad589d205f 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -2464,6 +2464,37 @@ bool X86DAGToDAGISel::selectLEAAddr(SDValue N, Complexity += 2; } + // Heuristic: try harder to form an LEA from ADD if the operands set flags. + // Unlike ADD, LEA does not affect flags, so we will be less likely to require + // duplicating flag-producing instructions later in the pipeline. + if (N.getOpcode() == ISD::ADD) { + auto isMathWithFlags = [](SDValue V) { + switch (V.getOpcode()) { + case X86ISD::ADD: + case X86ISD::SUB: + case X86ISD::ADC: + case X86ISD::SBB: + /* TODO: These opcodes can be added safely, but we may want to justify + their inclusion for different reasons (better for reg-alloc). + case X86ISD::SMUL: + case X86ISD::UMUL: + case X86ISD::OR: + case X86ISD::XOR: + case X86ISD::AND: + */ + // Value 1 is the flag output of the node - verify it's not dead. + return !SDValue(V.getNode(), 1).use_empty(); + default: + return false; + } + }; + // TODO: This could be an 'or' rather than 'and' to make the transform more + // likely to happen. We might want to factor in whether there's a + // load folding opportunity for the math op that disappears with LEA. + if (isMathWithFlags(N.getOperand(0)) && isMathWithFlags(N.getOperand(1))) + Complexity++; + } + if (AM.Disp) Complexity++; @@ -3302,8 +3333,12 @@ bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) { SDValue ImplDef = SDValue( CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i32), 0); insertDAGNode(*CurDAG, SDValue(Node, 0), ImplDef); - NBits = CurDAG->getTargetInsertSubreg(X86::sub_8bit, DL, MVT::i32, ImplDef, - NBits); + + SDValue SRIdxVal = CurDAG->getTargetConstant(X86::sub_8bit, DL, MVT::i32); + insertDAGNode(*CurDAG, SDValue(Node, 0), SRIdxVal); + NBits = SDValue( + CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::i32, ImplDef, + NBits, SRIdxVal), 0); insertDAGNode(*CurDAG, SDValue(Node, 0), NBits); if (Subtarget->hasBMI2()) { diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 0b4bf687e6cf..ad68ddbeaa8b 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -4069,6 +4069,11 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, InFlag = Chain.getValue(1); DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo)); + // Save heapallocsite metadata. + if (CLI.CS) + if (MDNode *HeapAlloc = CLI.CS->getMetadata("heapallocsite")) + DAG.addHeapAllocSite(Chain.getNode(), HeapAlloc); + // Create the CALLSEQ_END node. unsigned NumBytesForCalleeToPop; if (X86::isCalleePop(CallConv, Is64Bit, isVarArg, @@ -5500,6 +5505,7 @@ static bool collectConcatOps(SDNode *N, SmallVectorImpl<SDValue> &Ops) { if (VT.getSizeInBits() == (SubVT.getSizeInBits() * 2) && Idx == (VT.getVectorNumElements() / 2) && Src.getOpcode() == ISD::INSERT_SUBVECTOR && + Src.getOperand(1).getValueType() == SubVT && isNullConstant(Src.getOperand(2))) { Ops.push_back(Src.getOperand(1)); Ops.push_back(Sub); @@ -34062,25 +34068,6 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode( return true; break; } - case X86ISD::SUBV_BROADCAST: { - // Reduce size of broadcast if we don't need the upper half. - unsigned HalfElts = NumElts / 2; - if (DemandedElts.extractBits(HalfElts, HalfElts).isNullValue()) { - SDValue Src = Op.getOperand(0); - MVT SrcVT = Src.getSimpleValueType(); - - SDValue Half = Src; - if (SrcVT.getVectorNumElements() != HalfElts) { - MVT HalfVT = MVT::getVectorVT(SrcVT.getScalarType(), HalfElts); - Half = TLO.DAG.getNode(X86ISD::SUBV_BROADCAST, SDLoc(Op), HalfVT, Src); - } - - return TLO.CombineTo(Op, insertSubVector(TLO.DAG.getUNDEF(VT), Half, 0, - TLO.DAG, SDLoc(Op), - Half.getValueSizeInBits())); - } - break; - } case X86ISD::VPERMV: { SDValue Mask = Op.getOperand(0); APInt MaskUndef, MaskZero; @@ -34135,6 +34122,21 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode( insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits); return TLO.CombineTo(Op, Insert); } + // Subvector broadcast. + case X86ISD::SUBV_BROADCAST: { + SDLoc DL(Op); + SDValue Src = Op.getOperand(0); + if (Src.getValueSizeInBits() > ExtSizeInBits) + Src = extractSubVector(Src, 0, TLO.DAG, DL, ExtSizeInBits); + else if (Src.getValueSizeInBits() < ExtSizeInBits) { + MVT SrcSVT = Src.getSimpleValueType().getScalarType(); + MVT SrcVT = + MVT::getVectorVT(SrcSVT, ExtSizeInBits / SrcSVT.getSizeInBits()); + Src = TLO.DAG.getNode(X86ISD::SUBV_BROADCAST, DL, SrcVT, Src); + } + return TLO.CombineTo(Op, insertSubVector(TLO.DAG.getUNDEF(VT), Src, 0, + TLO.DAG, DL, ExtSizeInBits)); + } // Byte shifts by immediate. case X86ISD::VSHLDQ: case X86ISD::VSRLDQ: @@ -43839,6 +43841,7 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG, Vec.getOpcode() == ISD::INSERT_SUBVECTOR && OpVT.getSizeInBits() == SubVecVT.getSizeInBits() * 2 && isNullConstant(Vec.getOperand(2)) && !Vec.getOperand(0).isUndef() && + Vec.getOperand(1).getValueSizeInBits() == SubVecVT.getSizeInBits() && Vec.hasOneUse()) { Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, DAG.getUNDEF(OpVT), Vec.getOperand(1), Vec.getOperand(2)); @@ -44660,10 +44663,11 @@ X86TargetLowering::getConstraintType(StringRef Constraint) const { case 'I': case 'J': case 'K': - case 'L': - case 'M': case 'N': case 'G': + case 'L': + case 'M': + return C_Immediate; case 'C': case 'e': case 'Z': |
