diff options
| author | Dimitry Andric <dim@FreeBSD.org> | 2024-01-11 18:24:21 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2024-01-11 18:24:21 +0000 |
| commit | 950076cd18f3fa9d789b4add9d405898efff09a5 (patch) | |
| tree | 2454649366290c6292cc2d94dde042f71bc1e144 /llvm | |
| parent | aca2e42c67292825f835f094eb0c4df5ce6013db (diff) | |
Diffstat (limited to 'llvm')
135 files changed, 2899 insertions, 1418 deletions
diff --git a/llvm/include/llvm/ADT/STLExtras.h b/llvm/include/llvm/ADT/STLExtras.h index 18bc4d108b15..a136eeb0ff1b 100644 --- a/llvm/include/llvm/ADT/STLExtras.h +++ b/llvm/include/llvm/ADT/STLExtras.h @@ -1290,18 +1290,6 @@ public: return (*this)[size() - 1]; } - /// Compare this range with another. - template <typename OtherT> - friend bool operator==(const indexed_accessor_range_base &lhs, - const OtherT &rhs) { - return std::equal(lhs.begin(), lhs.end(), rhs.begin(), rhs.end()); - } - template <typename OtherT> - friend bool operator!=(const indexed_accessor_range_base &lhs, - const OtherT &rhs) { - return !(lhs == rhs); - } - /// Return the size of this range. size_t size() const { return count; } @@ -1364,6 +1352,23 @@ protected: /// The size from the owning range. ptrdiff_t count; }; +/// Compare this range with another. +/// FIXME: Make me a member function instead of friend when it works in C++20. +template <typename OtherT, typename DerivedT, typename BaseT, typename T, + typename PointerT, typename ReferenceT> +bool operator==(const indexed_accessor_range_base<DerivedT, BaseT, T, PointerT, + ReferenceT> &lhs, + const OtherT &rhs) { + return std::equal(lhs.begin(), lhs.end(), rhs.begin(), rhs.end()); +} + +template <typename OtherT, typename DerivedT, typename BaseT, typename T, + typename PointerT, typename ReferenceT> +bool operator!=(const indexed_accessor_range_base<DerivedT, BaseT, T, PointerT, + ReferenceT> &lhs, + const OtherT &rhs) { + return !(lhs == rhs); +} } // end namespace detail /// This class provides an implementation of a range of diff --git a/llvm/include/llvm/ADT/StringRef.h b/llvm/include/llvm/ADT/StringRef.h index d892333de391..1c6c96678b5d 100644 --- a/llvm/include/llvm/ADT/StringRef.h +++ b/llvm/include/llvm/ADT/StringRef.h @@ -128,7 +128,7 @@ namespace llvm { /// data - Get a pointer to the start of the string (which may not be null /// terminated). - [[nodiscard]] const char *data() const { return Data; } + [[nodiscard]] constexpr const char *data() const { return Data; } /// empty - Check if the string is empty. [[nodiscard]] constexpr bool empty() const { return Length == 0; } @@ -245,7 +245,7 @@ namespace llvm { /// @name Type Conversions /// @{ - operator std::string_view() const { + constexpr operator std::string_view() const { return std::string_view(data(), size()); } diff --git a/llvm/include/llvm/Analysis/VecFuncs.def b/llvm/include/llvm/Analysis/VecFuncs.def index ee9207bb4f7d..b22bdd555cd4 100644 --- a/llvm/include/llvm/Analysis/VecFuncs.def +++ b/llvm/include/llvm/Analysis/VecFuncs.def @@ -470,123 +470,125 @@ TLI_DEFINE_VECFUNC("__exp2f_finite", "__svml_exp2f16", FIXED(16), "_ZGV_LLVM_N16 #elif defined(TLI_DEFINE_SLEEFGNUABI_VF2_VECFUNCS) -TLI_DEFINE_VECFUNC( "acos", "_ZGVnN2v_acos", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("acos", "_ZGVnN2v_acos", FIXED(2), "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC( "asin", "_ZGVnN2v_asin", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("asin", "_ZGVnN2v_asin", FIXED(2), "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC( "atan", "_ZGVnN2v_atan", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("atan", "_ZGVnN2v_atan", FIXED(2), "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC( "atan2", "_ZGVnN2vv_atan2", FIXED(2), "_ZGV_LLVM_N2vv") +TLI_DEFINE_VECFUNC("atan2", "_ZGVnN2vv_atan2", FIXED(2), "_ZGV_LLVM_N2vv") -TLI_DEFINE_VECFUNC( "atanh", "_ZGVnN2v_atanh", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("atanh", "_ZGVnN2v_atanh", FIXED(2), "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC( "cos", "_ZGVnN2v_cos", FIXED(2), "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC( "llvm.cos.f64", "_ZGVnN2v_cos", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("cos", "_ZGVnN2v_cos", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("llvm.cos.f64", "_ZGVnN2v_cos", FIXED(2), "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC( "cosh", "_ZGVnN2v_cosh", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("cosh", "_ZGVnN2v_cosh", FIXED(2), "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC( "exp", "_ZGVnN2v_exp", FIXED(2), "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC( "llvm.exp.f64", "_ZGVnN2v_exp", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("exp", "_ZGVnN2v_exp", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("llvm.exp.f64", "_ZGVnN2v_exp", FIXED(2), "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC( "exp2", "_ZGVnN2v_exp2", FIXED(2), "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC( "llvm.exp2.f64", "_ZGVnN2v_exp2", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("exp10", "_ZGVnN2v_exp10", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("llvm.exp10.f64", "_ZGVnN2v_exp10", FIXED(2), "_ZGV_LLVM_N2v") + +TLI_DEFINE_VECFUNC("exp2", "_ZGVnN2v_exp2", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("llvm.exp2.f64", "_ZGVnN2v_exp2", FIXED(2), "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC( "exp10", "_ZGVnN2v_exp10", FIXED(2), "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC( "llvm.exp10.f64", "_ZGVnN2v_exp10", FIXED(2), "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("fmod", "_ZGVnN2vv_fmod", FIXED(2), "_ZGV_LLVM_N2vv") -TLI_DEFINE_VECFUNC( "lgamma", "_ZGVnN2v_lgamma", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("lgamma", "_ZGVnN2v_lgamma", FIXED(2), "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC( "log", "_ZGVnN2v_log", FIXED(2), "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC( "llvm.log.f64", "_ZGVnN2v_log", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("log", "_ZGVnN2v_log", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("llvm.log.f64", "_ZGVnN2v_log", FIXED(2), "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC( "log2", "_ZGVnN2v_log2", FIXED(2), "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC( "llvm.log2.f64", "_ZGVnN2v_log2", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("log10", "_ZGVnN2v_log10", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("llvm.log10.f64", "_ZGVnN2v_log10", FIXED(2), "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC( "log10", "_ZGVnN2v_log10", FIXED(2), "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC( "llvm.log10.f64", "_ZGVnN2v_log10", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("log2", "_ZGVnN2v_log2", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("llvm.log2.f64", "_ZGVnN2v_log2", FIXED(2), "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC( "modf", "_ZGVnN2vl8_modf", FIXED(2), "_ZGV_LLVM_N2vl8") +TLI_DEFINE_VECFUNC("modf", "_ZGVnN2vl8_modf", FIXED(2), "_ZGV_LLVM_N2vl8") -TLI_DEFINE_VECFUNC( "pow", "_ZGVnN2vv_pow", FIXED(2), "_ZGV_LLVM_N2vv") -TLI_DEFINE_VECFUNC( "llvm.pow.f64", "_ZGVnN2vv_pow", FIXED(2), "_ZGV_LLVM_N2vv") +TLI_DEFINE_VECFUNC("pow", "_ZGVnN2vv_pow", FIXED(2), "_ZGV_LLVM_N2vv") +TLI_DEFINE_VECFUNC("llvm.pow.f64", "_ZGVnN2vv_pow", FIXED(2), "_ZGV_LLVM_N2vv") -TLI_DEFINE_VECFUNC( "sin", "_ZGVnN2v_sin", FIXED(2), "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC( "llvm.sin.f64", "_ZGVnN2v_sin", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("sin", "_ZGVnN2v_sin", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("llvm.sin.f64", "_ZGVnN2v_sin", FIXED(2), "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC( "sincos", "_ZGVnN2vl8l8_sincos", FIXED(2), "_ZGV_LLVM_N2vl8l8") +TLI_DEFINE_VECFUNC("sincos", "_ZGVnN2vl8l8_sincos", FIXED(2), "_ZGV_LLVM_N2vl8l8") -TLI_DEFINE_VECFUNC( "sincospi", "_ZGVnN2vl8l8_sincospi", FIXED(2), "_ZGV_LLVM_N2vl8l8") +TLI_DEFINE_VECFUNC("sincospi", "_ZGVnN2vl8l8_sincospi", FIXED(2), "_ZGV_LLVM_N2vl8l8") -TLI_DEFINE_VECFUNC( "sinh", "_ZGVnN2v_sinh", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("sinh", "_ZGVnN2v_sinh", FIXED(2), "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC( "sqrt", "_ZGVnN2v_sqrt", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("sqrt", "_ZGVnN2v_sqrt", FIXED(2), "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC( "tan", "_ZGVnN2v_tan", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("tan", "_ZGVnN2v_tan", FIXED(2), "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC( "tanh", "_ZGVnN2v_tanh", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("tanh", "_ZGVnN2v_tanh", FIXED(2), "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC( "tgamma", "_ZGVnN2v_tgamma", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("tgamma", "_ZGVnN2v_tgamma", FIXED(2), "_ZGV_LLVM_N2v") #elif defined(TLI_DEFINE_SLEEFGNUABI_VF4_VECFUNCS) -TLI_DEFINE_VECFUNC( "acosf", "_ZGVnN4v_acosf", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("acosf", "_ZGVnN4v_acosf", FIXED(4), "_ZGV_LLVM_N4v") + +TLI_DEFINE_VECFUNC("asinf", "_ZGVnN4v_asinf", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC( "asinf", "_ZGVnN4v_asinf", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("atanf", "_ZGVnN4v_atanf", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC( "atanf", "_ZGVnN4v_atanf", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("atan2f", "_ZGVnN4vv_atan2f", FIXED(4), "_ZGV_LLVM_N4vv") -TLI_DEFINE_VECFUNC( "atan2f", "_ZGVnN4vv_atan2f", FIXED(4), "_ZGV_LLVM_N4vv") +TLI_DEFINE_VECFUNC("atanhf", "_ZGVnN4v_atanhf", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC( "atanhf", "_ZGVnN4v_atanhf", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("cosf", "_ZGVnN4v_cosf", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("llvm.cos.f32", "_ZGVnN4v_cosf", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC( "cosf", "_ZGVnN4v_cosf", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC( "llvm.cos.f32", "_ZGVnN4v_cosf", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("coshf", "_ZGVnN4v_coshf", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC( "coshf", "_ZGVnN4v_coshf", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("expf", "_ZGVnN4v_expf", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("llvm.exp.f32", "_ZGVnN4v_expf", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC( "expf", "_ZGVnN4v_expf", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC( "llvm.exp.f32", "_ZGVnN4v_expf", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("exp10f", "_ZGVnN4v_exp10f", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("llvm.exp10.f32", "_ZGVnN4v_exp10f", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC( "exp2f", "_ZGVnN4v_exp2f", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC( "llvm.exp2.f32", "_ZGVnN4v_exp2f", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("exp2f", "_ZGVnN4v_exp2f", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("llvm.exp2.f32", "_ZGVnN4v_exp2f", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC( "exp10f", "_ZGVnN4v_exp10f", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC( "llvm.exp10.f32", "_ZGVnN4v_exp10f", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("fmodf", "_ZGVnN4vv_fmodf", FIXED(4), "_ZGV_LLVM_N4vv") -TLI_DEFINE_VECFUNC( "lgammaf", "_ZGVnN4v_lgammaf", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("lgammaf", "_ZGVnN4v_lgammaf", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC( "logf", "_ZGVnN4v_logf", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC( "llvm.log.f32", "_ZGVnN4v_logf", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("logf", "_ZGVnN4v_logf", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("llvm.log.f32", "_ZGVnN4v_logf", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC( "log2f", "_ZGVnN4v_log2f", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC( "llvm.log2.f32", "_ZGVnN4v_log2f", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("log10f", "_ZGVnN4v_log10f", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("llvm.log10.f32", "_ZGVnN4v_log10f", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC( "log10f", "_ZGVnN4v_log10f", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC( "llvm.log10.f32", "_ZGVnN4v_log10f", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("log2f", "_ZGVnN4v_log2f", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("llvm.log2.f32", "_ZGVnN4v_log2f", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC( "modff", "_ZGVnN4vl4_modff", FIXED(4), "_ZGV_LLVM_N4vl4") +TLI_DEFINE_VECFUNC("modff", "_ZGVnN4vl4_modff", FIXED(4), "_ZGV_LLVM_N4vl4") -TLI_DEFINE_VECFUNC( "powf", "_ZGVnN4vv_powf", FIXED(4), "_ZGV_LLVM_N4vv") -TLI_DEFINE_VECFUNC( "llvm.pow.f32", "_ZGVnN4vv_powf", FIXED(4), "_ZGV_LLVM_N4vv") +TLI_DEFINE_VECFUNC("powf", "_ZGVnN4vv_powf", FIXED(4), "_ZGV_LLVM_N4vv") +TLI_DEFINE_VECFUNC("llvm.pow.f32", "_ZGVnN4vv_powf", FIXED(4), "_ZGV_LLVM_N4vv") -TLI_DEFINE_VECFUNC( "sinf", "_ZGVnN4v_sinf", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC( "llvm.sin.f32", "_ZGVnN4v_sinf", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("sinf", "_ZGVnN4v_sinf", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("llvm.sin.f32", "_ZGVnN4v_sinf", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("sincosf", "_ZGVnN4vl4l4_sincosf", FIXED(4), "_ZGV_LLVM_N4vl4l4") TLI_DEFINE_VECFUNC("sincospif", "_ZGVnN4vl4l4_sincospif", FIXED(4), "_ZGV_LLVM_N4vl4l4") -TLI_DEFINE_VECFUNC( "sinhf", "_ZGVnN4v_sinhf", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("sinhf", "_ZGVnN4v_sinhf", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC( "sqrtf", "_ZGVnN4v_sqrtf", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("sqrtf", "_ZGVnN4v_sqrtf", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC( "tanf", "_ZGVnN4v_tanf", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("tanf", "_ZGVnN4v_tanf", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC( "tanhf", "_ZGVnN4v_tanhf", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("tanhf", "_ZGVnN4v_tanhf", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC( "tgammaf", "_ZGVnN4v_tgammaf", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("tgammaf", "_ZGVnN4v_tgammaf", FIXED(4), "_ZGV_LLVM_N4v") #elif defined(TLI_DEFINE_SLEEFGNUABI_SCALABLE_VECFUNCS) @@ -618,16 +620,16 @@ TLI_DEFINE_VECFUNC("expf", "_ZGVsMxv_expf", SCALABLE(4), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("llvm.exp.f64", "_ZGVsMxv_exp", SCALABLE(2), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("llvm.exp.f32", "_ZGVsMxv_expf", SCALABLE(4), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("exp2", "_ZGVsMxv_exp2", SCALABLE(2), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("exp2f", "_ZGVsMxv_exp2f", SCALABLE(4), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("llvm.exp2.f64", "_ZGVsMxv_exp2", SCALABLE(2), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("llvm.exp2.f32", "_ZGVsMxv_exp2f", SCALABLE(4), MASKED, "_ZGVsMxv") - TLI_DEFINE_VECFUNC("exp10", "_ZGVsMxv_exp10", SCALABLE(2), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("exp10f", "_ZGVsMxv_exp10f", SCALABLE(4), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("llvm.exp10.f64", "_ZGVsMxv_exp10", SCALABLE(2), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("llvm.exp10.f32", "_ZGVsMxv_exp10f", SCALABLE(4), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("exp2", "_ZGVsMxv_exp2", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("exp2f", "_ZGVsMxv_exp2f", SCALABLE(4), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("llvm.exp2.f64", "_ZGVsMxv_exp2", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("llvm.exp2.f32", "_ZGVsMxv_exp2f", SCALABLE(4), MASKED, "_ZGVsMxv") + TLI_DEFINE_VECFUNC("fmod", "_ZGVsMxvv_fmod", SCALABLE(2), MASKED, "_ZGVsMxvv") TLI_DEFINE_VECFUNC("fmodf", "_ZGVsMxvv_fmodf", SCALABLE(4), MASKED, "_ZGVsMxvv") @@ -639,16 +641,16 @@ TLI_DEFINE_VECFUNC("logf", "_ZGVsMxv_logf", SCALABLE(4), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("llvm.log.f64", "_ZGVsMxv_log", SCALABLE(2), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("llvm.log.f32", "_ZGVsMxv_logf", SCALABLE(4), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC( "log2", "_ZGVsMxv_log2", SCALABLE(2), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC( "log2f", "_ZGVsMxv_log2f", SCALABLE(4), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC( "llvm.log2.f64", "_ZGVsMxv_log2", SCALABLE(2), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC( "llvm.log2.f32", "_ZGVsMxv_log2f", SCALABLE(4), MASKED, "_ZGVsMxv") - TLI_DEFINE_VECFUNC("log10", "_ZGVsMxv_log10", SCALABLE(2), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("log10f", "_ZGVsMxv_log10f", SCALABLE(4), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("llvm.log10.f64", "_ZGVsMxv_log10", SCALABLE(2), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("llvm.log10.f32", "_ZGVsMxv_log10f", SCALABLE(4), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("log2", "_ZGVsMxv_log2", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("log2f", "_ZGVsMxv_log2f", SCALABLE(4), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("llvm.log2.f64", "_ZGVsMxv_log2", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("llvm.log2.f32", "_ZGVsMxv_log2f", SCALABLE(4), MASKED, "_ZGVsMxv") + TLI_DEFINE_VECFUNC("modf", "_ZGVsMxvl8_modf", SCALABLE(2), MASKED, "_ZGVsMxvl8") TLI_DEFINE_VECFUNC("modff", "_ZGVsMxvl4_modff", SCALABLE(4), MASKED, "_ZGVsMxvl4") @@ -765,16 +767,6 @@ TLI_DEFINE_VECFUNC("llvm.exp.f32", "armpl_vexpq_f32", FIXED(4), NOMASK, "_ZGV_LL TLI_DEFINE_VECFUNC("llvm.exp.f64", "armpl_svexp_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("llvm.exp.f32", "armpl_svexp_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("exp2", "armpl_vexp2q_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC("exp2f", "armpl_vexp2q_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC("exp2", "armpl_svexp2_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("exp2f", "armpl_svexp2_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv") - -TLI_DEFINE_VECFUNC("llvm.exp2.f64", "armpl_vexp2q_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC("llvm.exp2.f32", "armpl_vexp2q_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC("llvm.exp2.f64", "armpl_svexp2_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("llvm.exp2.f32", "armpl_svexp2_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv") - TLI_DEFINE_VECFUNC("exp10", "armpl_vexp10q_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("exp10f", "armpl_vexp10q_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("exp10", "armpl_svexp10_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv") @@ -785,6 +777,16 @@ TLI_DEFINE_VECFUNC("llvm.exp10.f32", "armpl_vexp10q_f32", FIXED(4), NOMASK, "_ZG TLI_DEFINE_VECFUNC("llvm.exp10.f64", "armpl_svexp10_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("llvm.exp10.f32", "armpl_svexp10_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("exp2", "armpl_vexp2q_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("exp2f", "armpl_vexp2q_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("exp2", "armpl_svexp2_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("exp2f", "armpl_svexp2_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv") + +TLI_DEFINE_VECFUNC("llvm.exp2.f64", "armpl_vexp2q_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("llvm.exp2.f32", "armpl_vexp2q_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("llvm.exp2.f64", "armpl_svexp2_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("llvm.exp2.f32", "armpl_svexp2_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv") + TLI_DEFINE_VECFUNC("expm1", "armpl_vexpm1q_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("expm1f", "armpl_vexpm1q_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("expm1", "armpl_svexpm1_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv") @@ -830,6 +832,16 @@ TLI_DEFINE_VECFUNC("llvm.log.f32", "armpl_vlogq_f32", FIXED(4), NOMASK, "_ZGV_LL TLI_DEFINE_VECFUNC("llvm.log.f64", "armpl_svlog_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("llvm.log.f32", "armpl_svlog_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("log10", "armpl_vlog10q_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("log10f", "armpl_vlog10q_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("log10", "armpl_svlog10_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("log10f", "armpl_svlog10_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv") + +TLI_DEFINE_VECFUNC("llvm.log10.f64", "armpl_vlog10q_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("llvm.log10.f32", "armpl_vlog10q_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("llvm.log10.f64", "armpl_svlog10_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("llvm.log10.f32", "armpl_svlog10_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv") + TLI_DEFINE_VECFUNC("log1p", "armpl_vlog1pq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("log1pf", "armpl_vlog1pq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("log1p", "armpl_svlog1p_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv") @@ -845,16 +857,6 @@ TLI_DEFINE_VECFUNC("llvm.log2.f32", "armpl_vlog2q_f32", FIXED(4), NOMASK, "_ZGV_ TLI_DEFINE_VECFUNC("llvm.log2.f64", "armpl_svlog2_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("llvm.log2.f32", "armpl_svlog2_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("log10", "armpl_vlog10q_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC("log10f", "armpl_vlog10q_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC("log10", "armpl_svlog10_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("log10f", "armpl_svlog10_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv") - -TLI_DEFINE_VECFUNC("llvm.log10.f64", "armpl_vlog10q_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC("llvm.log10.f32", "armpl_vlog10q_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC("llvm.log10.f64", "armpl_svlog10_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("llvm.log10.f32", "armpl_svlog10_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv") - TLI_DEFINE_VECFUNC("modf", "armpl_vmodfq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2vl8") TLI_DEFINE_VECFUNC("modff", "armpl_vmodfq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4vl4") TLI_DEFINE_VECFUNC("modf", "armpl_svmodf_f64_x", SCALABLE(2), MASKED, "_ZGVsMxvl8") diff --git a/llvm/include/llvm/BinaryFormat/ELFRelocs/AArch64.def b/llvm/include/llvm/BinaryFormat/ELFRelocs/AArch64.def index 30375de420e3..5fb3fa4aeb7b 100644 --- a/llvm/include/llvm/BinaryFormat/ELFRelocs/AArch64.def +++ b/llvm/include/llvm/BinaryFormat/ELFRelocs/AArch64.def @@ -59,6 +59,7 @@ ELF_RELOC(R_AARCH64_ADR_GOT_PAGE, 0x137) ELF_RELOC(R_AARCH64_LD64_GOT_LO12_NC, 0x138) ELF_RELOC(R_AARCH64_LD64_GOTPAGE_LO15, 0x139) ELF_RELOC(R_AARCH64_PLT32, 0x13a) +ELF_RELOC(R_AARCH64_GOTPCREL32, 0x13b) ELF_RELOC(R_AARCH64_TLSGD_ADR_PREL21, 0x200) ELF_RELOC(R_AARCH64_TLSGD_ADR_PAGE21, 0x201) ELF_RELOC(R_AARCH64_TLSGD_ADD_LO12_NC, 0x202) diff --git a/llvm/include/llvm/BinaryFormat/ELFRelocs/RISCV.def b/llvm/include/llvm/BinaryFormat/ELFRelocs/RISCV.def index c7fd6490041c..b478799c91fb 100644 --- a/llvm/include/llvm/BinaryFormat/ELFRelocs/RISCV.def +++ b/llvm/include/llvm/BinaryFormat/ELFRelocs/RISCV.def @@ -40,8 +40,7 @@ ELF_RELOC(R_RISCV_SUB8, 37) ELF_RELOC(R_RISCV_SUB16, 38) ELF_RELOC(R_RISCV_SUB32, 39) ELF_RELOC(R_RISCV_SUB64, 40) -ELF_RELOC(R_RISCV_GNU_VTINHERIT, 41) -ELF_RELOC(R_RISCV_GNU_VTENTRY, 42) +ELF_RELOC(R_RISCV_GOT32_PCREL, 41) ELF_RELOC(R_RISCV_ALIGN, 43) ELF_RELOC(R_RISCV_RVC_BRANCH, 44) ELF_RELOC(R_RISCV_RVC_JUMP, 45) diff --git a/llvm/include/llvm/CodeGen/AssignmentTrackingAnalysis.h b/llvm/include/llvm/CodeGen/AssignmentTrackingAnalysis.h index b740ab567b12..fb0ecd828b68 100644 --- a/llvm/include/llvm/CodeGen/AssignmentTrackingAnalysis.h +++ b/llvm/include/llvm/CodeGen/AssignmentTrackingAnalysis.h @@ -1,13 +1,21 @@ +//===-- llvm/CodeGen/AssignmentTrackingAnalysis.h --------------*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + #ifndef LLVM_CODEGEN_ASSIGNMENTTRACKINGANALYSIS_H #define LLVM_CODEGEN_ASSIGNMENTTRACKINGANALYSIS_H #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/PassManager.h" #include "llvm/Pass.h" namespace llvm { -class Function; class Instruction; class raw_ostream; } // namespace llvm @@ -94,6 +102,25 @@ public: ///@} }; +class DebugAssignmentTrackingAnalysis + : public AnalysisInfoMixin<DebugAssignmentTrackingAnalysis> { + friend AnalysisInfoMixin<DebugAssignmentTrackingAnalysis>; + static AnalysisKey Key; + +public: + using Result = FunctionVarLocs; + Result run(Function &F, FunctionAnalysisManager &FAM); +}; + +class DebugAssignmentTrackingPrinterPass + : public PassInfoMixin<DebugAssignmentTrackingPrinterPass> { + raw_ostream &OS; + +public: + DebugAssignmentTrackingPrinterPass(raw_ostream &OS) : OS(OS) {} + PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM); +}; + class AssignmentTrackingAnalysis : public FunctionPass { std::unique_ptr<FunctionVarLocs> Results; diff --git a/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h b/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h index fa81ff504ac6..f540f3774c41 100644 --- a/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h +++ b/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h @@ -23,6 +23,7 @@ #include "llvm/Analysis/ScopedNoAliasAA.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/TypeBasedAliasAnalysis.h" +#include "llvm/CodeGen/AssignmentTrackingAnalysis.h" #include "llvm/CodeGen/CallBrPrepare.h" #include "llvm/CodeGen/CodeGenPrepare.h" #include "llvm/CodeGen/DwarfEHPrepare.h" diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h index dcc1a4580b14..a6e9406bed06 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -910,6 +910,9 @@ private: bool tryFoldSelectOfConstants(GSelect *Select, BuildFnTy &MatchInfo); + /// Try to fold (icmp X, Y) ? X : Y -> integer minmax. + bool tryFoldSelectToIntMinMax(GSelect *Select, BuildFnTy &MatchInfo); + bool isOneOrOneSplat(Register Src, bool AllowUndefs); bool isZeroOrZeroSplat(Register Src, bool AllowUndefs); bool isConstantSplatVector(Register Src, int64_t SplatValue, diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h index 6ab1d4550c51..14885d5f9d08 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h @@ -558,6 +558,24 @@ public: } }; +/// Represents a G_PHI. +class GPhi : public GenericMachineInstr { +public: + /// Returns the number of incoming values. + unsigned getNumIncomingValues() const { return (getNumOperands() - 1) / 2; } + /// Returns the I'th incoming vreg. + Register getIncomingValue(unsigned I) { + return getOperand(I * 2 + 1).getReg(); + } + /// Returns the I'th incoming basic block. + MachineBasicBlock *getIncomingBlock(unsigned I) { + return getOperand(I * 2 + 2).getMBB(); + } + + static bool classof(const MachineInstr *MI) { + return MI->getOpcode() == TargetOpcode::G_PHI; + } +}; } // namespace llvm diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Localizer.h b/llvm/include/llvm/CodeGen/GlobalISel/Localizer.h index b1fcdd207a60..4fbff4d10f8a 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/Localizer.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/Localizer.h @@ -67,10 +67,9 @@ private: typedef SmallSetVector<MachineInstr *, 32> LocalizedSetVecT; - /// If \p Op is a phi operand and not unique in that phi, that is, - /// there are other operands in the phi with the same register, - /// return true. - bool isNonUniquePhiValue(MachineOperand &Op) const; + /// If \p Op is a reg operand of a PHI, return the number of total + /// operands in the PHI that are the same as \p Op, including itself. + unsigned getNumPhiUses(MachineOperand &Op) const; /// Do inter-block localization from the entry block. bool localizeInterBlock(MachineFunction &MF, diff --git a/llvm/include/llvm/CodeGen/SelectionDAGISel.h b/llvm/include/llvm/CodeGen/SelectionDAGISel.h index 40046e0a8dec..e4d90f6e898f 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGISel.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGISel.h @@ -159,7 +159,15 @@ public: OPC_CheckChild2Same, OPC_CheckChild3Same, OPC_CheckPatternPredicate, + OPC_CheckPatternPredicate0, + OPC_CheckPatternPredicate1, OPC_CheckPatternPredicate2, + OPC_CheckPatternPredicate3, + OPC_CheckPatternPredicate4, + OPC_CheckPatternPredicate5, + OPC_CheckPatternPredicate6, + OPC_CheckPatternPredicate7, + OPC_CheckPatternPredicateTwoByte, OPC_CheckPredicate, OPC_CheckPredicateWithOperands, OPC_CheckOpcode, @@ -207,6 +215,14 @@ public: OPC_CheckChild2CondCode, OPC_CheckValueType, OPC_CheckComplexPat, + OPC_CheckComplexPat0, + OPC_CheckComplexPat1, + OPC_CheckComplexPat2, + OPC_CheckComplexPat3, + OPC_CheckComplexPat4, + OPC_CheckComplexPat5, + OPC_CheckComplexPat6, + OPC_CheckComplexPat7, OPC_CheckAndImm, OPC_CheckOrImm, OPC_CheckImmAllOnesV, diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h index ebf410cc94de..65b06d0f4579 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -935,6 +935,9 @@ public: /// Helper method returns the APInt of a ConstantSDNode operand. inline const APInt &getConstantOperandAPInt(unsigned Num) const; + /// Helper method returns the APInt value of a ConstantSDNode. + inline const APInt &getAsAPIntVal() const; + const SDValue &getOperand(unsigned Num) const { assert(Num < NumOperands && "Invalid child # of SDNode!"); return OperandList[Num]; @@ -1656,6 +1659,10 @@ const APInt &SDNode::getConstantOperandAPInt(unsigned Num) const { return cast<ConstantSDNode>(getOperand(Num))->getAPIntValue(); } +const APInt &SDNode::getAsAPIntVal() const { + return cast<ConstantSDNode>(this)->getAPIntValue(); +} + class ConstantFPSDNode : public SDNode { friend class SelectionDAG; diff --git a/llvm/lib/DWARFLinker/Parallel/Utils.h b/llvm/include/llvm/DWARFLinker/Utils.h index 3c05b2ea173d..23e59c967011 100644 --- a/llvm/lib/DWARFLinker/Parallel/Utils.h +++ b/llvm/include/llvm/DWARFLinker/Utils.h @@ -6,14 +6,17 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_LIB_DWARFLINKER_PARALLEL_UTILS_H -#define LLVM_LIB_DWARFLINKER_PARALLEL_UTILS_H +#ifndef LLVM_DWARFLINKER_UTILS_H +#define LLVM_DWARFLINKER_UTILS_H +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/Twine.h" #include "llvm/Support/Error.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" namespace llvm { namespace dwarf_linker { -namespace parallel { /// This function calls \p Iteration() until it returns false. /// If number of iterations exceeds \p MaxCounter then an Error is returned. @@ -27,16 +30,35 @@ inline Error finiteLoop(function_ref<Expected<bool>()> Iteration, Expected<bool> IterationResultOrError = Iteration(); if (!IterationResultOrError) return IterationResultOrError.takeError(); - if (!IterationResultOrError.get()) return Error::success(); } - return createStringError(std::errc::invalid_argument, "Infinite recursion"); } -} // end of namespace parallel +/// Make a best effort to guess the +/// Xcode.app/Contents/Developer/Toolchains/ path from an SDK path. +inline SmallString<128> guessToolchainBaseDir(StringRef SysRoot) { + SmallString<128> Result; + // Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk + StringRef Base = sys::path::parent_path(SysRoot); + if (sys::path::filename(Base) != "SDKs") + return Result; + Base = sys::path::parent_path(Base); + Result = Base; + Result += "/Toolchains"; + return Result; +} + +inline bool isPathAbsoluteOnWindowsOrPosix(const Twine &Path) { + // Debug info can contain paths from any OS, not necessarily + // an OS we're currently running on. Moreover different compilation units can + // be compiled on different operating systems and linked together later. + return sys::path::is_absolute(Path, sys::path::Style::posix) || + sys::path::is_absolute(Path, sys::path::Style::windows); +} + } // end of namespace dwarf_linker } // end of namespace llvm -#endif // LLVM_LIB_DWARFLINKER_PARALLEL_UTILS_H +#endif // LLVM_DWARFLINKER_UTILS_H diff --git a/llvm/include/llvm/Frontend/OpenACC/ACC.td b/llvm/include/llvm/Frontend/OpenACC/ACC.td index 013d18e160de..0dbd934d83f0 100644 --- a/llvm/include/llvm/Frontend/OpenACC/ACC.td +++ b/llvm/include/llvm/Frontend/OpenACC/ACC.td @@ -391,9 +391,7 @@ def ACC_Loop : Directive<"loop"> { let allowedClauses = [ VersionedClause<ACCC_DeviceType>, VersionedClause<ACCC_Private>, - VersionedClause<ACCC_Reduction> - ]; - let allowedOnceClauses = [ + VersionedClause<ACCC_Reduction>, VersionedClause<ACCC_Collapse>, VersionedClause<ACCC_Gang>, VersionedClause<ACCC_Tile>, @@ -421,15 +419,17 @@ def ACC_Init : Directive<"init"> { // 2.15.1 def ACC_Routine : Directive<"routine"> { - let allowedOnceClauses = [ + let allowedClauses = [ VersionedClause<ACCC_Bind>, VersionedClause<ACCC_DeviceType>, - VersionedClause<ACCC_NoHost>, VersionedClause<ACCC_Gang>, VersionedClause<ACCC_Seq>, VersionedClause<ACCC_Vector>, VersionedClause<ACCC_Worker> ]; + let allowedOnceClauses = [ + VersionedClause<ACCC_NoHost> + ]; } // 2.14.3 @@ -532,32 +532,32 @@ def ACC_HostData : Directive<"host_data"> { // 2.11 def ACC_KernelsLoop : Directive<"kernels loop"> { let allowedClauses = [ + VersionedClause<ACCC_Attach>, + VersionedClause<ACCC_Collapse>, VersionedClause<ACCC_Copy>, VersionedClause<ACCC_Copyin>, VersionedClause<ACCC_Copyout>, VersionedClause<ACCC_Create>, + VersionedClause<ACCC_DevicePtr>, VersionedClause<ACCC_DeviceType>, + VersionedClause<ACCC_Gang>, VersionedClause<ACCC_NoCreate>, + VersionedClause<ACCC_NumGangs>, + VersionedClause<ACCC_NumWorkers>, VersionedClause<ACCC_Present>, VersionedClause<ACCC_Private>, VersionedClause<ACCC_Reduction>, - VersionedClause<ACCC_DevicePtr>, - VersionedClause<ACCC_Attach>, - VersionedClause<ACCC_Wait> + VersionedClause<ACCC_Tile>, + VersionedClause<ACCC_Vector>, + VersionedClause<ACCC_VectorLength>, + VersionedClause<ACCC_Wait>, + VersionedClause<ACCC_Worker> ]; let allowedOnceClauses = [ VersionedClause<ACCC_Async>, - VersionedClause<ACCC_Collapse>, VersionedClause<ACCC_Default>, - VersionedClause<ACCC_Gang>, VersionedClause<ACCC_If>, - VersionedClause<ACCC_NumGangs>, - VersionedClause<ACCC_NumWorkers>, - VersionedClause<ACCC_Self>, - VersionedClause<ACCC_Tile>, - VersionedClause<ACCC_Vector>, - VersionedClause<ACCC_VectorLength>, - VersionedClause<ACCC_Worker> + VersionedClause<ACCC_Self> ]; let allowedExclusiveClauses = [ VersionedClause<ACCC_Auto>, @@ -570,6 +570,7 @@ def ACC_KernelsLoop : Directive<"kernels loop"> { def ACC_ParallelLoop : Directive<"parallel loop"> { let allowedClauses = [ VersionedClause<ACCC_Attach>, + VersionedClause<ACCC_Collapse>, VersionedClause<ACCC_Copy>, VersionedClause<ACCC_Copyin>, VersionedClause<ACCC_Copyout>, @@ -577,25 +578,24 @@ def ACC_ParallelLoop : Directive<"parallel loop"> { VersionedClause<ACCC_DevicePtr>, VersionedClause<ACCC_DeviceType>, VersionedClause<ACCC_FirstPrivate>, + VersionedClause<ACCC_Gang>, VersionedClause<ACCC_NoCreate>, + VersionedClause<ACCC_NumGangs>, + VersionedClause<ACCC_NumWorkers>, VersionedClause<ACCC_Present>, VersionedClause<ACCC_Private>, VersionedClause<ACCC_Reduction>, VersionedClause<ACCC_Tile>, - VersionedClause<ACCC_Wait> + VersionedClause<ACCC_Vector>, + VersionedClause<ACCC_VectorLength>, + VersionedClause<ACCC_Wait>, + VersionedClause<ACCC_Worker> ]; let allowedOnceClauses = [ VersionedClause<ACCC_Async>, - VersionedClause<ACCC_Collapse>, VersionedClause<ACCC_Default>, - VersionedClause<ACCC_Gang>, VersionedClause<ACCC_If>, - VersionedClause<ACCC_NumGangs>, - VersionedClause<ACCC_NumWorkers>, - VersionedClause<ACCC_Self>, - VersionedClause<ACCC_Vector>, - VersionedClause<ACCC_VectorLength>, - VersionedClause<ACCC_Worker> + VersionedClause<ACCC_Self> ]; let allowedExclusiveClauses = [ VersionedClause<ACCC_Auto>, @@ -608,6 +608,7 @@ def ACC_ParallelLoop : Directive<"parallel loop"> { def ACC_SerialLoop : Directive<"serial loop"> { let allowedClauses = [ VersionedClause<ACCC_Attach>, + VersionedClause<ACCC_Collapse>, VersionedClause<ACCC_Copy>, VersionedClause<ACCC_Copyin>, VersionedClause<ACCC_Copyout>, @@ -615,22 +616,21 @@ def ACC_SerialLoop : Directive<"serial loop"> { VersionedClause<ACCC_DevicePtr>, VersionedClause<ACCC_DeviceType>, VersionedClause<ACCC_FirstPrivate>, + VersionedClause<ACCC_Gang>, VersionedClause<ACCC_NoCreate>, VersionedClause<ACCC_Present>, VersionedClause<ACCC_Private>, VersionedClause<ACCC_Reduction>, - VersionedClause<ACCC_Wait> + VersionedClause<ACCC_Tile>, + VersionedClause<ACCC_Vector>, + VersionedClause<ACCC_Wait>, + VersionedClause<ACCC_Worker> ]; let allowedOnceClauses = [ VersionedClause<ACCC_Async>, - VersionedClause<ACCC_Collapse>, VersionedClause<ACCC_Default>, - VersionedClause<ACCC_Gang>, VersionedClause<ACCC_If>, - VersionedClause<ACCC_Self>, - VersionedClause<ACCC_Tile>, - VersionedClause<ACCC_Vector>, - VersionedClause<ACCC_Worker> + VersionedClause<ACCC_Self> ]; let allowedExclusiveClauses = [ VersionedClause<ACCC_Auto>, diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index 9088168b4c67..acff5c20b1b9 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -2708,8 +2708,8 @@ class SVE2p1_Single_Store_Quadword : DefaultAttrsIntrinsic<[], [llvm_anyvector_ty, llvm_nxv1i1_ty, llvm_ptr_ty], [IntrWriteMem, IntrArgMemOnly]>; -def int_aarch64_sve_st1uwq : SVE2p1_Single_Store_Quadword; -def int_aarch64_sve_st1udq : SVE2p1_Single_Store_Quadword; +def int_aarch64_sve_st1wq : SVE2p1_Single_Store_Quadword; +def int_aarch64_sve_st1dq : SVE2p1_Single_Store_Quadword; def int_aarch64_sve_ld2q_sret : AdvSIMD_2Vec_PredLoad_Intrinsic; @@ -3617,7 +3617,7 @@ def int_aarch64_sve_tbxq : AdvSIMD_SVE2_TBX_Intrinsic; // SVE2.1 - Extract vector segment from each pair of quadword segments. // -def int_aarch64_sve_extq_lane : AdvSIMD_2VectorArgIndexed_Intrinsic; +def int_aarch64_sve_extq : AdvSIMD_2VectorArgIndexed_Intrinsic; // // SVE2.1 - Move predicate to/from vector diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td index 6fd8e80013ce..cf50f2a59f60 100644 --- a/llvm/include/llvm/IR/IntrinsicsNVVM.td +++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td @@ -4710,4 +4710,14 @@ def int_nvvm_is_explicit_cluster [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>], "llvm.nvvm.is_explicit_cluster">; +// Setmaxnreg inc/dec intrinsics +def int_nvvm_setmaxnreg_inc_sync_aligned_u32 + : DefaultAttrsIntrinsic<[], [llvm_i32_ty], + [IntrConvergent, IntrNoMem, IntrHasSideEffects, ImmArg<ArgIndex<0>>], + "llvm.nvvm.setmaxnreg.inc.sync.aligned.u32">; +def int_nvvm_setmaxnreg_dec_sync_aligned_u32 + : DefaultAttrsIntrinsic<[], [llvm_i32_ty], + [IntrConvergent, IntrNoMem, IntrHasSideEffects, ImmArg<ArgIndex<0>>], + "llvm.nvvm.setmaxnreg.dec.sync.aligned.u32">; + } // let TargetPrefix = "nvvm" diff --git a/llvm/include/llvm/IR/ModuleSummaryIndex.h b/llvm/include/llvm/IR/ModuleSummaryIndex.h index e72f74ad4adb..66c7d10d823d 100644 --- a/llvm/include/llvm/IR/ModuleSummaryIndex.h +++ b/llvm/include/llvm/IR/ModuleSummaryIndex.h @@ -1011,6 +1011,12 @@ public: return *Callsites; } + void addCallsite(CallsiteInfo &Callsite) { + if (!Callsites) + Callsites = std::make_unique<CallsitesTy>(); + Callsites->push_back(Callsite); + } + ArrayRef<AllocInfo> allocs() const { if (Allocs) return *Allocs; diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h index 447ac0f2aa61..90d99a6031c8 100644 --- a/llvm/include/llvm/IR/PatternMatch.h +++ b/llvm/include/llvm/IR/PatternMatch.h @@ -1495,6 +1495,36 @@ struct ThreeOps_match { } }; +/// Matches instructions with Opcode and any number of operands +template <unsigned Opcode, typename... OperandTypes> struct AnyOps_match { + std::tuple<OperandTypes...> Operands; + + AnyOps_match(const OperandTypes &...Ops) : Operands(Ops...) {} + + // Operand matching works by recursively calling match_operands, matching the + // operands left to right. The first version is called for each operand but + // the last, for which the second version is called. The second version of + // match_operands is also used to match each individual operand. + template <int Idx, int Last> + std::enable_if_t<Idx != Last, bool> match_operands(const Instruction *I) { + return match_operands<Idx, Idx>(I) && match_operands<Idx + 1, Last>(I); + } + + template <int Idx, int Last> + std::enable_if_t<Idx == Last, bool> match_operands(const Instruction *I) { + return std::get<Idx>(Operands).match(I->getOperand(Idx)); + } + + template <typename OpTy> bool match(OpTy *V) { + if (V->getValueID() == Value::InstructionVal + Opcode) { + auto *I = cast<Instruction>(V); + return I->getNumOperands() == sizeof...(OperandTypes) && + match_operands<0, sizeof...(OperandTypes) - 1>(I); + } + return false; + } +}; + /// Matches SelectInst. template <typename Cond, typename LHS, typename RHS> inline ThreeOps_match<Cond, LHS, RHS, Instruction::Select> @@ -1611,6 +1641,12 @@ m_Store(const ValueOpTy &ValueOp, const PointerOpTy &PointerOp) { PointerOp); } +/// Matches GetElementPtrInst. +template <typename... OperandTypes> +inline auto m_GEP(const OperandTypes &...Ops) { + return AnyOps_match<Instruction::GetElementPtr, OperandTypes...>(Ops...); +} + //===----------------------------------------------------------------------===// // Matchers for CastInst classes // diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h index 36be2e7d869e..87e7bbbd727e 100644 --- a/llvm/include/llvm/ProfileData/InstrProf.h +++ b/llvm/include/llvm/ProfileData/InstrProf.h @@ -1035,7 +1035,8 @@ const HashT HashType = HashT::MD5; inline uint64_t ComputeHash(StringRef K) { return ComputeHash(HashType, K); } // This structure defines the file header of the LLVM profile -// data file in indexed-format. +// data file in indexed-format. Please update llvm/docs/InstrProfileFormat.rst +// as appropriate when updating the indexed profile format. struct Header { uint64_t Magic; uint64_t Version; diff --git a/llvm/include/llvm/ProfileData/InstrProfData.inc b/llvm/include/llvm/ProfileData/InstrProfData.inc index f5de23ff4b94..25df899b3f36 100644 --- a/llvm/include/llvm/ProfileData/InstrProfData.inc +++ b/llvm/include/llvm/ProfileData/InstrProfData.inc @@ -123,6 +123,8 @@ INSTR_PROF_VALUE_NODE(PtrToNodeT, llvm::PointerType::getUnqual(Ctx), Next, \ /* INSTR_PROF_RAW_HEADER start */ /* Definition of member fields of the raw profile header data structure. */ +/* Please update llvm/docs/InstrProfileFormat.rst as appropriate when updating + raw profile format. */ #ifndef INSTR_PROF_RAW_HEADER #define INSTR_PROF_RAW_HEADER(Type, Name, Initializer) #else diff --git a/llvm/include/llvm/Support/RISCVISAInfo.h b/llvm/include/llvm/Support/RISCVISAInfo.h index c539448683d3..46df93d75226 100644 --- a/llvm/include/llvm/Support/RISCVISAInfo.h +++ b/llvm/include/llvm/Support/RISCVISAInfo.h @@ -18,11 +18,6 @@ #include <vector> namespace llvm { -struct RISCVExtensionInfo { - unsigned MajorVersion; - unsigned MinorVersion; -}; - void riscvExtensionsHelp(StringMap<StringRef> DescMap); class RISCVISAInfo { @@ -30,6 +25,12 @@ public: RISCVISAInfo(const RISCVISAInfo &) = delete; RISCVISAInfo &operator=(const RISCVISAInfo &) = delete; + /// Represents the major and version number components of a RISC-V extension. + struct ExtensionVersion { + unsigned Major; + unsigned Minor; + }; + static bool compareExtension(const std::string &LHS, const std::string &RHS); /// Helper class for OrderedExtensionMap. @@ -41,7 +42,7 @@ public: /// OrderedExtensionMap is std::map, it's specialized to keep entries /// in canonical order of extension. - typedef std::map<std::string, RISCVExtensionInfo, ExtensionComparator> + typedef std::map<std::string, ExtensionVersion, ExtensionComparator> OrderedExtensionMap; RISCVISAInfo(unsigned XLen, OrderedExtensionMap &Exts) @@ -71,10 +72,10 @@ public: std::vector<std::string> toFeatures(bool AddAllExtensions = false, bool IgnoreUnknown = true) const; - const OrderedExtensionMap &getExtensions() const { return Exts; }; + const OrderedExtensionMap &getExtensions() const { return Exts; } - unsigned getXLen() const { return XLen; }; - unsigned getFLen() const { return FLen; }; + unsigned getXLen() const { return XLen; } + unsigned getFLen() const { return FLen; } unsigned getMinVLen() const { return MinVLen; } unsigned getMaxVLen() const { return 65536; } unsigned getMaxELen() const { return MaxELen; } @@ -104,8 +105,7 @@ private: OrderedExtensionMap Exts; - void addExtension(StringRef ExtName, unsigned MajorVersion, - unsigned MinorVersion); + void addExtension(StringRef ExtName, ExtensionVersion Version); Error checkDependency(); diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def index 3824b1c66951..c005218c80f4 100644 --- a/llvm/include/llvm/Support/TargetOpcodes.def +++ b/llvm/include/llvm/Support/TargetOpcodes.def @@ -687,6 +687,9 @@ HANDLE_TARGET_OPCODE(G_FMINIMUM) HANDLE_TARGET_OPCODE(G_FMAXIMUM) /// Access to FP environment. +HANDLE_TARGET_OPCODE(G_GET_FPENV) +HANDLE_TARGET_OPCODE(G_SET_FPENV) +HANDLE_TARGET_OPCODE(G_RESET_FPENV) HANDLE_TARGET_OPCODE(G_GET_FPMODE) HANDLE_TARGET_OPCODE(G_SET_FPMODE) HANDLE_TARGET_OPCODE(G_RESET_FPMODE) diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td index 73e38b15bf67..2c73b67f9e1a 100644 --- a/llvm/include/llvm/Target/GenericOpcodes.td +++ b/llvm/include/llvm/Target/GenericOpcodes.td @@ -1020,6 +1020,27 @@ def G_FNEARBYINT : GenericInstruction { // it is modeled as a side effect, because constrained intrinsics use the same // method. +// Reading floating-point environment. +def G_GET_FPENV : GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins); + let hasSideEffects = true; +} + +// Setting floating-point environment. +def G_SET_FPENV : GenericInstruction { + let OutOperandList = (outs); + let InOperandList = (ins type0:$src); + let hasSideEffects = true; +} + +// Setting default floating-point environment. +def G_RESET_FPENV : GenericInstruction { + let OutOperandList = (outs); + let InOperandList = (ins); + let hasSideEffects = true; +} + // Reading floating-point control modes. def G_GET_FPMODE : GenericInstruction { let OutOperandList = (outs type0:$dst); diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td index 5e704f0b9a75..f792237203b4 100644 --- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td +++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td @@ -116,6 +116,9 @@ def : GINodeEquiv<G_INTRINSIC, intrinsic_wo_chain> { let IfConvergent = G_INTRINSIC_CONVERGENT; } +def : GINodeEquiv<G_GET_FPENV, get_fpenv>; +def : GINodeEquiv<G_SET_FPENV, set_fpenv>; +def : GINodeEquiv<G_RESET_FPENV, reset_fpenv>; def : GINodeEquiv<G_GET_FPMODE, get_fpmode>; def : GINodeEquiv<G_SET_FPMODE, set_fpmode>; def : GINodeEquiv<G_RESET_FPMODE, reset_fpmode>; diff --git a/llvm/include/llvm/Target/TargetPfmCounters.td b/llvm/include/llvm/Target/TargetPfmCounters.td index b00f3e19c35f..33dff741fa2a 100644 --- a/llvm/include/llvm/Target/TargetPfmCounters.td +++ b/llvm/include/llvm/Target/TargetPfmCounters.td @@ -28,6 +28,24 @@ class PfmIssueCounter<string resource_name, string counter> string ResourceName = resource_name; } +// Definition of a validation event. A validation event represents a specific +// event that can be measured using performance counters that is interesting +// in regard to the snippet state. +class ValidationEvent <int event_number> { + int EventNumber = event_number; +} + +def InstructionRetired : ValidationEvent<0>; + +// PfmValidationCounter provides a mapping between the events that are +// are interesting in regards to the snippet execution environment and +// a concrete performance counter name that can be looked up in libpfm. +class PfmValidationCounter<ValidationEvent event_type, string counter> + : PfmCounter<counter> { + // The name of the event that the validation counter detects. + ValidationEvent EventType = event_type; +} + def NoPfmCounter : PfmCounter <""> {} // Set of PfmCounters for measuring sched model characteristics. @@ -38,6 +56,9 @@ class ProcPfmCounters { PfmCounter UopsCounter = NoPfmCounter; // Processors can define how to measure issued uops by defining IssueCounters. list<PfmIssueCounter> IssueCounters = []; + // Processor can list mappings between validation events and real counters + // to measure the specified events. + list<PfmValidationCounter> ValidationCounters = []; } // A binding of a set of counters to a CPU. diff --git a/llvm/include/llvm/TargetParser/ARMTargetParserCommon.h b/llvm/include/llvm/TargetParser/ARMTargetParserCommon.h index 1e4187c6fb11..8ae553ca80dd 100644 --- a/llvm/include/llvm/TargetParser/ARMTargetParserCommon.h +++ b/llvm/include/llvm/TargetParser/ARMTargetParserCommon.h @@ -42,6 +42,7 @@ struct ParsedBranchProtection { StringRef Key; bool BranchTargetEnforcement; bool BranchProtectionPAuthLR; + bool GuardedControlStack; }; bool parseBranchProtection(StringRef Spec, ParsedBranchProtection &PBP, diff --git a/llvm/lib/Analysis/LazyValueInfo.cpp b/llvm/lib/Analysis/LazyValueInfo.cpp index 360fc594ef7c..b948eb6ebd12 100644 --- a/llvm/lib/Analysis/LazyValueInfo.cpp +++ b/llvm/lib/Analysis/LazyValueInfo.cpp @@ -539,10 +539,13 @@ void LazyValueInfoImpl::solve() { } std::pair<BasicBlock *, Value *> e = BlockValueStack.back(); assert(BlockValueSet.count(e) && "Stack value should be in BlockValueSet!"); + unsigned StackSize = BlockValueStack.size(); + (void) StackSize; if (solveBlockValue(e.second, e.first)) { // The work item was completely processed. - assert(BlockValueStack.back() == e && "Nothing should have been pushed!"); + assert(BlockValueStack.size() == StackSize && + BlockValueStack.back() == e && "Nothing should have been pushed!"); #ifndef NDEBUG std::optional<ValueLatticeElement> BBLV = TheCache.getCachedValueInfo(e.second, e.first); @@ -556,7 +559,8 @@ void LazyValueInfoImpl::solve() { BlockValueSet.erase(e); } else { // More work needs to be done before revisiting. - assert(BlockValueStack.back() != e && "Stack should have been pushed!"); + assert(BlockValueStack.size() == StackSize + 1 && + "Exactly one element should have been pushed!"); } } } diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index 8fca569a391b..a5fc267b1883 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -459,9 +459,24 @@ public: // Record all stack id indices actually used in the summary entries being // written, so that we can compact them in the case of distributed ThinLTO // indexes. - for (auto &CI : FS->callsites()) + for (auto &CI : FS->callsites()) { + // If the stack id list is empty, this callsite info was synthesized for + // a missing tail call frame. Ensure that the callee's GUID gets a value + // id. Normally we only generate these for defined summaries, which in + // the case of distributed ThinLTO is only the functions already defined + // in the module or that we want to import. We don't bother to include + // all the callee symbols as they aren't normally needed in the backend. + // However, for the synthesized callsite infos we do need the callee + // GUID in the backend so that we can correlate the identified callee + // with this callsite info (which for non-tail calls is done by the + // ordering of the callsite infos and verified via stack ids). + if (CI.StackIdIndices.empty()) { + GUIDToValueIdMap[CI.Callee.getGUID()] = ++GlobalValueId; + continue; + } for (auto Idx : CI.StackIdIndices) StackIdIndices.push_back(Idx); + } for (auto &AI : FS->allocs()) for (auto &MIB : AI.MIBs) for (auto Idx : MIB.StackIdIndices) diff --git a/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp b/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp index ad3ad9928987..eb372655e5f1 100644 --- a/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp +++ b/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp @@ -1,3 +1,11 @@ +//===-- AssignmentTrackingAnalysis.cpp ------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + #include "llvm/CodeGen/AssignmentTrackingAnalysis.h" #include "LiveDebugValues/LiveDebugValues.h" #include "llvm/ADT/BitVector.h" @@ -2553,6 +2561,32 @@ static void analyzeFunction(Function &Fn, const DataLayout &Layout, } } +FunctionVarLocs +DebugAssignmentTrackingAnalysis::run(Function &F, + FunctionAnalysisManager &FAM) { + if (!isAssignmentTrackingEnabled(*F.getParent())) + return FunctionVarLocs(); + + auto &DL = F.getParent()->getDataLayout(); + + FunctionVarLocsBuilder Builder; + analyzeFunction(F, DL, &Builder); + + // Save these results. + FunctionVarLocs Results; + Results.init(Builder); + return Results; +} + +AnalysisKey DebugAssignmentTrackingAnalysis::Key; + +PreservedAnalyses +DebugAssignmentTrackingPrinterPass::run(Function &F, + FunctionAnalysisManager &FAM) { + FAM.getResult<DebugAssignmentTrackingAnalysis>(F).print(OS, F); + return PreservedAnalyses::all(); +} + bool AssignmentTrackingAnalysis::runOnFunction(Function &F) { if (!isAssignmentTrackingEnabled(*F.getParent())) return false; diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp index 0801296cab49..599b7c72b2f5 100644 --- a/llvm/lib/CodeGen/BranchFolding.cpp +++ b/llvm/lib/CodeGen/BranchFolding.cpp @@ -1363,6 +1363,14 @@ ReoptimizeBlock: MachineBasicBlock *Pred = *(MBB->pred_end()-1); Pred->ReplaceUsesOfBlockWith(MBB, &*FallThrough); } + // Add rest successors of MBB to successors of FallThrough. Those + // successors are not directly reachable via MBB, so it should be + // landing-pad. + for (auto SI = MBB->succ_begin(), SE = MBB->succ_end(); SI != SE; ++SI) + if (*SI != &*FallThrough && !FallThrough->isSuccessor(*SI)) { + assert((*SI)->isEHPad() && "Bad CFG"); + FallThrough->copySuccessor(MBB, SI); + } // If MBB was the target of a jump table, update jump tables to go to the // fallthrough instead. if (MachineJumpTableInfo *MJTI = MF.getJumpTableInfo()) @@ -1624,6 +1632,15 @@ ReoptimizeBlock: } else { DidChange = true; PMBB->ReplaceUsesOfBlockWith(MBB, CurTBB); + // Add rest successors of MBB to successors of CurTBB. Those + // successors are not directly reachable via MBB, so it should be + // landing-pad. + for (auto SI = MBB->succ_begin(), SE = MBB->succ_end(); SI != SE; + ++SI) + if (*SI != CurTBB && !CurTBB->isSuccessor(*SI)) { + assert((*SI)->isEHPad() && "Bad CFG"); + CurTBB->copySuccessor(MBB, SI); + } // If this change resulted in PMBB ending in a conditional // branch where both conditions go to the same destination, // change this to an unconditional branch. diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 8b15bdb0aca3..fc2793bd7a13 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -6548,6 +6548,87 @@ bool CombinerHelper::tryFoldBoolSelectToLogic(GSelect *Select, return false; } +bool CombinerHelper::tryFoldSelectToIntMinMax(GSelect *Select, + BuildFnTy &MatchInfo) { + Register DstReg = Select->getReg(0); + Register Cond = Select->getCondReg(); + Register True = Select->getTrueReg(); + Register False = Select->getFalseReg(); + LLT DstTy = MRI.getType(DstReg); + + // We need an G_ICMP on the condition register. + GICmp *Cmp = getOpcodeDef<GICmp>(Cond, MRI); + if (!Cmp) + return false; + + // We want to fold the icmp and replace the select. + if (!MRI.hasOneNonDBGUse(Cmp->getReg(0))) + return false; + + CmpInst::Predicate Pred = Cmp->getCond(); + // We need a larger or smaller predicate for + // canonicalization. + if (CmpInst::isEquality(Pred)) + return false; + + Register CmpLHS = Cmp->getLHSReg(); + Register CmpRHS = Cmp->getRHSReg(); + + // We can swap CmpLHS and CmpRHS for higher hitrate. + if (True == CmpRHS && False == CmpLHS) { + std::swap(CmpLHS, CmpRHS); + Pred = CmpInst::getSwappedPredicate(Pred); + } + + // (icmp X, Y) ? X : Y -> integer minmax. + // see matchSelectPattern in ValueTracking. + // Legality between G_SELECT and integer minmax can differ. + if (True == CmpLHS && False == CmpRHS) { + switch (Pred) { + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_UGE: { + if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMAX, DstTy})) + return false; + MatchInfo = [=](MachineIRBuilder &B) { + B.buildUMax(DstReg, True, False); + }; + return true; + } + case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_SGE: { + if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMAX, DstTy})) + return false; + MatchInfo = [=](MachineIRBuilder &B) { + B.buildSMax(DstReg, True, False); + }; + return true; + } + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_ULE: { + if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMIN, DstTy})) + return false; + MatchInfo = [=](MachineIRBuilder &B) { + B.buildUMin(DstReg, True, False); + }; + return true; + } + case ICmpInst::ICMP_SLT: + case ICmpInst::ICMP_SLE: { + if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMIN, DstTy})) + return false; + MatchInfo = [=](MachineIRBuilder &B) { + B.buildSMin(DstReg, True, False); + }; + return true; + } + default: + return false; + } + } + + return false; +} + bool CombinerHelper::matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) { GSelect *Select = cast<GSelect>(&MI); @@ -6557,5 +6638,8 @@ bool CombinerHelper::matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) { if (tryFoldBoolSelectToLogic(Select, MatchInfo)) return true; + if (tryFoldSelectToIntMinMax(Select, MatchInfo)) + return true; + return false; } diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 6708f2baa5ed..8a6bfdc5ee66 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -1919,6 +1919,8 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) { return TargetOpcode::G_LROUND; case Intrinsic::llround: return TargetOpcode::G_LLROUND; + case Intrinsic::get_fpenv: + return TargetOpcode::G_GET_FPENV; case Intrinsic::get_fpmode: return TargetOpcode::G_GET_FPMODE; } @@ -2502,6 +2504,16 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, return true; } + case Intrinsic::set_fpenv: { + Value *FPEnv = CI.getOperand(0); + MIRBuilder.buildInstr(TargetOpcode::G_SET_FPENV, {}, + {getOrCreateVReg(*FPEnv)}); + return true; + } + case Intrinsic::reset_fpenv: { + MIRBuilder.buildInstr(TargetOpcode::G_RESET_FPENV, {}, {}); + return true; + } case Intrinsic::set_fpmode: { Value *FPState = CI.getOperand(0); MIRBuilder.buildInstr(TargetOpcode::G_SET_FPMODE, {}, diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index def7f6ebeb01..21947a55874a 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -958,6 +958,13 @@ static RTLIB::Libcall getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI) { RTLIB::Libcall RTLibcall; switch (MI.getOpcode()) { + case TargetOpcode::G_GET_FPENV: + RTLibcall = RTLIB::FEGETENV; + break; + case TargetOpcode::G_SET_FPENV: + case TargetOpcode::G_RESET_FPENV: + RTLibcall = RTLIB::FESETENV; + break; case TargetOpcode::G_GET_FPMODE: RTLibcall = RTLIB::FEGETMODE; break; @@ -1232,18 +1239,21 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) { MI.eraseFromParent(); return Result; } + case TargetOpcode::G_GET_FPENV: case TargetOpcode::G_GET_FPMODE: { LegalizeResult Result = createGetStateLibcall(MIRBuilder, MI, LocObserver); if (Result != Legalized) return Result; break; } + case TargetOpcode::G_SET_FPENV: case TargetOpcode::G_SET_FPMODE: { LegalizeResult Result = createSetStateLibcall(MIRBuilder, MI, LocObserver); if (Result != Legalized) return Result; break; } + case TargetOpcode::G_RESET_FPENV: case TargetOpcode::G_RESET_FPMODE: { LegalizeResult Result = createResetStateLibcall(MIRBuilder, MI, LocObserver); diff --git a/llvm/lib/CodeGen/GlobalISel/Localizer.cpp b/llvm/lib/CodeGen/GlobalISel/Localizer.cpp index 55984423e5bc..ae58e135931f 100644 --- a/llvm/lib/CodeGen/GlobalISel/Localizer.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Localizer.cpp @@ -13,6 +13,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetLowering.h" @@ -58,18 +59,18 @@ bool Localizer::isLocalUse(MachineOperand &MOUse, const MachineInstr &Def, return InsertMBB == Def.getParent(); } -bool Localizer::isNonUniquePhiValue(MachineOperand &Op) const { - MachineInstr *MI = Op.getParent(); - if (!MI->isPHI()) - return false; +unsigned Localizer::getNumPhiUses(MachineOperand &Op) const { + auto *MI = dyn_cast<GPhi>(&*Op.getParent()); + if (!MI) + return 0; Register SrcReg = Op.getReg(); - for (unsigned Idx = 1; Idx < MI->getNumOperands(); Idx += 2) { - auto &MO = MI->getOperand(Idx); - if (&MO != &Op && MO.isReg() && MO.getReg() == SrcReg) - return true; + unsigned NumUses = 0; + for (unsigned I = 0, NumVals = MI->getNumIncomingValues(); I < NumVals; ++I) { + if (MI->getIncomingValue(I) == SrcReg) + ++NumUses; } - return false; + return NumUses; } bool Localizer::localizeInterBlock(MachineFunction &MF, @@ -108,11 +109,12 @@ bool Localizer::localizeInterBlock(MachineFunction &MF, continue; } - // If the use is a phi operand that's not unique, don't try to localize. - // If we do, we can cause unnecessary instruction bloat by duplicating - // into each predecessor block, when the existing one is sufficient and - // allows for easier optimization later. - if (isNonUniquePhiValue(MOUse)) + // PHIs look like a single user but can use the same register in multiple + // edges, causing remat into each predecessor. Allow this to a certain + // extent. + unsigned NumPhiUses = getNumPhiUses(MOUse); + const unsigned PhiThreshold = 2; // FIXME: Tune this more. + if (NumPhiUses > PhiThreshold) continue; LLVM_DEBUG(dbgs() << "Fixing non-local use\n"); @@ -164,19 +166,22 @@ bool Localizer::localizeIntraBlock(LocalizedSetVecT &LocalizedInstrs) { if (!UseMI.isPHI()) Users.insert(&UseMI); } - // If all the users were PHIs then they're not going to be in our block, - // don't try to move this instruction. - if (Users.empty()) - continue; - MachineBasicBlock::iterator II(MI); - ++II; - while (II != MBB.end() && !Users.count(&*II)) + // If all the users were PHIs then they're not going to be in our block, we + // may still benefit from sinking, especially since the value might be live + // across a call. + if (Users.empty()) { + // Make sure we don't sink in between two terminator sequences by scanning + // forward, not backward. + II = MBB.getFirstTerminatorForward(); + LLVM_DEBUG(dbgs() << "Only phi users: moving inst to end: " << *MI); + } else { ++II; - - assert(II != MBB.end() && "Didn't find the user in the MBB"); - LLVM_DEBUG(dbgs() << "Intra-block: moving " << *MI << " before " << *II - << '\n'); + while (II != MBB.end() && !Users.count(&*II)) + ++II; + assert(II != MBB.end() && "Didn't find the user in the MBB"); + LLVM_DEBUG(dbgs() << "Intra-block: moving " << *MI << " before " << *II); + } MI->removeFromParent(); MBB.insert(II, MI); diff --git a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp index 9037f752dc4f..cfc8c28b99e5 100644 --- a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp +++ b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp @@ -2403,8 +2403,15 @@ bool InstrRefBasedLDV::mlocJoin( llvm::sort(BlockOrders, Cmp); // Skip entry block. - if (BlockOrders.size() == 0) + if (BlockOrders.size() == 0) { + // FIXME: We don't use assert here to prevent instr-ref-unreachable.mir + // failing. + LLVM_DEBUG(if (!MBB.isEntryBlock()) dbgs() + << "Found not reachable block " << MBB.getFullName() + << " from entry which may lead out of " + "bound access to VarLocs\n"); return false; + } // Step through all machine locations, look at each predecessor and test // whether we can eliminate redundant PHIs. diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 2327664516cc..ecdf9ab9e989 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -4380,7 +4380,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { } else { N1IsConst = isa<ConstantSDNode>(N1); if (N1IsConst) { - ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue(); + ConstValue1 = N1->getAsAPIntVal(); N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque(); } } @@ -10999,8 +10999,8 @@ SDValue DAGCombiner::visitBSWAP(SDNode *N) { SDLoc DL(N); // fold (bswap c1) -> c2 - if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) - return DAG.getNode(ISD::BSWAP, DL, VT, N0); + if (SDValue C = DAG.FoldConstantArithmetic(ISD::BSWAP, DL, VT, {N0})) + return C; // fold (bswap (bswap x)) -> x if (N0.getOpcode() == ISD::BSWAP) return N0.getOperand(0); @@ -11059,10 +11059,11 @@ SDValue DAGCombiner::visitBSWAP(SDNode *N) { SDValue DAGCombiner::visitBITREVERSE(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); + SDLoc DL(N); // fold (bitreverse c1) -> c2 - if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) - return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0); + if (SDValue C = DAG.FoldConstantArithmetic(ISD::BITREVERSE, DL, VT, {N0})) + return C; // fold (bitreverse (bitreverse x)) -> x if (N0.getOpcode() == ISD::BITREVERSE) return N0.getOperand(0); @@ -11072,16 +11073,16 @@ SDValue DAGCombiner::visitBITREVERSE(SDNode *N) { SDValue DAGCombiner::visitCTLZ(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); + SDLoc DL(N); // fold (ctlz c1) -> c2 - if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) - return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0); + if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTLZ, DL, VT, {N0})) + return C; // If the value is known never to be zero, switch to the undef version. - if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) { + if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) if (DAG.isKnownNeverZero(N0)) - return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0); - } + return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, DL, VT, N0); return SDValue(); } @@ -11089,26 +11090,28 @@ SDValue DAGCombiner::visitCTLZ(SDNode *N) { SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); + SDLoc DL(N); // fold (ctlz_zero_undef c1) -> c2 - if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) - return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0); + if (SDValue C = + DAG.FoldConstantArithmetic(ISD::CTLZ_ZERO_UNDEF, DL, VT, {N0})) + return C; return SDValue(); } SDValue DAGCombiner::visitCTTZ(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); + SDLoc DL(N); // fold (cttz c1) -> c2 - if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) - return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0); + if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTTZ, DL, VT, {N0})) + return C; // If the value is known never to be zero, switch to the undef version. - if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) { + if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) if (DAG.isKnownNeverZero(N0)) - return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0); - } + return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, DL, VT, N0); return SDValue(); } @@ -11116,20 +11119,23 @@ SDValue DAGCombiner::visitCTTZ(SDNode *N) { SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); + SDLoc DL(N); // fold (cttz_zero_undef c1) -> c2 - if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) - return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0); + if (SDValue C = + DAG.FoldConstantArithmetic(ISD::CTTZ_ZERO_UNDEF, DL, VT, {N0})) + return C; return SDValue(); } SDValue DAGCombiner::visitCTPOP(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); + SDLoc DL(N); // fold (ctpop c1) -> c2 - if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) - return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0); + if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTPOP, DL, VT, {N0})) + return C; return SDValue(); } @@ -12087,8 +12093,8 @@ SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) { if (N1Elt.getValueType() != N2Elt.getValueType()) continue; - const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue(); - const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue(); + const APInt &C1 = N1Elt->getAsAPIntVal(); + const APInt &C2 = N2Elt->getAsAPIntVal(); if (C1 != C2 + 1) AllAddOne = false; if (C1 != C2 - 1) @@ -12764,7 +12770,7 @@ static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, SDLoc DL(Op); // Get the constant value and if needed trunc it to the size of the type. // Nodes like build_vector might have constants wider than the scalar type. - APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits); + APInt C = Op->getAsAPIntVal().zextOrTrunc(EVTBits); if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG) Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT)); else @@ -13375,9 +13381,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) { SDValue N00 = N0.getOperand(0); EVT ExtVT = cast<VTSDNode>(N0->getOperand(1))->getVT(); - if (N00.getOpcode() == ISD::TRUNCATE && + if ((N00.getOpcode() == ISD::TRUNCATE || TLI.isTruncateFree(N00, ExtVT)) && (!LegalTypes || TLI.isTypeLegal(ExtVT))) { - SDValue T = DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N00.getOperand(0)); + SDValue T = DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N00); return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, T); } } @@ -17942,10 +17948,10 @@ SDValue DAGCombiner::rebuildSetCC(SDValue N) { SDValue AndOp1 = Op0.getOperand(1); if (AndOp1.getOpcode() == ISD::Constant) { - const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue(); + const APInt &AndConst = AndOp1->getAsAPIntVal(); if (AndConst.isPowerOf2() && - cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) { + Op1->getAsAPIntVal() == AndConst.logBase2()) { SDLoc DL(N); return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()), Op0, DAG.getConstant(0, DL, Op0.getValueType()), @@ -18266,7 +18272,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { auto *CN = cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx)); const APInt &Offset0 = CN->getAPIntValue(); - const APInt &Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue(); + const APInt &Offset1 = Offset->getAsAPIntVal(); int X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1; int Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1; int X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1; @@ -19573,7 +19579,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { // Find the type to narrow it the load / op / store to. SDValue N1 = Value.getOperand(1); unsigned BitWidth = N1.getValueSizeInBits(); - APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue(); + APInt Imm = N1->getAsAPIntVal(); if (Opc == ISD::AND) Imm ^= APInt::getAllOnes(BitWidth); if (Imm == 0 || Imm.isAllOnes()) @@ -26542,10 +26548,10 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { } APInt Bits; - if (isa<ConstantSDNode>(Elt)) - Bits = cast<ConstantSDNode>(Elt)->getAPIntValue(); - else if (isa<ConstantFPSDNode>(Elt)) - Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt(); + if (auto *Cst = dyn_cast<ConstantSDNode>(Elt)) + Bits = Cst->getAPIntValue(); + else if (auto *CstFP = dyn_cast<ConstantFPSDNode>(Elt)) + Bits = CstFP->getValueAPF().bitcastToAPInt(); else return SDValue(); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index ec74d2940099..c278bdc07360 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -1854,7 +1854,7 @@ void DAGTypeLegalizer::SplitVecRes_STEP_VECTOR(SDNode *N, SDValue &Lo, // Hi = Lo + (EltCnt * Step) EVT EltVT = Step.getValueType(); - APInt StepVal = cast<ConstantSDNode>(Step)->getAPIntValue(); + APInt StepVal = Step->getAsAPIntVal(); SDValue StartOfHi = DAG.getVScale(dl, EltVT, StepVal * LoVT.getVectorMinNumElements()); StartOfHi = DAG.getSExtOrTrunc(StartOfHi, dl, HiVT.getVectorElementType()); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index b39be64c06f9..01d31806c844 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -327,7 +327,7 @@ bool ISD::isVectorShrinkable(const SDNode *N, unsigned NewEltSize, if (!isa<ConstantSDNode>(Op)) return false; - APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().trunc(EltSize); + APInt C = Op->getAsAPIntVal().trunc(EltSize); if (Signed && C.trunc(NewEltSize).sext(EltSize) != C) return false; if (!Signed && C.trunc(NewEltSize).zext(EltSize) != C) @@ -7200,7 +7200,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, (N2VT.getVectorMinNumElements() + N3->getAsZExtVal()) <= VT.getVectorMinNumElements()) && "Insert subvector overflow!"); - assert(cast<ConstantSDNode>(N3)->getAPIntValue().getBitWidth() == + assert(N3->getAsAPIntVal().getBitWidth() == TLI->getVectorIdxTy(getDataLayout()).getFixedSizeInBits() && "Constant index for INSERT_SUBVECTOR has an invalid size"); @@ -9304,7 +9304,7 @@ SDValue SelectionDAG::getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, N->getValueType(0).getVectorElementCount()) && "Vector width mismatch between index and data"); assert(isa<ConstantSDNode>(N->getScale()) && - cast<ConstantSDNode>(N->getScale())->getAPIntValue().isPowerOf2() && + N->getScale()->getAsAPIntVal().isPowerOf2() && "Scale should be a constant power of 2"); CSEMap.InsertNode(N, IP); @@ -9348,7 +9348,7 @@ SDValue SelectionDAG::getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, N->getValue().getValueType().getVectorElementCount()) && "Vector width mismatch between index and data"); assert(isa<ConstantSDNode>(N->getScale()) && - cast<ConstantSDNode>(N->getScale())->getAPIntValue().isPowerOf2() && + N->getScale()->getAsAPIntVal().isPowerOf2() && "Scale should be a constant power of 2"); CSEMap.InsertNode(N, IP); @@ -9490,7 +9490,7 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, N->getValueType(0).getVectorElementCount()) && "Vector width mismatch between index and data"); assert(isa<ConstantSDNode>(N->getScale()) && - cast<ConstantSDNode>(N->getScale())->getAPIntValue().isPowerOf2() && + N->getScale()->getAsAPIntVal().isPowerOf2() && "Scale should be a constant power of 2"); CSEMap.InsertNode(N, IP); @@ -9536,7 +9536,7 @@ SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, N->getValue().getValueType().getVectorElementCount()) && "Vector width mismatch between index and data"); assert(isa<ConstantSDNode>(N->getScale()) && - cast<ConstantSDNode>(N->getScale())->getAPIntValue().isPowerOf2() && + N->getScale()->getAsAPIntVal().isPowerOf2() && "Scale should be a constant power of 2"); CSEMap.InsertNode(N, IP); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 9acfc76d7d5e..678d273e4bd6 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -2697,9 +2697,14 @@ LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckChildSame( /// CheckPatternPredicate - Implements OP_CheckPatternPredicate. LLVM_ATTRIBUTE_ALWAYS_INLINE static bool -CheckPatternPredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex, - const SelectionDAGISel &SDISel, bool TwoBytePredNo) { - unsigned PredNo = MatcherTable[MatcherIndex++]; +CheckPatternPredicate(unsigned Opcode, const unsigned char *MatcherTable, + unsigned &MatcherIndex, const SelectionDAGISel &SDISel) { + bool TwoBytePredNo = + Opcode == SelectionDAGISel::OPC_CheckPatternPredicateTwoByte; + unsigned PredNo = + TwoBytePredNo || Opcode == SelectionDAGISel::OPC_CheckPatternPredicate + ? MatcherTable[MatcherIndex++] + : Opcode - SelectionDAGISel::OPC_CheckPatternPredicate0; if (TwoBytePredNo) PredNo |= MatcherTable[MatcherIndex++] << 8; return SDISel.CheckPatternPredicate(PredNo); @@ -2851,10 +2856,16 @@ static unsigned IsPredicateKnownToFail(const unsigned char *Table, Table[Index-1] - SelectionDAGISel::OPC_CheckChild0Same); return Index; case SelectionDAGISel::OPC_CheckPatternPredicate: + case SelectionDAGISel::OPC_CheckPatternPredicate0: + case SelectionDAGISel::OPC_CheckPatternPredicate1: case SelectionDAGISel::OPC_CheckPatternPredicate2: - Result = !::CheckPatternPredicate( - Table, Index, SDISel, - Table[Index - 1] == SelectionDAGISel::OPC_CheckPatternPredicate2); + case SelectionDAGISel::OPC_CheckPatternPredicate3: + case SelectionDAGISel::OPC_CheckPatternPredicate4: + case SelectionDAGISel::OPC_CheckPatternPredicate5: + case SelectionDAGISel::OPC_CheckPatternPredicate6: + case SelectionDAGISel::OPC_CheckPatternPredicate7: + case SelectionDAGISel::OPC_CheckPatternPredicateTwoByte: + Result = !::CheckPatternPredicate(Opcode, Table, Index, SDISel); return Index; case SelectionDAGISel::OPC_CheckPredicate: Result = !::CheckNodePredicate(Table, Index, SDISel, N.getNode()); @@ -3336,9 +3347,16 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, continue; case OPC_CheckPatternPredicate: + case OPC_CheckPatternPredicate0: + case OPC_CheckPatternPredicate1: case OPC_CheckPatternPredicate2: - if (!::CheckPatternPredicate(MatcherTable, MatcherIndex, *this, - Opcode == OPC_CheckPatternPredicate2)) + case OPC_CheckPatternPredicate3: + case OPC_CheckPatternPredicate4: + case OPC_CheckPatternPredicate5: + case OPC_CheckPatternPredicate6: + case OPC_CheckPatternPredicate7: + case OPC_CheckPatternPredicateTwoByte: + if (!::CheckPatternPredicate(Opcode, MatcherTable, MatcherIndex, *this)) break; continue; case OPC_CheckPredicate: @@ -3358,8 +3376,18 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, break; continue; } - case OPC_CheckComplexPat: { - unsigned CPNum = MatcherTable[MatcherIndex++]; + case OPC_CheckComplexPat: + case OPC_CheckComplexPat0: + case OPC_CheckComplexPat1: + case OPC_CheckComplexPat2: + case OPC_CheckComplexPat3: + case OPC_CheckComplexPat4: + case OPC_CheckComplexPat5: + case OPC_CheckComplexPat6: + case OPC_CheckComplexPat7: { + unsigned CPNum = Opcode == OPC_CheckComplexPat + ? MatcherTable[MatcherIndex++] + : Opcode - OPC_CheckComplexPat0; unsigned RecNo = MatcherTable[MatcherIndex++]; assert(RecNo < RecordedNodes.size() && "Invalid CheckComplexPat"); diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index e3e3e375d6a6..3bbef6e6d85d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -1108,7 +1108,7 @@ bool TargetLowering::SimplifyDemandedBits( if (Op.getOpcode() == ISD::Constant) { // We know all of the bits for a constant! - Known = KnownBits::makeConstant(cast<ConstantSDNode>(Op)->getAPIntValue()); + Known = KnownBits::makeConstant(Op->getAsAPIntVal()); return false; } @@ -6350,8 +6350,7 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, LeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros(); // UnsignedDivisionByConstantInfo doesn't work correctly if leading zeros in // the dividend exceeds the leading zeros for the divisor. - LeadingZeros = std::min( - LeadingZeros, cast<ConstantSDNode>(N1)->getAPIntValue().countl_zero()); + LeadingZeros = std::min(LeadingZeros, N1->getAsAPIntVal().countl_zero()); } bool UseNPQ = false, UsePreShift = false, UsePostShift = false; diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 6e69dc66429d..a69b71451736 100644 --- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -1669,9 +1669,18 @@ static int getSelectionForCOFF(const GlobalValue *GV) { MCSection *TargetLoweringObjectFileCOFF::getExplicitSectionGlobal( const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { + StringRef Name = GO->getSection(); + if (Name == getInstrProfSectionName(IPSK_covmap, Triple::COFF, + /*AddSegmentInfo=*/false) || + Name == getInstrProfSectionName(IPSK_covfun, Triple::COFF, + /*AddSegmentInfo=*/false) || + Name == getInstrProfSectionName(IPSK_covdata, Triple::COFF, + /*AddSegmentInfo=*/false) || + Name == getInstrProfSectionName(IPSK_covname, Triple::COFF, + /*AddSegmentInfo=*/false)) + Kind = SectionKind::getMetadata(); int Selection = 0; unsigned Characteristics = getCOFFSectionFlags(Kind, TM); - StringRef Name = GO->getSection(); StringRef COMDATSymName = ""; if (GO->hasComdat()) { Selection = getSelectionForCOFF(GO); diff --git a/llvm/lib/DWARFLinker/Classic/DWARFLinker.cpp b/llvm/lib/DWARFLinker/Classic/DWARFLinker.cpp index 8d76c3bcf672..ac2c26e52240 100644 --- a/llvm/lib/DWARFLinker/Classic/DWARFLinker.cpp +++ b/llvm/lib/DWARFLinker/Classic/DWARFLinker.cpp @@ -14,6 +14,7 @@ #include "llvm/CodeGen/NonRelocatableStringpool.h" #include "llvm/DWARFLinker/Classic/DWARFLinkerDeclContext.h" #include "llvm/DWARFLinker/Classic/DWARFStreamer.h" +#include "llvm/DWARFLinker/Utils.h" #include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h" #include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" @@ -176,20 +177,6 @@ static void resolveRelativeObjectPath(SmallVectorImpl<char> &Buf, DWARFDie CU) { sys::path::append(Buf, dwarf::toString(CU.find(dwarf::DW_AT_comp_dir), "")); } -/// Make a best effort to guess the -/// Xcode.app/Contents/Developer/Toolchains/ path from an SDK path. -static SmallString<128> guessToolchainBaseDir(StringRef SysRoot) { - SmallString<128> Result; - // Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk - StringRef Base = sys::path::parent_path(SysRoot); - if (sys::path::filename(Base) != "SDKs") - return Result; - Base = sys::path::parent_path(Base); - Result = Base; - Result += "/Toolchains"; - return Result; -} - /// Collect references to parseable Swift interfaces in imported /// DW_TAG_module blocks. static void analyzeImportedModule( diff --git a/llvm/lib/DWARFLinker/Parallel/AcceleratorRecordsSaver.cpp b/llvm/lib/DWARFLinker/Parallel/AcceleratorRecordsSaver.cpp index 3af574c70561..9af222354551 100644 --- a/llvm/lib/DWARFLinker/Parallel/AcceleratorRecordsSaver.cpp +++ b/llvm/lib/DWARFLinker/Parallel/AcceleratorRecordsSaver.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// #include "AcceleratorRecordsSaver.h" -#include "Utils.h" +#include "llvm/DWARFLinker/Utils.h" #include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h" #include "llvm/Support/DJB.h" diff --git a/llvm/lib/DWARFLinker/Parallel/DWARFLinkerCompileUnit.cpp b/llvm/lib/DWARFLinker/Parallel/DWARFLinkerCompileUnit.cpp index ffcf9f365aec..6ed284a66a85 100644 --- a/llvm/lib/DWARFLinker/Parallel/DWARFLinkerCompileUnit.cpp +++ b/llvm/lib/DWARFLinker/Parallel/DWARFLinkerCompileUnit.cpp @@ -12,6 +12,7 @@ #include "DIEGenerator.h" #include "DependencyTracker.h" #include "SyntheticTypeNameBuilder.h" +#include "llvm/DWARFLinker/Utils.h" #include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h" #include "llvm/DebugInfo/DWARF/DWARFDebugMacro.h" #include "llvm/Support/DJB.h" @@ -247,20 +248,6 @@ void CompileUnit::cleanupDataAfterClonning() { getOrigUnit().clear(); } -/// Make a best effort to guess the -/// Xcode.app/Contents/Developer/Toolchains/ path from an SDK path. -static SmallString<128> guessToolchainBaseDir(StringRef SysRoot) { - SmallString<128> Result; - // Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk - StringRef Base = sys::path::parent_path(SysRoot); - if (sys::path::filename(Base) != "SDKs") - return Result; - Base = sys::path::parent_path(Base); - Result = Base; - Result += "/Toolchains"; - return Result; -} - /// Collect references to parseable Swift interfaces in imported /// DW_TAG_module blocks. void CompileUnit::analyzeImportedModule(const DWARFDebugInfoEntry *DieEntry) { @@ -1698,14 +1685,6 @@ CompileUnit::getDirAndFilenameFromLineTable( return getDirAndFilenameFromLineTable(FileIdx); } -static bool isPathAbsoluteOnWindowsOrPosix(const Twine &Path) { - // Debug info can contain paths from any OS, not necessarily - // an OS we're currently running on. Moreover different compilation units can - // be compiled on different operating systems and linked together later. - return sys::path::is_absolute(Path, sys::path::Style::posix) || - sys::path::is_absolute(Path, sys::path::Style::windows); -} - std::optional<std::pair<StringRef, StringRef>> CompileUnit::getDirAndFilenameFromLineTable(uint64_t FileIdx) { FileNamesCache::iterator FileData = FileNames.find(FileIdx); diff --git a/llvm/lib/DWARFLinker/Parallel/DWARFLinkerImpl.cpp b/llvm/lib/DWARFLinker/Parallel/DWARFLinkerImpl.cpp index bb59cbfdb347..b0b819cf9778 100644 --- a/llvm/lib/DWARFLinker/Parallel/DWARFLinkerImpl.cpp +++ b/llvm/lib/DWARFLinker/Parallel/DWARFLinkerImpl.cpp @@ -9,7 +9,7 @@ #include "DWARFLinkerImpl.h" #include "DIEGenerator.h" #include "DependencyTracker.h" -#include "Utils.h" +#include "llvm/DWARFLinker/Utils.h" #include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/Parallel.h" diff --git a/llvm/lib/DWARFLinker/Parallel/DebugLineSectionEmitter.h b/llvm/lib/DWARFLinker/Parallel/DebugLineSectionEmitter.h index 545d04cfbe43..1839164dcec1 100644 --- a/llvm/lib/DWARFLinker/Parallel/DebugLineSectionEmitter.h +++ b/llvm/lib/DWARFLinker/Parallel/DebugLineSectionEmitter.h @@ -193,24 +193,39 @@ private: Section.emitString(Include.getForm(), *IncludeStr); } + bool HasChecksums = P.ContentTypes.HasMD5; + bool HasInlineSources = P.ContentTypes.HasSource; + + dwarf::Form FileNameForm = dwarf::DW_FORM_string; + dwarf::Form LLVMSourceForm = dwarf::DW_FORM_string; + if (P.FileNames.empty()) { // file_name_entry_format_count (ubyte). Section.emitIntVal(0, 1); } else { + FileNameForm = P.FileNames[0].Name.getForm(); + LLVMSourceForm = P.FileNames[0].Source.getForm(); + // file_name_entry_format_count (ubyte). - Section.emitIntVal(2 + (P.ContentTypes.HasMD5 ? 1 : 0), 1); + Section.emitIntVal( + 2 + (HasChecksums ? 1 : 0) + (HasInlineSources ? 1 : 0), 1); // file_name_entry_format (sequence of ULEB128 pairs). encodeULEB128(dwarf::DW_LNCT_path, Section.OS); - encodeULEB128(P.FileNames[0].Name.getForm(), Section.OS); + encodeULEB128(FileNameForm, Section.OS); encodeULEB128(dwarf::DW_LNCT_directory_index, Section.OS); encodeULEB128(dwarf::DW_FORM_data1, Section.OS); - if (P.ContentTypes.HasMD5) { + if (HasChecksums) { encodeULEB128(dwarf::DW_LNCT_MD5, Section.OS); encodeULEB128(dwarf::DW_FORM_data16, Section.OS); } + + if (HasInlineSources) { + encodeULEB128(dwarf::DW_LNCT_LLVM_source, Section.OS); + encodeULEB128(LLVMSourceForm, Section.OS); + } } // file_names_count (ULEB128). @@ -226,14 +241,27 @@ private: // A null-terminated string containing the full or relative path name of a // source file. - Section.emitString(File.Name.getForm(), *FileNameStr); + Section.emitString(FileNameForm, *FileNameStr); Section.emitIntVal(File.DirIdx, 1); - if (P.ContentTypes.HasMD5) { + if (HasChecksums) { + assert((File.Checksum.size() == 16) && + "checksum size is not equal to 16 bytes."); Section.emitBinaryData( StringRef(reinterpret_cast<const char *>(File.Checksum.data()), File.Checksum.size())); } + + if (HasInlineSources) { + std::optional<const char *> FileSourceStr = + dwarf::toString(File.Source); + if (!FileSourceStr) { + U.warn("cann't read string from line table."); + return; + } + + Section.emitString(LLVMSourceForm, *FileSourceStr); + } } } diff --git a/llvm/lib/DWARFLinker/Utils.cpp b/llvm/lib/DWARFLinker/Utils.cpp index e8b0fe303aae..52508c998532 100644 --- a/llvm/lib/DWARFLinker/Utils.cpp +++ b/llvm/lib/DWARFLinker/Utils.cpp @@ -5,3 +5,5 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// + +#include "llvm/DWARFLinker/Utils.h" diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index aeaca21a99cc..b6ad85b2d46e 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -96,6 +96,7 @@ #include "llvm/IR/IntrinsicsAArch64.h" #include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/IR/IntrinsicsARM.h" +#include "llvm/IR/IntrinsicsNVPTX.h" #include "llvm/IR/IntrinsicsWebAssembly.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" @@ -6031,6 +6032,16 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { "Value for inactive lanes must be a VGPR function argument", &Call); break; } + case Intrinsic::nvvm_setmaxnreg_inc_sync_aligned_u32: + case Intrinsic::nvvm_setmaxnreg_dec_sync_aligned_u32: { + Value *V = Call.getArgOperand(0); + unsigned RegCount = cast<ConstantInt>(V)->getZExtValue(); + Check(RegCount % 8 == 0, + "reg_count argument to nvvm.setmaxnreg must be in multiples of 8"); + Check((RegCount >= 24 && RegCount <= 256), + "reg_count argument to nvvm.setmaxnreg must be within [24, 256]"); + break; + } case Intrinsic::experimental_convergence_entry: LLVM_FALLTHROUGH; case Intrinsic::experimental_convergence_anchor: diff --git a/llvm/lib/MC/MCParser/ELFAsmParser.cpp b/llvm/lib/MC/MCParser/ELFAsmParser.cpp index 93e1d2f44b8c..d4c4bcb85648 100644 --- a/llvm/lib/MC/MCParser/ELFAsmParser.cpp +++ b/llvm/lib/MC/MCParser/ELFAsmParser.cpp @@ -616,12 +616,12 @@ bool ELFAsmParser::ParseSectionArguments(bool IsPush, SMLoc loc) { if (Mergeable) if (parseMergeSize(Size)) return true; - if (Group) - if (parseGroup(GroupName, IsComdat)) - return true; if (Flags & ELF::SHF_LINK_ORDER) if (parseLinkedToSym(LinkedToSym)) return true; + if (Group) + if (parseGroup(GroupName, IsComdat)) + return true; if (maybeParseUniqueID(UniqueID)) return true; } diff --git a/llvm/lib/MC/MCSectionELF.cpp b/llvm/lib/MC/MCSectionELF.cpp index 95fdf3352207..b1efb839ba75 100644 --- a/llvm/lib/MC/MCSectionELF.cpp +++ b/llvm/lib/MC/MCSectionELF.cpp @@ -90,8 +90,6 @@ void MCSectionELF::printSwitchToSection(const MCAsmInfo &MAI, const Triple &T, OS << 'e'; if (Flags & ELF::SHF_EXECINSTR) OS << 'x'; - if (Flags & ELF::SHF_GROUP) - OS << 'G'; if (Flags & ELF::SHF_WRITE) OS << 'w'; if (Flags & ELF::SHF_MERGE) @@ -102,6 +100,8 @@ void MCSectionELF::printSwitchToSection(const MCAsmInfo &MAI, const Triple &T, OS << 'T'; if (Flags & ELF::SHF_LINK_ORDER) OS << 'o'; + if (Flags & ELF::SHF_GROUP) + OS << 'G'; if (Flags & ELF::SHF_GNU_RETAIN) OS << 'R'; @@ -183,13 +183,6 @@ void MCSectionELF::printSwitchToSection(const MCAsmInfo &MAI, const Triple &T, OS << "," << EntrySize; } - if (Flags & ELF::SHF_GROUP) { - OS << ","; - printName(OS, Group.getPointer()->getName()); - if (isComdat()) - OS << ",comdat"; - } - if (Flags & ELF::SHF_LINK_ORDER) { OS << ","; if (LinkedToSym) @@ -198,6 +191,13 @@ void MCSectionELF::printSwitchToSection(const MCAsmInfo &MAI, const Triple &T, OS << '0'; } + if (Flags & ELF::SHF_GROUP) { + OS << ","; + printName(OS, Group.getPointer()->getName()); + if (isComdat()) + OS << ",comdat"; + } + if (isUnique()) OS << ",unique," << UniqueID; diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 27bfe12127cc..bfc97d5464c0 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -72,6 +72,7 @@ #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/TypeBasedAliasAnalysis.h" #include "llvm/Analysis/UniformityAnalysis.h" +#include "llvm/CodeGen/AssignmentTrackingAnalysis.h" #include "llvm/CodeGen/BasicBlockSectionsProfileReader.h" #include "llvm/CodeGen/CallBrPrepare.h" #include "llvm/CodeGen/CodeGenPrepare.h" diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index bda36bd8c107..0b53b59787dd 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -235,6 +235,7 @@ FUNCTION_ANALYSIS("block-freq", BlockFrequencyAnalysis()) FUNCTION_ANALYSIS("branch-prob", BranchProbabilityAnalysis()) FUNCTION_ANALYSIS("cycles", CycleAnalysis()) FUNCTION_ANALYSIS("da", DependenceAnalysis()) +FUNCTION_ANALYSIS("debug-ata", DebugAssignmentTrackingAnalysis()) FUNCTION_ANALYSIS("demanded-bits", DemandedBitsAnalysis()) FUNCTION_ANALYSIS("domfrontier", DominanceFrontierAnalysis()) FUNCTION_ANALYSIS("domtree", DominatorTreeAnalysis()) @@ -384,6 +385,7 @@ FUNCTION_PASS("print<branch-prob>", BranchProbabilityPrinterPass(dbgs())) FUNCTION_PASS("print<cost-model>", CostModelPrinterPass(dbgs())) FUNCTION_PASS("print<cycles>", CycleInfoPrinterPass(dbgs())) FUNCTION_PASS("print<da>", DependenceAnalysisPrinterPass(dbgs())) +FUNCTION_PASS("print<debug-ata>", DebugAssignmentTrackingPrinterPass(dbgs())) FUNCTION_PASS("print<delinearization>", DelinearizationPrinterPass(dbgs())) FUNCTION_PASS("print<demanded-bits>", DemandedBitsPrinterPass(dbgs())) FUNCTION_PASS("print<domfrontier>", DominanceFrontierPrinterPass(dbgs())) @@ -421,7 +423,7 @@ FUNCTION_PASS("structurizecfg", StructurizeCFGPass()) FUNCTION_PASS("tailcallelim", TailCallElimPass()) FUNCTION_PASS("tlshoist", TLSVariableHoistPass()) FUNCTION_PASS("transform-warning", WarnMissedTransformationsPass()) -FUNCTION_PASS("trigger-verifier-error", TriggerVerifierErrorPass()) +FUNCTION_PASS("trigger-verifier-error", TriggerVerifierErrorPass()) FUNCTION_PASS("tsan", ThreadSanitizerPass()) FUNCTION_PASS("typepromotion", TypePromotionPass(TM)) FUNCTION_PASS("unify-loop-exits", UnifyLoopExitsPass()) diff --git a/llvm/lib/Support/RISCVISAInfo.cpp b/llvm/lib/Support/RISCVISAInfo.cpp index 70f531e40b90..390d950486a7 100644 --- a/llvm/lib/Support/RISCVISAInfo.cpp +++ b/llvm/lib/Support/RISCVISAInfo.cpp @@ -24,16 +24,11 @@ using namespace llvm; namespace { -/// Represents the major and version number components of a RISC-V extension -struct RISCVExtensionVersion { - unsigned Major; - unsigned Minor; -}; struct RISCVSupportedExtension { const char *Name; /// Supported version. - RISCVExtensionVersion Version; + RISCVISAInfo::ExtensionVersion Version; bool operator<(const RISCVSupportedExtension &RHS) const { return StringRef(Name) < StringRef(RHS.Name); @@ -50,161 +45,161 @@ static const char *RISCVGImplications[] = { // NOTE: This table should be sorted alphabetically by extension name. static const RISCVSupportedExtension SupportedExtensions[] = { - {"a", RISCVExtensionVersion{2, 1}}, - {"c", RISCVExtensionVersion{2, 0}}, - {"d", RISCVExtensionVersion{2, 2}}, - {"e", RISCVExtensionVersion{2, 0}}, - {"f", RISCVExtensionVersion{2, 2}}, - {"h", RISCVExtensionVersion{1, 0}}, - {"i", RISCVExtensionVersion{2, 1}}, - {"m", RISCVExtensionVersion{2, 0}}, + {"a", {2, 1}}, + {"c", {2, 0}}, + {"d", {2, 2}}, + {"e", {2, 0}}, + {"f", {2, 2}}, + {"h", {1, 0}}, + {"i", {2, 1}}, + {"m", {2, 0}}, - {"smaia", RISCVExtensionVersion{1, 0}}, - {"ssaia", RISCVExtensionVersion{1, 0}}, - {"svinval", RISCVExtensionVersion{1, 0}}, - {"svnapot", RISCVExtensionVersion{1, 0}}, - {"svpbmt", RISCVExtensionVersion{1, 0}}, + {"smaia", {1, 0}}, + {"ssaia", {1, 0}}, + {"svinval", {1, 0}}, + {"svnapot", {1, 0}}, + {"svpbmt", {1, 0}}, - {"v", RISCVExtensionVersion{1, 0}}, + {"v", {1, 0}}, // vendor-defined ('X') extensions - {"xcvalu", RISCVExtensionVersion{1, 0}}, - {"xcvbi", RISCVExtensionVersion{1, 0}}, - {"xcvbitmanip", RISCVExtensionVersion{1, 0}}, - {"xcvelw", RISCVExtensionVersion{1, 0}}, - {"xcvmac", RISCVExtensionVersion{1, 0}}, - {"xcvmem", RISCVExtensionVersion{1, 0}}, - {"xcvsimd", RISCVExtensionVersion{1, 0}}, - {"xsfvcp", RISCVExtensionVersion{1, 0}}, - {"xsfvfnrclipxfqf", RISCVExtensionVersion{1, 0}}, - {"xsfvfwmaccqqq", RISCVExtensionVersion{1, 0}}, - {"xsfvqmaccdod", RISCVExtensionVersion{1, 0}}, - {"xsfvqmaccqoq", RISCVExtensionVersion{1, 0}}, - {"xtheadba", RISCVExtensionVersion{1, 0}}, - {"xtheadbb", RISCVExtensionVersion{1, 0}}, - {"xtheadbs", RISCVExtensionVersion{1, 0}}, - {"xtheadcmo", RISCVExtensionVersion{1, 0}}, - {"xtheadcondmov", RISCVExtensionVersion{1, 0}}, - {"xtheadfmemidx", RISCVExtensionVersion{1, 0}}, - {"xtheadmac", RISCVExtensionVersion{1, 0}}, - {"xtheadmemidx", RISCVExtensionVersion{1, 0}}, - {"xtheadmempair", RISCVExtensionVersion{1, 0}}, - {"xtheadsync", RISCVExtensionVersion{1, 0}}, - {"xtheadvdot", RISCVExtensionVersion{1, 0}}, - {"xventanacondops", RISCVExtensionVersion{1, 0}}, + {"xcvalu", {1, 0}}, + {"xcvbi", {1, 0}}, + {"xcvbitmanip", {1, 0}}, + {"xcvelw", {1, 0}}, + {"xcvmac", {1, 0}}, + {"xcvmem", {1, 0}}, + {"xcvsimd", {1, 0}}, + {"xsfvcp", {1, 0}}, + {"xsfvfnrclipxfqf", {1, 0}}, + {"xsfvfwmaccqqq", {1, 0}}, + {"xsfvqmaccdod", {1, 0}}, + {"xsfvqmaccqoq", {1, 0}}, + {"xtheadba", {1, 0}}, + {"xtheadbb", {1, 0}}, + {"xtheadbs", {1, 0}}, + {"xtheadcmo", {1, 0}}, + {"xtheadcondmov", {1, 0}}, + {"xtheadfmemidx", {1, 0}}, + {"xtheadmac", {1, 0}}, + {"xtheadmemidx", {1, 0}}, + {"xtheadmempair", {1, 0}}, + {"xtheadsync", {1, 0}}, + {"xtheadvdot", {1, 0}}, + {"xventanacondops", {1, 0}}, - {"zawrs", RISCVExtensionVersion{1, 0}}, + {"zawrs", {1, 0}}, - {"zba", RISCVExtensionVersion{1, 0}}, - {"zbb", RISCVExtensionVersion{1, 0}}, - {"zbc", RISCVExtensionVersion{1, 0}}, - {"zbkb", RISCVExtensionVersion{1, 0}}, - {"zbkc", RISCVExtensionVersion{1, 0}}, - {"zbkx", RISCVExtensionVersion{1, 0}}, - {"zbs", RISCVExtensionVersion{1, 0}}, + {"zba", {1, 0}}, + {"zbb", {1, 0}}, + {"zbc", {1, 0}}, + {"zbkb", {1, 0}}, + {"zbkc", {1, 0}}, + {"zbkx", {1, 0}}, + {"zbs", {1, 0}}, - {"zca", RISCVExtensionVersion{1, 0}}, - {"zcb", RISCVExtensionVersion{1, 0}}, - {"zcd", RISCVExtensionVersion{1, 0}}, - {"zce", RISCVExtensionVersion{1, 0}}, - {"zcf", RISCVExtensionVersion{1, 0}}, - {"zcmp", RISCVExtensionVersion{1, 0}}, - {"zcmt", RISCVExtensionVersion{1, 0}}, + {"zca", {1, 0}}, + {"zcb", {1, 0}}, + {"zcd", {1, 0}}, + {"zce", {1, 0}}, + {"zcf", {1, 0}}, + {"zcmp", {1, 0}}, + {"zcmt", {1, 0}}, - {"zdinx", RISCVExtensionVersion{1, 0}}, + {"zdinx", {1, 0}}, - {"zfa", RISCVExtensionVersion{1, 0}}, - {"zfh", RISCVExtensionVersion{1, 0}}, - {"zfhmin", RISCVExtensionVersion{1, 0}}, - {"zfinx", RISCVExtensionVersion{1, 0}}, + {"zfa", {1, 0}}, + {"zfh", {1, 0}}, + {"zfhmin", {1, 0}}, + {"zfinx", {1, 0}}, - {"zhinx", RISCVExtensionVersion{1, 0}}, - {"zhinxmin", RISCVExtensionVersion{1, 0}}, + {"zhinx", {1, 0}}, + {"zhinxmin", {1, 0}}, - {"zicbom", RISCVExtensionVersion{1, 0}}, - {"zicbop", RISCVExtensionVersion{1, 0}}, - {"zicboz", RISCVExtensionVersion{1, 0}}, - {"zicntr", RISCVExtensionVersion{2, 0}}, - {"zicsr", RISCVExtensionVersion{2, 0}}, - {"zifencei", RISCVExtensionVersion{2, 0}}, - {"zihintntl", RISCVExtensionVersion{1, 0}}, - {"zihintpause", RISCVExtensionVersion{2, 0}}, - {"zihpm", RISCVExtensionVersion{2, 0}}, + {"zicbom", {1, 0}}, + {"zicbop", {1, 0}}, + {"zicboz", {1, 0}}, + {"zicntr", {2, 0}}, + {"zicsr", {2, 0}}, + {"zifencei", {2, 0}}, + {"zihintntl", {1, 0}}, + {"zihintpause", {2, 0}}, + {"zihpm", {2, 0}}, - {"zk", RISCVExtensionVersion{1, 0}}, - {"zkn", RISCVExtensionVersion{1, 0}}, - {"zknd", RISCVExtensionVersion{1, 0}}, - {"zkne", RISCVExtensionVersion{1, 0}}, - {"zknh", RISCVExtensionVersion{1, 0}}, - {"zkr", RISCVExtensionVersion{1, 0}}, - {"zks", RISCVExtensionVersion{1, 0}}, - {"zksed", RISCVExtensionVersion{1, 0}}, - {"zksh", RISCVExtensionVersion{1, 0}}, - {"zkt", RISCVExtensionVersion{1, 0}}, + {"zk", {1, 0}}, + {"zkn", {1, 0}}, + {"zknd", {1, 0}}, + {"zkne", {1, 0}}, + {"zknh", {1, 0}}, + {"zkr", {1, 0}}, + {"zks", {1, 0}}, + {"zksed", {1, 0}}, + {"zksh", {1, 0}}, + {"zkt", {1, 0}}, - {"zmmul", RISCVExtensionVersion{1, 0}}, + {"zmmul", {1, 0}}, - {"zvbb", RISCVExtensionVersion{1, 0}}, - {"zvbc", RISCVExtensionVersion{1, 0}}, + {"zvbb", {1, 0}}, + {"zvbc", {1, 0}}, - {"zve32f", RISCVExtensionVersion{1, 0}}, - {"zve32x", RISCVExtensionVersion{1, 0}}, - {"zve64d", RISCVExtensionVersion{1, 0}}, - {"zve64f", RISCVExtensionVersion{1, 0}}, - {"zve64x", RISCVExtensionVersion{1, 0}}, + {"zve32f", {1, 0}}, + {"zve32x", {1, 0}}, + {"zve64d", {1, 0}}, + {"zve64f", {1, 0}}, + {"zve64x", {1, 0}}, - {"zvfh", RISCVExtensionVersion{1, 0}}, - {"zvfhmin", RISCVExtensionVersion{1, 0}}, + {"zvfh", {1, 0}}, + {"zvfhmin", {1, 0}}, // vector crypto - {"zvkb", RISCVExtensionVersion{1, 0}}, - {"zvkg", RISCVExtensionVersion{1, 0}}, - {"zvkn", RISCVExtensionVersion{1, 0}}, - {"zvknc", RISCVExtensionVersion{1, 0}}, - {"zvkned", RISCVExtensionVersion{1, 0}}, - {"zvkng", RISCVExtensionVersion{1, 0}}, - {"zvknha", RISCVExtensionVersion{1, 0}}, - {"zvknhb", RISCVExtensionVersion{1, 0}}, - {"zvks", RISCVExtensionVersion{1, 0}}, - {"zvksc", RISCVExtensionVersion{1, 0}}, - {"zvksed", RISCVExtensionVersion{1, 0}}, - {"zvksg", RISCVExtensionVersion{1, 0}}, - {"zvksh", RISCVExtensionVersion{1, 0}}, - {"zvkt", RISCVExtensionVersion{1, 0}}, + {"zvkb", {1, 0}}, + {"zvkg", {1, 0}}, + {"zvkn", {1, 0}}, + {"zvknc", {1, 0}}, + {"zvkned", {1, 0}}, + {"zvkng", {1, 0}}, + {"zvknha", {1, 0}}, + {"zvknhb", {1, 0}}, + {"zvks", {1, 0}}, + {"zvksc", {1, 0}}, + {"zvksed", {1, 0}}, + {"zvksg", {1, 0}}, + {"zvksh", {1, 0}}, + {"zvkt", {1, 0}}, - {"zvl1024b", RISCVExtensionVersion{1, 0}}, - {"zvl128b", RISCVExtensionVersion{1, 0}}, - {"zvl16384b", RISCVExtensionVersion{1, 0}}, - {"zvl2048b", RISCVExtensionVersion{1, 0}}, - {"zvl256b", RISCVExtensionVersion{1, 0}}, - {"zvl32768b", RISCVExtensionVersion{1, 0}}, - {"zvl32b", RISCVExtensionVersion{1, 0}}, - {"zvl4096b", RISCVExtensionVersion{1, 0}}, - {"zvl512b", RISCVExtensionVersion{1, 0}}, - {"zvl64b", RISCVExtensionVersion{1, 0}}, - {"zvl65536b", RISCVExtensionVersion{1, 0}}, - {"zvl8192b", RISCVExtensionVersion{1, 0}}, + {"zvl1024b", {1, 0}}, + {"zvl128b", {1, 0}}, + {"zvl16384b", {1, 0}}, + {"zvl2048b", {1, 0}}, + {"zvl256b", {1, 0}}, + {"zvl32768b", {1, 0}}, + {"zvl32b", {1, 0}}, + {"zvl4096b", {1, 0}}, + {"zvl512b", {1, 0}}, + {"zvl64b", {1, 0}}, + {"zvl65536b", {1, 0}}, + {"zvl8192b", {1, 0}}, }; // NOTE: This table should be sorted alphabetically by extension name. static const RISCVSupportedExtension SupportedExperimentalExtensions[] = { - {"zacas", RISCVExtensionVersion{1, 0}}, + {"zacas", {1, 0}}, - {"zcmop", RISCVExtensionVersion{0, 2}}, + {"zcmop", {0, 2}}, - {"zfbfmin", RISCVExtensionVersion{0, 8}}, + {"zfbfmin", {0, 8}}, - {"zicfilp", RISCVExtensionVersion{0, 4}}, - {"zicfiss", RISCVExtensionVersion{0, 4}}, + {"zicfilp", {0, 4}}, + {"zicfiss", {0, 4}}, - {"zicond", RISCVExtensionVersion{1, 0}}, + {"zicond", {1, 0}}, - {"zimop", RISCVExtensionVersion{0, 1}}, + {"zimop", {0, 1}}, - {"ztso", RISCVExtensionVersion{0, 1}}, + {"ztso", {0, 1}}, - {"zvfbfmin", RISCVExtensionVersion{0, 8}}, - {"zvfbfwma", RISCVExtensionVersion{0, 8}}, + {"zvfbfmin", {0, 8}}, + {"zvfbfwma", {0, 8}}, }; static void verifyTables() { @@ -237,8 +232,8 @@ void llvm::riscvExtensionsHelp(StringMap<StringRef> DescMap) { for (const auto &E : SupportedExtensions) ExtMap[E.Name] = {E.Version.Major, E.Version.Minor}; for (const auto &E : ExtMap) { - std::string Version = std::to_string(E.second.MajorVersion) + "." + - std::to_string(E.second.MinorVersion); + std::string Version = + std::to_string(E.second.Major) + "." + std::to_string(E.second.Minor); PrintExtension(E.first, Version, DescMap[E.first]); } @@ -247,8 +242,8 @@ void llvm::riscvExtensionsHelp(StringMap<StringRef> DescMap) { for (const auto &E : SupportedExperimentalExtensions) ExtMap[E.Name] = {E.Version.Major, E.Version.Minor}; for (const auto &E : ExtMap) { - std::string Version = std::to_string(E.second.MajorVersion) + "." + - std::to_string(E.second.MinorVersion); + std::string Version = + std::to_string(E.second.Major) + "." + std::to_string(E.second.Minor); PrintExtension(E.first, Version, DescMap["experimental-" + E.first]); } @@ -293,7 +288,7 @@ struct LessExtName { }; } // namespace -static std::optional<RISCVExtensionVersion> +static std::optional<RISCVISAInfo::ExtensionVersion> findDefaultVersion(StringRef ExtName) { // Find default version of an extension. // TODO: We might set default version based on profile or ISA spec. @@ -309,12 +304,9 @@ findDefaultVersion(StringRef ExtName) { return std::nullopt; } -void RISCVISAInfo::addExtension(StringRef ExtName, unsigned MajorVersion, - unsigned MinorVersion) { - RISCVExtensionInfo Ext; - Ext.MajorVersion = MajorVersion; - Ext.MinorVersion = MinorVersion; - Exts[ExtName.str()] = Ext; +void RISCVISAInfo::addExtension(StringRef ExtName, + RISCVISAInfo::ExtensionVersion Version) { + Exts[ExtName.str()] = Version; } static StringRef getExtensionTypeDesc(StringRef Ext) { @@ -337,7 +329,7 @@ static StringRef getExtensionType(StringRef Ext) { return StringRef(); } -static std::optional<RISCVExtensionVersion> +static std::optional<RISCVISAInfo::ExtensionVersion> isExperimentalExtension(StringRef Ext) { auto I = llvm::lower_bound(SupportedExperimentalExtensions, Ext, LessExtName()); @@ -634,8 +626,7 @@ RISCVISAInfo::parseFeatures(unsigned XLen, continue; if (Add) - ISAInfo->addExtension(ExtName, ExtensionInfoIterator->Version.Major, - ExtensionInfoIterator->Version.Minor); + ISAInfo->addExtension(ExtName, ExtensionInfoIterator->Version); else ISAInfo->Exts.erase(ExtName.str()); } @@ -696,7 +687,7 @@ RISCVISAInfo::parseNormalizedArchString(StringRef Arch) { if (MajorVersionStr.getAsInteger(10, MajorVersion)) return createStringError(errc::invalid_argument, "failed to parse major version number"); - ISAInfo->addExtension(ExtName, MajorVersion, MinorVersion); + ISAInfo->addExtension(ExtName, {MajorVersion, MinorVersion}); } ISAInfo->updateFLen(); ISAInfo->updateMinVLen(); @@ -775,7 +766,7 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension, // ISA spec. for (const auto *Ext : RISCVGImplications) { if (auto Version = findDefaultVersion(Ext)) - ISAInfo->addExtension(Ext, Version->Major, Version->Minor); + ISAInfo->addExtension(Ext, *Version); else llvm_unreachable("Default extension version not found?"); } @@ -794,7 +785,7 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension, Minor = Version->Minor; } - ISAInfo->addExtension(StringRef(&Baseline, 1), Major, Minor); + ISAInfo->addExtension(StringRef(&Baseline, 1), {Major, Minor}); } // Consume the base ISA version number and any '_' between rvxxx and the @@ -860,7 +851,7 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension, "unsupported standard user-level extension '%c'", C); } - ISAInfo->addExtension(StringRef(&C, 1), Major, Minor); + ISAInfo->addExtension(StringRef(&C, 1), {Major, Minor}); // Consume full extension name and version, including any optional '_' // between this extension and the next @@ -928,7 +919,7 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension, if (IgnoreUnknown && !isSupportedExtension(Name)) continue; - ISAInfo->addExtension(Name, Major, Minor); + ISAInfo->addExtension(Name, {Major, Minor}); // Extension format is correct, keep parsing the extensions. // TODO: Save Type, Name, Major, Minor to avoid parsing them later. AllExts.push_back(Name); @@ -1143,7 +1134,7 @@ void RISCVISAInfo::updateImplication() { // implied if (!HasE && !HasI) { auto Version = findDefaultVersion("i"); - addExtension("i", Version->Major, Version->Minor); + addExtension("i", Version.value()); } assert(llvm::is_sorted(ImpliedExts) && "Table not sorted by Name"); @@ -1164,7 +1155,7 @@ void RISCVISAInfo::updateImplication() { if (Exts.count(ImpliedExt)) continue; auto Version = findDefaultVersion(ImpliedExt); - addExtension(ImpliedExt, Version->Major, Version->Minor); + addExtension(ImpliedExt, Version.value()); WorkList.insert(ImpliedExt); } } @@ -1174,7 +1165,7 @@ void RISCVISAInfo::updateImplication() { if (XLen == 32 && Exts.count("zce") && Exts.count("f") && !Exts.count("zcf")) { auto Version = findDefaultVersion("zcf"); - addExtension("zcf", Version->Major, Version->Minor); + addExtension("zcf", Version.value()); } } @@ -1209,7 +1200,7 @@ void RISCVISAInfo::updateCombination() { IsAllRequiredFeatureExist &= hasExtension(Ext); if (IsAllRequiredFeatureExist) { auto Version = findDefaultVersion(CombineExt); - addExtension(CombineExt, Version->Major, Version->Minor); + addExtension(CombineExt, Version.value()); IsNewCombine = true; } } @@ -1266,7 +1257,7 @@ std::string RISCVISAInfo::toString() const { StringRef ExtName = Ext.first; auto ExtInfo = Ext.second; Arch << LS << ExtName; - Arch << ExtInfo.MajorVersion << "p" << ExtInfo.MinorVersion; + Arch << ExtInfo.Major << "p" << ExtInfo.Minor; } return Arch.str(); diff --git a/llvm/lib/TableGen/Record.cpp b/llvm/lib/TableGen/Record.cpp index aa981fdab4b3..2b3e8a0c7f84 100644 --- a/llvm/lib/TableGen/Record.cpp +++ b/llvm/lib/TableGen/Record.cpp @@ -923,15 +923,16 @@ Init *UnOpInit::Fold(Record *CurRec, bool IsFinal) const { case GETDAGOP: if (DagInit *Dag = dyn_cast<DagInit>(LHS)) { - DefInit *DI = DefInit::get(Dag->getOperatorAsDef({})); - if (!DI->getType()->typeIsA(getType())) { + // TI is not necessarily a def due to the late resolution in multiclasses, + // but has to be a TypedInit. + auto *TI = cast<TypedInit>(Dag->getOperator()); + if (!TI->getType()->typeIsA(getType())) { PrintFatalError(CurRec->getLoc(), - Twine("Expected type '") + - getType()->getAsString() + "', got '" + - DI->getType()->getAsString() + "' in: " + - getAsString() + "\n"); + Twine("Expected type '") + getType()->getAsString() + + "', got '" + TI->getType()->getAsString() + + "' in: " + getAsString() + "\n"); } else { - return DI; + return Dag->getOperator(); } } break; diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp index 90e1ce9ddf66..7d2ff146a340 100644 --- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -256,6 +256,11 @@ void AArch64AsmPrinter::emitStartOfAsmFile(Module &M) { if (BTE->getZExtValue()) Flags |= ELF::GNU_PROPERTY_AARCH64_FEATURE_1_BTI; + if (const auto *GCS = mdconst::extract_or_null<ConstantInt>( + M.getModuleFlag("guarded-control-stack"))) + if (GCS->getZExtValue()) + Flags |= ELF::GNU_PROPERTY_AARCH64_FEATURE_1_GCS; + if (const auto *Sign = mdconst::extract_or_null<ConstantInt>( M.getModuleFlag("sign-return-address"))) if (Sign->getZExtValue()) diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index edc8cc7d4d1e..ea5679b4d5e3 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -6834,10 +6834,10 @@ static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) { return getPackedVectorTypeFromPredicateType( Ctx, Root->getOperand(6)->getValueType(0), /*NumVec=*/4); case Intrinsic::aarch64_sve_ld1udq: - case Intrinsic::aarch64_sve_st1udq: + case Intrinsic::aarch64_sve_st1dq: return EVT(MVT::nxv1i64); case Intrinsic::aarch64_sve_ld1uwq: - case Intrinsic::aarch64_sve_st1uwq: + case Intrinsic::aarch64_sve_st1wq: return EVT(MVT::nxv1i32); } } diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 47e665176e8b..e2d07a096496 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -4513,8 +4513,7 @@ static SDValue skipExtensionForVectorMULL(SDValue N, SelectionDAG &DAG) { SDLoc dl(N); SmallVector<SDValue, 8> Ops; for (unsigned i = 0; i != NumElts; ++i) { - ConstantSDNode *C = cast<ConstantSDNode>(N.getOperand(i)); - const APInt &CInt = C->getAPIntValue(); + const APInt &CInt = N.getConstantOperandAPInt(i); // Element types smaller than 32 bits are not legal, so use i32 elements. // The values are implicitly truncated so sext vs. zext doesn't matter. Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32)); diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 1cfbf4737a6f..42b7a6418032 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -4214,6 +4214,9 @@ static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) { switch (FirstOpc) { default: return false; + case AArch64::LDRQui: + case AArch64::LDURQi: + return SecondOpc == AArch64::LDRQui || SecondOpc == AArch64::LDURQi; case AArch64::LDRWui: case AArch64::LDURWi: return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi; diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp index b435b3ce03e7..e90b8a8ca7ac 100644 --- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -1326,10 +1326,14 @@ static int alignTo(int Num, int PowOf2) { static bool mayAlias(MachineInstr &MIa, SmallVectorImpl<MachineInstr *> &MemInsns, AliasAnalysis *AA) { - for (MachineInstr *MIb : MemInsns) - if (MIa.mayAlias(AA, *MIb, /*UseTBAA*/ false)) + for (MachineInstr *MIb : MemInsns) { + if (MIa.mayAlias(AA, *MIb, /*UseTBAA*/ false)) { + LLVM_DEBUG(dbgs() << "Aliasing with: "; MIb->dump()); return true; + } + } + LLVM_DEBUG(dbgs() << "No aliases found\n"); return false; } @@ -1757,9 +1761,11 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, // Remember any instructions that read/write memory between FirstMI and MI. SmallVector<MachineInstr *, 4> MemInsns; + LLVM_DEBUG(dbgs() << "Find match for: "; FirstMI.dump()); for (unsigned Count = 0; MBBI != E && Count < Limit; MBBI = next_nodbg(MBBI, E)) { MachineInstr &MI = *MBBI; + LLVM_DEBUG(dbgs() << "Analysing 2nd insn: "; MI.dump()); UsedInBetween.accumulate(MI); @@ -1859,6 +1865,8 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI); MemInsns.push_back(&MI); + LLVM_DEBUG(dbgs() << "Offset doesn't fit in immediate, " + << "keep looking.\n"); continue; } // If the alignment requirements of the paired (scaled) instruction @@ -1868,6 +1876,9 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI); MemInsns.push_back(&MI); + LLVM_DEBUG(dbgs() + << "Offset doesn't fit due to alignment requirements, " + << "keep looking.\n"); continue; } } @@ -1884,14 +1895,22 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, const bool SameLoadReg = MayLoad && TRI->isSuperOrSubRegisterEq( Reg, getLdStRegOp(MI).getReg()); - // If the Rt of the second instruction was not modified or used between - // the two instructions and none of the instructions between the second - // and first alias with the second, we can combine the second into the - // first. - if (ModifiedRegUnits.available(getLdStRegOp(MI).getReg()) && - !(MI.mayLoad() && !SameLoadReg && - !UsedRegUnits.available(getLdStRegOp(MI).getReg())) && - !mayAlias(MI, MemInsns, AA)) { + // If the Rt of the second instruction (destination register of the + // load) was not modified or used between the two instructions and none + // of the instructions between the second and first alias with the + // second, we can combine the second into the first. + bool RtNotModified = + ModifiedRegUnits.available(getLdStRegOp(MI).getReg()); + bool RtNotUsed = !(MI.mayLoad() && !SameLoadReg && + !UsedRegUnits.available(getLdStRegOp(MI).getReg())); + + LLVM_DEBUG(dbgs() << "Checking, can combine 2nd into 1st insn:\n" + << "Reg '" << getLdStRegOp(MI) << "' not modified: " + << (RtNotModified ? "true" : "false") << "\n" + << "Reg '" << getLdStRegOp(MI) << "' not used: " + << (RtNotUsed ? "true" : "false") << "\n"); + + if (RtNotModified && RtNotUsed && !mayAlias(MI, MemInsns, AA)) { // For pairs loading into the same reg, try to find a renaming // opportunity to allow the renaming of Reg between FirstMI and MI // and combine MI into FirstMI; otherwise bail and keep looking. @@ -1904,6 +1923,8 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI); MemInsns.push_back(&MI); + LLVM_DEBUG(dbgs() << "Can't find reg for renaming, " + << "keep looking.\n"); continue; } Flags.setRenameReg(*RenameReg); @@ -1919,10 +1940,15 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, // between the two instructions and none of the instructions between the // first and the second alias with the first, we can combine the first // into the second. - if (!(MayLoad && - !UsedRegUnits.available(getLdStRegOp(FirstMI).getReg())) && - !mayAlias(FirstMI, MemInsns, AA)) { + RtNotModified = !( + MayLoad && !UsedRegUnits.available(getLdStRegOp(FirstMI).getReg())); + + LLVM_DEBUG(dbgs() << "Checking, can combine 1st into 2nd insn:\n" + << "Reg '" << getLdStRegOp(FirstMI) + << "' not modified: " + << (RtNotModified ? "true" : "false") << "\n"); + if (RtNotModified && !mayAlias(FirstMI, MemInsns, AA)) { if (ModifiedRegUnits.available(getLdStRegOp(FirstMI).getReg())) { Flags.setMergeForward(true); Flags.clearRenameReg(); @@ -1938,8 +1964,8 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, MBBIWithRenameReg = MBBI; } } - // Unable to combine these instructions due to interference in between. - // Keep looking. + LLVM_DEBUG(dbgs() << "Unable to combine these instructions due to " + << "interference in between, keep looking.\n"); } } @@ -1948,16 +1974,20 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, // If the instruction wasn't a matching load or store. Stop searching if we // encounter a call instruction that might modify memory. - if (MI.isCall()) + if (MI.isCall()) { + LLVM_DEBUG(dbgs() << "Found a call, stop looking.\n"); return E; + } // Update modified / uses register units. LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI); // Otherwise, if the base register is modified, we have no match, so // return early. - if (!ModifiedRegUnits.available(BaseReg)) + if (!ModifiedRegUnits.available(BaseReg)) { + LLVM_DEBUG(dbgs() << "Base reg is modified, stop looking.\n"); return E; + } // Update list of instructions that read/write memory. if (MI.mayLoadOrStore()) diff --git a/llvm/lib/Target/AArch64/AArch64LoopIdiomTransform.cpp b/llvm/lib/Target/AArch64/AArch64LoopIdiomTransform.cpp index 6fcd9c290e9c..6c6cd120b035 100644 --- a/llvm/lib/Target/AArch64/AArch64LoopIdiomTransform.cpp +++ b/llvm/lib/Target/AArch64/AArch64LoopIdiomTransform.cpp @@ -53,7 +53,7 @@ using namespace PatternMatch; #define DEBUG_TYPE "aarch64-loop-idiom-transform" static cl::opt<bool> - DisableAll("disable-aarch64-lit-all", cl::Hidden, cl::init(true), + DisableAll("disable-aarch64-lit-all", cl::Hidden, cl::init(false), cl::desc("Disable AArch64 Loop Idiom Transform Pass.")); static cl::opt<bool> DisableByteCmp( diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index ee10a7d1c706..4782ad076c60 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -1397,17 +1397,17 @@ let Predicates = [HasSVEorSME] in { (RegImmInst Z_q:$Zt, PPR3bAny:$Pg, GPR64sp:$base, (i64 0))>; } - // ld1quw/st1quw + // ld1quw/st1qw defm : sve_ld1q_pat<nxv4i32, nxv1i1, int_aarch64_sve_ld1uwq, LD1W_Q, LD1W_Q_IMM, am_sve_regreg_lsl2>; defm : sve_ld1q_pat<nxv4f32, nxv1i1, int_aarch64_sve_ld1uwq, LD1W_Q, LD1W_Q_IMM, am_sve_regreg_lsl2>; - defm : sve_st1q_pat<nxv4i32, nxv1i1, int_aarch64_sve_st1uwq, ST1W_Q, ST1W_Q_IMM, am_sve_regreg_lsl2>; - defm : sve_st1q_pat<nxv4f32, nxv1i1, int_aarch64_sve_st1uwq, ST1W_Q, ST1W_Q_IMM, am_sve_regreg_lsl2>; + defm : sve_st1q_pat<nxv4i32, nxv1i1, int_aarch64_sve_st1wq, ST1W_Q, ST1W_Q_IMM, am_sve_regreg_lsl2>; + defm : sve_st1q_pat<nxv4f32, nxv1i1, int_aarch64_sve_st1wq, ST1W_Q, ST1W_Q_IMM, am_sve_regreg_lsl2>; - // ld1qud/st1qud + // ld1qud/st1qd defm : sve_ld1q_pat<nxv2i64, nxv1i1, int_aarch64_sve_ld1udq, LD1D_Q, LD1D_Q_IMM, am_sve_regreg_lsl3>; defm : sve_ld1q_pat<nxv2f64, nxv1i1, int_aarch64_sve_ld1udq, LD1D_Q, LD1D_Q_IMM, am_sve_regreg_lsl3>; - defm : sve_st1q_pat<nxv2i64, nxv1i1, int_aarch64_sve_st1udq, ST1D_Q, ST1D_Q_IMM, am_sve_regreg_lsl3>; - defm : sve_st1q_pat<nxv2f64, nxv1i1, int_aarch64_sve_st1udq, ST1D_Q, ST1D_Q_IMM, am_sve_regreg_lsl3>; + defm : sve_st1q_pat<nxv2i64, nxv1i1, int_aarch64_sve_st1dq, ST1D_Q, ST1D_Q_IMM, am_sve_regreg_lsl3>; + defm : sve_st1q_pat<nxv2f64, nxv1i1, int_aarch64_sve_st1dq, ST1D_Q, ST1D_Q_IMM, am_sve_regreg_lsl3>; } // End HasSVEorSME @@ -4006,7 +4006,9 @@ defm WHILEHS_CXX : sve2p1_int_while_rr_pn<"whilehs", 0b100>; defm WHILEHI_CXX : sve2p1_int_while_rr_pn<"whilehi", 0b101>; defm WHILELO_CXX : sve2p1_int_while_rr_pn<"whilelo", 0b110>; defm WHILELS_CXX : sve2p1_int_while_rr_pn<"whilels", 0b111>; +} // End HasSVE2p1_or_HasSME2 +let Predicates = [HasSVEorSME] in { // Aliases for existing SVE instructions for which predicate-as-counter are // accepted as an operand to the instruction @@ -4025,7 +4027,7 @@ def : InstAlias<"mov $Pd, $Pn", def : InstAlias<"pfalse\t$Pd", (PFALSE PNRasPPR8:$Pd), 0>; -} // End HasSVE2p1_or_HasSME2 +} //===----------------------------------------------------------------------===// // Non-widening BFloat16 to BFloat16 instructions @@ -4095,7 +4097,7 @@ defm FMAXQV : sve2p1_fp_reduction_q<0b110, "fmaxqv", int_aarch64_sve_fmaxqv>; defm FMINQV : sve2p1_fp_reduction_q<0b111, "fminqv", int_aarch64_sve_fminqv>; defm DUPQ_ZZI : sve2p1_dupq<"dupq">; -defm EXTQ_ZZI : sve2p1_extq<"extq", int_aarch64_sve_extq_lane>; +defm EXTQ_ZZI : sve2p1_extq<"extq", int_aarch64_sve_extq>; defm PMOV_PZI : sve2p1_vector_to_pred<"pmov", int_aarch64_sve_pmov_to_pred_lane, int_aarch64_sve_pmov_to_pred_lane_zero>; defm PMOV_ZIP : sve2p1_pred_to_vector<"pmov", int_aarch64_sve_pmov_to_vector_lane_merging, int_aarch64_sve_pmov_to_vector_lane_zeroing>; diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index b5b8b6829178..13b5e578391d 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -1406,9 +1406,23 @@ static std::optional<Instruction *> instCombineSVEAllActive(IntrinsicInst &II, return &II; } +// Simplify operations where predicate has all inactive lanes or try to replace +// with _u form when all lanes are active +static std::optional<Instruction *> +instCombineSVEAllOrNoActive(InstCombiner &IC, IntrinsicInst &II, + Intrinsic::ID IID) { + if (match(II.getOperand(0), m_ZeroInt())) { + // llvm_ir, pred(0), op1, op2 - Spec says to return op1 when all lanes are + // inactive for sv[func]_m + return IC.replaceInstUsesWith(II, II.getOperand(1)); + } + return instCombineSVEAllActive(II, IID); +} + static std::optional<Instruction *> instCombineSVEVectorAdd(InstCombiner &IC, IntrinsicInst &II) { - if (auto II_U = instCombineSVEAllActive(II, Intrinsic::aarch64_sve_add_u)) + if (auto II_U = + instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_add_u)) return II_U; if (auto MLA = instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul, Intrinsic::aarch64_sve_mla>( @@ -1423,7 +1437,8 @@ static std::optional<Instruction *> instCombineSVEVectorAdd(InstCombiner &IC, static std::optional<Instruction *> instCombineSVEVectorFAdd(InstCombiner &IC, IntrinsicInst &II) { - if (auto II_U = instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fadd_u)) + if (auto II_U = + instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fadd_u)) return II_U; if (auto FMLA = instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul, @@ -1465,7 +1480,8 @@ instCombineSVEVectorFAddU(InstCombiner &IC, IntrinsicInst &II) { static std::optional<Instruction *> instCombineSVEVectorFSub(InstCombiner &IC, IntrinsicInst &II) { - if (auto II_U = instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fsub_u)) + if (auto II_U = + instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fsub_u)) return II_U; if (auto FMLS = instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul, @@ -1507,7 +1523,8 @@ instCombineSVEVectorFSubU(InstCombiner &IC, IntrinsicInst &II) { static std::optional<Instruction *> instCombineSVEVectorSub(InstCombiner &IC, IntrinsicInst &II) { - if (auto II_U = instCombineSVEAllActive(II, Intrinsic::aarch64_sve_sub_u)) + if (auto II_U = + instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_sub_u)) return II_U; if (auto MLS = instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul, Intrinsic::aarch64_sve_mls>( @@ -1523,11 +1540,6 @@ static std::optional<Instruction *> instCombineSVEVectorMul(InstCombiner &IC, auto *OpMultiplicand = II.getOperand(1); auto *OpMultiplier = II.getOperand(2); - // Canonicalise a non _u intrinsic only. - if (II.getIntrinsicID() != IID) - if (auto II_U = instCombineSVEAllActive(II, IID)) - return II_U; - // Return true if a given instruction is a unit splat value, false otherwise. auto IsUnitSplat = [](auto *I) { auto *SplatValue = getSplatValue(I); @@ -1891,34 +1903,38 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC, case Intrinsic::aarch64_sve_ptest_last: return instCombineSVEPTest(IC, II); case Intrinsic::aarch64_sve_fabd: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fabd_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fabd_u); case Intrinsic::aarch64_sve_fadd: return instCombineSVEVectorFAdd(IC, II); case Intrinsic::aarch64_sve_fadd_u: return instCombineSVEVectorFAddU(IC, II); case Intrinsic::aarch64_sve_fdiv: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fdiv_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fdiv_u); case Intrinsic::aarch64_sve_fmax: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmax_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmax_u); case Intrinsic::aarch64_sve_fmaxnm: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmaxnm_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmaxnm_u); case Intrinsic::aarch64_sve_fmin: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmin_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmin_u); case Intrinsic::aarch64_sve_fminnm: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fminnm_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fminnm_u); case Intrinsic::aarch64_sve_fmla: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmla_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmla_u); case Intrinsic::aarch64_sve_fmls: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmls_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmls_u); case Intrinsic::aarch64_sve_fmul: + if (auto II_U = + instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmul_u)) + return II_U; + return instCombineSVEVectorMul(IC, II, Intrinsic::aarch64_sve_fmul_u); case Intrinsic::aarch64_sve_fmul_u: return instCombineSVEVectorMul(IC, II, Intrinsic::aarch64_sve_fmul_u); case Intrinsic::aarch64_sve_fmulx: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmulx_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmulx_u); case Intrinsic::aarch64_sve_fnmla: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fnmla_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fnmla_u); case Intrinsic::aarch64_sve_fnmls: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fnmls_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fnmls_u); case Intrinsic::aarch64_sve_fsub: return instCombineSVEVectorFSub(IC, II); case Intrinsic::aarch64_sve_fsub_u: @@ -1930,20 +1946,24 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC, Intrinsic::aarch64_sve_mla_u>( IC, II, true); case Intrinsic::aarch64_sve_mla: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_mla_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_mla_u); case Intrinsic::aarch64_sve_mls: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_mls_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_mls_u); case Intrinsic::aarch64_sve_mul: + if (auto II_U = + instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_mul_u)) + return II_U; + return instCombineSVEVectorMul(IC, II, Intrinsic::aarch64_sve_mul_u); case Intrinsic::aarch64_sve_mul_u: return instCombineSVEVectorMul(IC, II, Intrinsic::aarch64_sve_mul_u); case Intrinsic::aarch64_sve_sabd: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_sabd_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_sabd_u); case Intrinsic::aarch64_sve_smax: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_smax_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_smax_u); case Intrinsic::aarch64_sve_smin: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_smin_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_smin_u); case Intrinsic::aarch64_sve_smulh: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_smulh_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_smulh_u); case Intrinsic::aarch64_sve_sub: return instCombineSVEVectorSub(IC, II); case Intrinsic::aarch64_sve_sub_u: @@ -1951,31 +1971,31 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC, Intrinsic::aarch64_sve_mls_u>( IC, II, true); case Intrinsic::aarch64_sve_uabd: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_uabd_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_uabd_u); case Intrinsic::aarch64_sve_umax: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_umax_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_umax_u); case Intrinsic::aarch64_sve_umin: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_umin_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_umin_u); case Intrinsic::aarch64_sve_umulh: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_umulh_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_umulh_u); case Intrinsic::aarch64_sve_asr: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_asr_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_asr_u); case Intrinsic::aarch64_sve_lsl: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_lsl_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_lsl_u); case Intrinsic::aarch64_sve_lsr: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_lsr_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_lsr_u); case Intrinsic::aarch64_sve_and: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_and_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_and_u); case Intrinsic::aarch64_sve_bic: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_bic_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_bic_u); case Intrinsic::aarch64_sve_eor: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_eor_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_eor_u); case Intrinsic::aarch64_sve_orr: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_orr_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_orr_u); case Intrinsic::aarch64_sve_sqsub: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_sqsub_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_sqsub_u); case Intrinsic::aarch64_sve_uqsub: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_uqsub_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_uqsub_u); case Intrinsic::aarch64_sve_tbl: return instCombineSVETBL(IC, II); case Intrinsic::aarch64_sve_uunpkhi: diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index b657a0954d78..302116447efc 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -1166,7 +1166,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) getActionDefinitionsBuilder(G_FMAD).lower(); // Access to floating-point environment. - getActionDefinitionsBuilder({G_GET_FPMODE, G_SET_FPMODE, G_RESET_FPMODE}) + getActionDefinitionsBuilder({G_GET_FPENV, G_SET_FPENV, G_RESET_FPENV, + G_GET_FPMODE, G_SET_FPMODE, G_RESET_FPMODE}) .libcall(); getActionDefinitionsBuilder(G_IS_FPCLASS).lower(); diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp index 496ab18e9b19..6e074b6a63c4 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp @@ -120,7 +120,8 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx, assert((!Target.getSymA() || Target.getSymA()->getKind() == MCSymbolRefExpr::VK_None || - Target.getSymA()->getKind() == MCSymbolRefExpr::VK_PLT) && + Target.getSymA()->getKind() == MCSymbolRefExpr::VK_PLT || + Target.getSymA()->getKind() == MCSymbolRefExpr::VK_GOTPCREL) && "Should only be expression-level modifiers here"); assert((!Target.getSymB() || @@ -206,7 +207,10 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx, case FK_Data_2: return R_CLS(ABS16); case FK_Data_4: - return R_CLS(ABS32); + return (!IsILP32 && + Target.getAccessVariant() == MCSymbolRefExpr::VK_GOTPCREL) + ? ELF::R_AARCH64_GOTPCREL32 + : R_CLS(ABS32); case FK_Data_8: if (IsILP32) { Ctx.reportError(Fixup.getLoc(), diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index 70f3c2c99f0f..44d9a8ac7cb6 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -1268,7 +1268,7 @@ multiclass sve2_int_perm_revd<string asm, SDPatternOperator op> { } class sve2_clamp<string asm, bits<2> sz, bit U, ZPRRegOp zpr_ty> - : I<(outs zpr_ty:$Zd), (ins zpr_ty:$Zn, zpr_ty:$Zm, zpr_ty:$_Zd), + : I<(outs zpr_ty:$Zd), (ins zpr_ty:$_Zd, zpr_ty:$Zn, zpr_ty:$Zm), asm, "\t$Zd, $Zn, $Zm", "", []>, Sched<[]> { bits<5> Zm; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td index 0c77fe725958..b9411e205212 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td @@ -111,7 +111,7 @@ def smulu64 : GICombineRule< [{ return matchCombine_s_mul_u64(*${smul}, ${matchinfo}); }]), (apply [{ applyCombine_s_mul_u64(*${smul}, ${matchinfo}); }])>; -def sign_exension_in_reg_matchdata : GIDefMatchData<"MachineInstr *">; +def sign_exension_in_reg_matchdata : GIDefMatchData<"std::pair<MachineInstr *, unsigned>">; def sign_extension_in_reg : GICombineRule< (defs root:$sign_inreg, sign_exension_in_reg_matchdata:$matchinfo), diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 719ae2e8750c..41462d7a133e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -1579,13 +1579,9 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, bool AMDGPUDAGToDAGISel::SelectBUFSOffset(SDValue ByteOffsetNode, SDValue &SOffset) const { - if (Subtarget->hasRestrictedSOffset()) { - if (auto SOffsetConst = dyn_cast<ConstantSDNode>(ByteOffsetNode)) { - if (SOffsetConst->isZero()) { - SOffset = CurDAG->getRegister(AMDGPU::SGPR_NULL, MVT::i32); - return true; - } - } + if (Subtarget->hasRestrictedSOffset() && isNullConstant(ByteOffsetNode)) { + SOffset = CurDAG->getRegister(AMDGPU::SGPR_NULL, MVT::i32); + return true; } SOffset = ByteOffsetNode; @@ -2483,7 +2479,7 @@ void AMDGPUDAGToDAGISel::SelectDSAppendConsume(SDNode *N, unsigned IntrID) { SDValue PtrBase = Ptr.getOperand(0); SDValue PtrOffset = Ptr.getOperand(1); - const APInt &OffsetVal = cast<ConstantSDNode>(PtrOffset)->getAPIntValue(); + const APInt &OffsetVal = PtrOffset->getAsAPIntVal(); if (isDSOffsetLegal(PtrBase, OffsetVal.getZExtValue())) { N = glueCopyToM0(N, PtrBase); Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32); diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp index d2a02143e4e7..5762f1906a16 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp @@ -1026,6 +1026,51 @@ public: return N; } + /// Strip "amdgpu-no-lds-kernel-id" from any functions where we may have + /// introduced its use. If AMDGPUAttributor ran prior to the pass, we inferred + /// the lack of llvm.amdgcn.lds.kernel.id calls. + void removeNoLdsKernelIdFromReachable(CallGraph &CG, Function *KernelRoot) { + KernelRoot->removeFnAttr("amdgpu-no-lds-kernel-id"); + + SmallVector<Function *> Tmp({CG[KernelRoot]->getFunction()}); + if (!Tmp.back()) + return; + + SmallPtrSet<Function *, 8> Visited; + bool SeenUnknownCall = false; + + do { + Function *F = Tmp.pop_back_val(); + + for (auto &N : *CG[F]) { + if (!N.second) + continue; + + Function *Callee = N.second->getFunction(); + if (!Callee) { + if (!SeenUnknownCall) { + SeenUnknownCall = true; + + // If we see any indirect calls, assume nothing about potential + // targets. + // TODO: This could be refined to possible LDS global users. + for (auto &N : *CG.getExternalCallingNode()) { + Function *PotentialCallee = N.second->getFunction(); + if (!isKernelLDS(PotentialCallee)) + PotentialCallee->removeFnAttr("amdgpu-no-lds-kernel-id"); + } + + continue; + } + } + + Callee->removeFnAttr("amdgpu-no-lds-kernel-id"); + if (Visited.insert(Callee).second) + Tmp.push_back(Callee); + } + } while (!Tmp.empty()); + } + DenseMap<Function *, GlobalVariable *> lowerDynamicLDSVariables( Module &M, LDSUsesInfoTy &LDSUsesInfo, DenseSet<Function *> const &KernelsThatIndirectlyAllocateDynamicLDS, @@ -1175,6 +1220,13 @@ public: M, TableLookupVariablesOrdered, OrderedKernels, KernelToReplacement); replaceUsesInInstructionsWithTableLookup(M, TableLookupVariablesOrdered, LookupTable); + + // Strip amdgpu-no-lds-kernel-id from all functions reachable from the + // kernel. We may have inferred this wasn't used prior to the pass. + // + // TODO: We could filter out subgraphs that do not access LDS globals. + for (Function *F : KernelsThatAllocateTableLDS) + removeNoLdsKernelIdFromReachable(CG, F); } DenseMap<Function *, GlobalVariable *> KernelToCreatedDynamicLDS = diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp index 21bfab52c6c4..bb1d6cb72e80 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp @@ -99,10 +99,10 @@ public: // Combine unsigned buffer load and signed extension instructions to generate // signed buffer laod instructions. - bool matchCombineSignExtendInReg(MachineInstr &MI, - MachineInstr *&MatchInfo) const; - void applyCombineSignExtendInReg(MachineInstr &MI, - MachineInstr *&MatchInfo) const; + bool matchCombineSignExtendInReg( + MachineInstr &MI, std::pair<MachineInstr *, unsigned> &MatchInfo) const; + void applyCombineSignExtendInReg( + MachineInstr &MI, std::pair<MachineInstr *, unsigned> &MatchInfo) const; // Find the s_mul_u64 instructions where the higher bits are either // zero-extended or sign-extended. @@ -395,34 +395,36 @@ bool AMDGPUPostLegalizerCombinerImpl::matchRemoveFcanonicalize( // Identify buffer_load_{u8, u16}. bool AMDGPUPostLegalizerCombinerImpl::matchCombineSignExtendInReg( - MachineInstr &MI, MachineInstr *&SubwordBufferLoad) const { - Register Op0Reg = MI.getOperand(1).getReg(); - SubwordBufferLoad = MRI.getVRegDef(Op0Reg); - - if (!MRI.hasOneNonDBGUse(Op0Reg)) + MachineInstr &MI, std::pair<MachineInstr *, unsigned> &MatchData) const { + Register LoadReg = MI.getOperand(1).getReg(); + if (!MRI.hasOneNonDBGUse(LoadReg)) return false; // Check if the first operand of the sign extension is a subword buffer load // instruction. - return SubwordBufferLoad->getOpcode() == AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE || - SubwordBufferLoad->getOpcode() == AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT; + MachineInstr *LoadMI = MRI.getVRegDef(LoadReg); + int64_t Width = MI.getOperand(2).getImm(); + switch (LoadMI->getOpcode()) { + case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE: + MatchData = {LoadMI, AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE}; + return Width == 8; + case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT: + MatchData = {LoadMI, AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT}; + return Width == 16; + } + return false; } // Combine buffer_load_{u8, u16} and the sign extension instruction to generate // buffer_load_{i8, i16}. void AMDGPUPostLegalizerCombinerImpl::applyCombineSignExtendInReg( - MachineInstr &MI, MachineInstr *&SubwordBufferLoad) const { - // Modify the opcode and the destination of buffer_load_{u8, u16}: - // Replace the opcode. - unsigned Opc = - SubwordBufferLoad->getOpcode() == AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE - ? AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE - : AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT; - SubwordBufferLoad->setDesc(TII.get(Opc)); - // Update the destination register of SubwordBufferLoad with the destination - // register of the sign extension. + MachineInstr &MI, std::pair<MachineInstr *, unsigned> &MatchData) const { + auto [LoadMI, NewOpcode] = MatchData; + LoadMI->setDesc(TII.get(NewOpcode)); + // Update the destination register of the load with the destination register + // of the sign extension. Register SignExtendInsnDst = MI.getOperand(0).getReg(); - SubwordBufferLoad->getOperand(0).setReg(SignExtendInsnDst); + LoadMI->getOperand(0).setReg(SignExtendInsnDst); // Remove the sign extension. MI.eraseFromParent(); } diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index b7f043860115..ba79affe683d 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -1342,10 +1342,8 @@ private: unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum, unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens); bool ParseRegRange(unsigned& Num, unsigned& Width); - unsigned getRegularReg(RegisterKind RegKind, - unsigned RegNum, - unsigned RegWidth, - SMLoc Loc); + unsigned getRegularReg(RegisterKind RegKind, unsigned RegNum, unsigned SubReg, + unsigned RegWidth, SMLoc Loc); bool isRegister(); bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const; @@ -2616,6 +2614,8 @@ AMDGPUAsmParser::isRegister(const AsmToken &Token, StringRef RegName = Reg->Name; StringRef RegSuffix = Str.substr(RegName.size()); if (!RegSuffix.empty()) { + RegSuffix.consume_back(".l"); + RegSuffix.consume_back(".h"); unsigned Num; // A single register with an index: rXX if (getRegNum(RegSuffix, Num)) @@ -2636,12 +2636,9 @@ AMDGPUAsmParser::isRegister() return isRegister(getToken(), peekToken()); } -unsigned -AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, - unsigned RegNum, - unsigned RegWidth, - SMLoc Loc) { - +unsigned AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum, + unsigned SubReg, unsigned RegWidth, + SMLoc Loc) { assert(isRegularReg(RegKind)); unsigned AlignSize = 1; @@ -2670,7 +2667,17 @@ AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, return AMDGPU::NoRegister; } - return RC.getRegister(RegIdx); + unsigned Reg = RC.getRegister(RegIdx); + + if (SubReg) { + Reg = TRI->getSubReg(Reg, SubReg); + + // Currently all regular registers have their .l and .h subregisters, so + // we should never need to generate an error here. + assert(Reg && "Invalid subregister!"); + } + + return Reg; } bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) { @@ -2748,7 +2755,17 @@ unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, RegKind = RI->Kind; StringRef RegSuffix = RegName.substr(RI->Name.size()); + unsigned SubReg = NoSubRegister; if (!RegSuffix.empty()) { + // We don't know the opcode till we are done parsing, so we don't know if + // registers should be 16 or 32 bit. It is therefore mandatory to put .l or + // .h to correctly specify 16 bit registers. We also can't determine class + // VGPR_16_Lo128 or VGPR_16, so always parse them as VGPR_16. + if (RegSuffix.consume_back(".l")) + SubReg = AMDGPU::lo16; + else if (RegSuffix.consume_back(".h")) + SubReg = AMDGPU::hi16; + // Single 32-bit register: vXX. if (!getRegNum(RegSuffix, RegNum)) { Error(Loc, "invalid register index"); @@ -2761,7 +2778,7 @@ unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, return AMDGPU::NoRegister; } - return getRegularReg(RegKind, RegNum, RegWidth, Loc); + return getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc); } unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, @@ -2813,7 +2830,7 @@ unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, } if (isRegularReg(RegKind)) - Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); + Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc); return Reg; } diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index a7d8ff0242b8..bcd93e30d6c2 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -1450,20 +1450,27 @@ bool GCNHazardRecognizer::fixLdsDirectVMEMHazard(MachineInstr *MI) { return false; return I.readsRegister(VDSTReg, &TRI) || I.modifiesRegister(VDSTReg, &TRI); }; - auto IsExpiredFn = [](const MachineInstr &I, int) { + bool LdsdirCanWait = ST.hasLdsWaitVMSRC(); + auto IsExpiredFn = [this, LdsdirCanWait](const MachineInstr &I, int) { return SIInstrInfo::isVALU(I) || SIInstrInfo::isEXP(I) || (I.getOpcode() == AMDGPU::S_WAITCNT && !I.getOperand(0).getImm()) || (I.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR && - AMDGPU::DepCtr::decodeFieldVmVsrc(I.getOperand(0).getImm()) == 0); + AMDGPU::DepCtr::decodeFieldVmVsrc(I.getOperand(0).getImm()) == 0) || + (LdsdirCanWait && SIInstrInfo::isLDSDIR(I) && + !TII.getNamedOperand(I, AMDGPU::OpName::waitvsrc)->getImm()); }; if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) == std::numeric_limits<int>::max()) return false; - BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), - TII.get(AMDGPU::S_WAITCNT_DEPCTR)) - .addImm(AMDGPU::DepCtr::encodeFieldVmVsrc(0)); + if (LdsdirCanWait) { + TII.getNamedOperand(*MI, AMDGPU::OpName::waitvsrc)->setImm(0); + } else { + BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), + TII.get(AMDGPU::S_WAITCNT_DEPCTR)) + .addImm(AMDGPU::DepCtr::encodeFieldVmVsrc(0)); + } return true; } diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index f6f37f5170a4..85d062a9a6f5 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -1128,6 +1128,8 @@ public: bool hasLdsDirect() const { return getGeneration() >= GFX11; } + bool hasLdsWaitVMSRC() const { return getGeneration() >= GFX12; } + bool hasVALUPartialForwardingHazard() const { return getGeneration() >= GFX11; } diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp index d539d75fdff0..201cc8d01e2d 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp @@ -31,7 +31,6 @@ AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Triple &TT, InlineAsmEnd = ";#ASMEND"; //===--- Data Emission Directives -------------------------------------===// - SunStyleELFSectionSwitchSyntax = true; UsesELFSectionDirectiveForBSS = true; //===--- Global Variable Emission Directives --------------------------===// diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 6ddc7e864fb2..5a9222e91588 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -8181,12 +8181,8 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, // SGPR_NULL to avoid generating an extra s_mov with zero. static SDValue selectSOffset(SDValue SOffset, SelectionDAG &DAG, const GCNSubtarget *Subtarget) { - if (Subtarget->hasRestrictedSOffset()) - if (auto SOffsetConst = dyn_cast<ConstantSDNode>(SOffset)) { - if (SOffsetConst->isZero()) { - return DAG.getRegister(AMDGPU::SGPR_NULL, MVT::i32); - } - } + if (Subtarget->hasRestrictedSOffset() && isNullConstant(SOffset)) + return DAG.getRegister(AMDGPU::SGPR_NULL, MVT::i32); return SOffset; } diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index 1cb1d32707f2..1f480c248154 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -292,7 +292,7 @@ public: VgprVmemTypes[GprNo] = 0; } - void setNonKernelFunctionInitialState() { + void setStateOnFunctionEntryOrReturn() { setScoreUB(VS_CNT, getWaitCountMax(VS_CNT)); PendingEvents |= WaitEventMaskForInst[VS_CNT]; } @@ -1487,6 +1487,7 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst, if (callWaitsOnFunctionReturn(Inst)) { // Act as a wait on everything ScoreBrackets->applyWaitcnt(AMDGPU::Waitcnt::allZeroExceptVsCnt()); + ScoreBrackets->setStateOnFunctionEntryOrReturn(); } else { // May need to way wait for anything. ScoreBrackets->applyWaitcnt(AMDGPU::Waitcnt()); @@ -1879,7 +1880,7 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) { auto NonKernelInitialState = std::make_unique<WaitcntBrackets>(ST, Limits, Encoding); - NonKernelInitialState->setNonKernelFunctionInitialState(); + NonKernelInitialState->setStateOnFunctionEntryOrReturn(); BlockInfos[&EntryBB].Incoming = std::move(NonKernelInitialState); Modified = true; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index fee900b3efb2..e50f5f28e030 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -5276,10 +5276,15 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const { case AMDGPU::S_FLOOR_F32: return AMDGPU::V_FLOOR_F32_e64; case AMDGPU::S_TRUNC_F32: return AMDGPU::V_TRUNC_F32_e64; case AMDGPU::S_RNDNE_F32: return AMDGPU::V_RNDNE_F32_e64; - case AMDGPU::S_CEIL_F16: return AMDGPU::V_CEIL_F16_t16_e64; - case AMDGPU::S_FLOOR_F16: return AMDGPU::V_FLOOR_F16_t16_e64; - case AMDGPU::S_TRUNC_F16: return AMDGPU::V_TRUNC_F16_t16_e64; - case AMDGPU::S_RNDNE_F16: return AMDGPU::V_RNDNE_F16_t16_e64; + case AMDGPU::S_CEIL_F16: + return ST.useRealTrue16Insts() ? AMDGPU::V_CEIL_F16_t16_e64 + : AMDGPU::V_CEIL_F16_fake16_e64; + case AMDGPU::S_FLOOR_F16: + return AMDGPU::V_FLOOR_F16_fake16_e64; + case AMDGPU::S_TRUNC_F16: + return AMDGPU::V_TRUNC_F16_fake16_e64; + case AMDGPU::S_RNDNE_F16: + return AMDGPU::V_RNDNE_F16_fake16_e64; case AMDGPU::S_ADD_F32: return AMDGPU::V_ADD_F32_e64; case AMDGPU::S_SUB_F32: return AMDGPU::V_SUB_F32_e64; case AMDGPU::S_MIN_F32: return AMDGPU::V_MIN_F32_e64; @@ -5328,15 +5333,15 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const { case AMDGPU::S_CMP_NEQ_F16: return AMDGPU::V_CMP_NEQ_F16_t16_e64; case AMDGPU::S_CMP_NLT_F16: return AMDGPU::V_CMP_NLT_F16_t16_e64; case AMDGPU::V_S_EXP_F32_e64: return AMDGPU::V_EXP_F32_e64; - case AMDGPU::V_S_EXP_F16_e64: return AMDGPU::V_EXP_F16_t16_e64; + case AMDGPU::V_S_EXP_F16_e64: return AMDGPU::V_EXP_F16_fake16_e64; case AMDGPU::V_S_LOG_F32_e64: return AMDGPU::V_LOG_F32_e64; - case AMDGPU::V_S_LOG_F16_e64: return AMDGPU::V_LOG_F16_t16_e64; + case AMDGPU::V_S_LOG_F16_e64: return AMDGPU::V_LOG_F16_fake16_e64; case AMDGPU::V_S_RCP_F32_e64: return AMDGPU::V_RCP_F32_e64; - case AMDGPU::V_S_RCP_F16_e64: return AMDGPU::V_RCP_F16_t16_e64; + case AMDGPU::V_S_RCP_F16_e64: return AMDGPU::V_RCP_F16_fake16_e64; case AMDGPU::V_S_RSQ_F32_e64: return AMDGPU::V_RSQ_F32_e64; - case AMDGPU::V_S_RSQ_F16_e64: return AMDGPU::V_RSQ_F16_t16_e64; + case AMDGPU::V_S_RSQ_F16_e64: return AMDGPU::V_RSQ_F16_fake16_e64; case AMDGPU::V_S_SQRT_F32_e64: return AMDGPU::V_SQRT_F32_e64; - case AMDGPU::V_S_SQRT_F16_e64: return AMDGPU::V_SQRT_F16_t16_e64; + case AMDGPU::V_S_SQRT_F16_e64: return AMDGPU::V_SQRT_F16_fake16_e64; } llvm_unreachable( "Unexpected scalar opcode without corresponding vector one!"); @@ -7266,8 +7271,14 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist, if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src0_modifiers) >= 0) NewInstr.addImm(0); - if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src0) >= 0) - NewInstr->addOperand(Inst.getOperand(1)); + if (AMDGPU::hasNamedOperand(NewOpcode, AMDGPU::OpName::src0)) { + MachineOperand Src = Inst.getOperand(1); + if (AMDGPU::isTrue16Inst(NewOpcode) && ST.useRealTrue16Insts() && + Src.isReg() && RI.isVGPR(MRI, Src.getReg())) + NewInstr.addReg(Src.getReg(), 0, AMDGPU::lo16); + else + NewInstr->addOperand(Src); + } if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) { // We are converting these to a BFE, so we need to add the missing diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index f07b8fa0ea4c..04c92155f5aa 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -1773,28 +1773,27 @@ class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC, class getInsVOP3Base<RegisterOperand Src0RC, RegisterOperand Src1RC, RegisterOperand Src2RC, int NumSrcArgs, bit HasClamp, bit HasModifiers, bit HasSrc2Mods, bit HasOMod, - Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOpSel, - bit IsVOP3P> { + Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOpSel> { // getInst64 handles clamp and omod. implicit mutex between vop3p and omod dag base = getIns64 <Src0RC, Src1RC, Src2RC, NumSrcArgs, HasClamp, HasModifiers, HasSrc2Mods, HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret; dag opsel = (ins op_sel0:$op_sel); - dag vop3pOpsel = (ins op_sel_hi0:$op_sel_hi); - dag vop3pFields = !con(!if(HasOpSel, vop3pOpsel, (ins)), (ins neg_lo0:$neg_lo, neg_hi0:$neg_hi)); - - dag ret = !con(base, - !if(HasOpSel, opsel,(ins)), - !if(IsVOP3P, vop3pFields,(ins))); + dag ret = !con(base, !if(HasOpSel, opsel, (ins))); } class getInsVOP3P <RegisterOperand Src0RC, RegisterOperand Src1RC, RegisterOperand Src2RC, int NumSrcArgs, bit HasClamp, bit HasOpSel, Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> { - dag ret = getInsVOP3Base<Src0RC, Src1RC, Src2RC, NumSrcArgs, + dag base = getInsVOP3Base<Src0RC, Src1RC, Src2RC, NumSrcArgs, HasClamp, 1/*HasModifiers*/, 1/*HasSrc2Mods*/, - 0/*HasOMod*/, Src0Mod, Src1Mod, Src2Mod, - HasOpSel, 1/*IsVOP3P*/>.ret; + 0/*HasOMod*/, Src0Mod, Src1Mod, Src2Mod, HasOpSel>.ret; + + dag vop3pOpsel = (ins op_sel_hi0:$op_sel_hi); + dag vop3p_neg = (ins neg_lo0:$neg_lo, neg_hi0:$neg_hi); + + dag vop3pFields = !con(!if(HasOpSel, vop3pOpsel, (ins)), vop3p_neg); + dag ret = !con(base, vop3pFields); } class getInsVOP3OpSel <RegisterOperand Src0RC, RegisterOperand Src1RC, @@ -1804,7 +1803,7 @@ class getInsVOP3OpSel <RegisterOperand Src0RC, RegisterOperand Src1RC, dag ret = getInsVOP3Base<Src0RC, Src1RC, Src2RC, NumSrcArgs, HasClamp, 1/*HasModifiers*/, 1/*HasSrc2Mods*/, HasOMod, - Src0Mod, Src1Mod, Src2Mod, 1/*HasOpSel*/, 0>.ret; + Src0Mod, Src1Mod, Src2Mod, /*HasOpSel=*/1>.ret; } class getInsDPPBase <RegisterOperand OldRC, RegisterClass Src0RC, RegisterClass Src1RC, @@ -2390,9 +2389,15 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> { field dag InsDPP8 = getInsDPP8<DstRCDPP, Src0DPP, Src1DPP, Src2DPP, NumSrcArgs, HasModifiers, Src0ModDPP, Src1ModDPP, Src2ModDPP>.ret; - field dag InsVOP3Base = getInsVOP3Base<Src0VOP3DPP, Src1VOP3DPP, + defvar InsVOP3DPPBase = getInsVOP3Base<Src0VOP3DPP, Src1VOP3DPP, Src2VOP3DPP, NumSrcArgs, HasClamp, HasModifiers, HasSrc2Mods, HasOMod, - Src0ModVOP3DPP, Src1ModVOP3DPP, Src2ModVOP3DPP, HasOpSel, IsVOP3P>.ret; + Src0ModVOP3DPP, Src1ModVOP3DPP, Src2ModVOP3DPP, HasOpSel>.ret; + defvar InsVOP3PDPPBase = getInsVOP3P<Src0VOP3DPP, Src1VOP3DPP, + Src2VOP3DPP, NumSrcArgs, HasClamp, HasOpSel, + Src0ModVOP3DPP, Src1ModVOP3DPP, Src2ModVOP3DPP>.ret; + + field dag InsVOP3Base = !if(IsVOP3P, InsVOP3PDPPBase, InsVOP3DPPBase); + field dag InsVOP3DPP = getInsVOP3DPP<InsVOP3Base, DstRCVOP3DPP, NumSrcArgs>.ret; field dag InsVOP3DPP16 = getInsVOP3DPP16<InsVOP3Base, DstRCVOP3DPP, NumSrcArgs>.ret; field dag InsVOP3DPP8 = getInsVOP3DPP8<InsVOP3Base, DstRCVOP3DPP, NumSrcArgs>.ret; diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td index 27a7c29cb1ac..99960c94e598 100644 --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -74,6 +74,7 @@ class VOP1_Real <VOP1_Pseudo ps, int EncodingFamily, string real_name = ps.Mnemo // copy relevant pseudo op flags let SubtargetPredicate = ps.SubtargetPredicate; + let OtherPredicates = ps.OtherPredicates; let AsmMatchConverter = ps.AsmMatchConverter; let AsmVariantName = ps.AsmVariantName; let Constraints = ps.Constraints; @@ -157,8 +158,11 @@ multiclass VOP1Inst_t16<string opName, let OtherPredicates = [NotHasTrue16BitInsts, Has16BitInsts] in { defm NAME : VOP1Inst<opName, P, node>; } - let OtherPredicates = [HasTrue16BitInsts] in { - defm _t16 : VOP1Inst<opName#"_t16", VOPProfile_Fake16<P>, node>; + let OtherPredicates = [UseRealTrue16Insts] in { + defm _t16 : VOP1Inst<opName#"_t16", VOPProfile_True16<P>, node>; + } + let OtherPredicates = [UseFakeTrue16Insts] in { + defm _fake16 : VOP1Inst<opName#"_fake16", VOPProfile_Fake16<P>, node>; } } @@ -679,6 +683,7 @@ class VOP1_DPP<bits<8> op, VOP1_DPP_Pseudo ps, VOPProfile p = ps.Pfl, bit isDPP1 let SchedRW = ps.SchedRW; let Uses = ps.Uses; let TRANS = ps.TRANS; + let OtherPredicates = ps.OtherPredicates; bits<8> vdst; let Inst{8-0} = 0xfa; @@ -707,6 +712,7 @@ class VOP1_DPP8<bits<8> op, VOP1_Pseudo ps, VOPProfile p = ps.Pfl> : let Defs = ps.Defs; let SchedRW = ps.SchedRW; let Uses = ps.Uses; + let OtherPredicates = ps.OtherPredicates; bits<8> vdst; let Inst{8-0} = fi; @@ -742,7 +748,9 @@ multiclass VOP1_Real_e32<GFXGen Gen, bits<9> op, string opName = NAME> { multiclass VOP1_Real_e32_with_name<GFXGen Gen, bits<9> op, string opName, string asmName> { defvar ps = !cast<VOP1_Pseudo>(opName#"_e32"); - let AsmString = asmName # ps.AsmOperands in { + let AsmString = asmName # ps.AsmOperands, + DecoderNamespace = Gen.DecoderNamespace # + !if(ps.Pfl.IsRealTrue16, "", "_FAKE16") in { defm NAME : VOP1_Real_e32<Gen, op, opName>; } } @@ -761,7 +769,9 @@ multiclass VOP1_Real_dpp<GFXGen Gen, bits<9> op, string opName = NAME> { multiclass VOP1_Real_dpp_with_name<GFXGen Gen, bits<9> op, string opName, string asmName> { defvar ps = !cast<VOP1_Pseudo>(opName#"_e32"); - let AsmString = asmName # ps.Pfl.AsmDPP16 in { + let AsmString = asmName # ps.Pfl.AsmDPP16, + DecoderNamespace = "DPP" # Gen.DecoderNamespace # + !if(ps.Pfl.IsRealTrue16, "", "_FAKE16") in { defm NAME : VOP1_Real_dpp<Gen, op, opName>; } } @@ -774,7 +784,9 @@ multiclass VOP1_Real_dpp8<GFXGen Gen, bits<9> op, string opName = NAME> { multiclass VOP1_Real_dpp8_with_name<GFXGen Gen, bits<9> op, string opName, string asmName> { defvar ps = !cast<VOP1_Pseudo>(opName#"_e32"); - let AsmString = asmName # ps.Pfl.AsmDPP8 in { + let AsmString = asmName # ps.Pfl.AsmDPP8, + DecoderNamespace = "DPP8" # Gen.DecoderNamespace # + !if(ps.Pfl.IsRealTrue16, "", "_FAKE16") in { defm NAME : VOP1_Real_dpp8<Gen, op, opName>; } } @@ -854,29 +866,30 @@ defm V_CLS_I32 : VOP1_Real_FULL_with_name_gfx11_gfx12<0x03b, "V_FFBH_I32", "v_cls_i32">; defm V_PERMLANE64_B32 : VOP1Only_Real_gfx11_gfx12<0x067>; defm V_MOV_B16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x01c, "v_mov_b16">; -defm V_NOT_B16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x069, "v_not_b16">; -defm V_CVT_I32_I16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x06a, "v_cvt_i32_i16">; -defm V_CVT_U32_U16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x06b, "v_cvt_u32_u16">; +defm V_NOT_B16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x069, "v_not_b16">; +defm V_CVT_I32_I16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x06a, "v_cvt_i32_i16">; +defm V_CVT_U32_U16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x06b, "v_cvt_u32_u16">; defm V_CVT_F16_U16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x050, "v_cvt_f16_u16">; defm V_CVT_F16_I16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x051, "v_cvt_f16_i16">; defm V_CVT_U16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x052, "v_cvt_u16_f16">; defm V_CVT_I16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x053, "v_cvt_i16_f16">; -defm V_RCP_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x054, "v_rcp_f16">; -defm V_SQRT_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x055, "v_sqrt_f16">; -defm V_RSQ_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x056, "v_rsq_f16">; -defm V_LOG_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x057, "v_log_f16">; -defm V_EXP_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x058, "v_exp_f16">; -defm V_FREXP_MANT_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x059, "v_frexp_mant_f16">; +defm V_RCP_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x054, "v_rcp_f16">; +defm V_SQRT_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x055, "v_sqrt_f16">; +defm V_RSQ_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x056, "v_rsq_f16">; +defm V_LOG_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x057, "v_log_f16">; +defm V_EXP_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x058, "v_exp_f16">; +defm V_FREXP_MANT_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x059, "v_frexp_mant_f16">; defm V_FREXP_EXP_I16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05a, "v_frexp_exp_i16_f16">; -defm V_FLOOR_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05b, "v_floor_f16">; +defm V_FLOOR_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05b, "v_floor_f16">; defm V_CEIL_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05c, "v_ceil_f16">; -defm V_TRUNC_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05d, "v_trunc_f16">; -defm V_RNDNE_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05e, "v_rndne_f16">; -defm V_FRACT_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05f, "v_fract_f16">; -defm V_SIN_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x060, "v_sin_f16">; -defm V_COS_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x061, "v_cos_f16">; -defm V_SAT_PK_U8_I16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x062, "v_sat_pk_u8_i16">; +defm V_CEIL_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05c, "v_ceil_f16">; +defm V_TRUNC_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05d, "v_trunc_f16">; +defm V_RNDNE_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05e, "v_rndne_f16">; +defm V_FRACT_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05f, "v_fract_f16">; +defm V_SIN_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x060, "v_sin_f16">; +defm V_COS_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x061, "v_cos_f16">; +defm V_SAT_PK_U8_I16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x062, "v_sat_pk_u8_i16">; defm V_CVT_NORM_I16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x063, "v_cvt_norm_i16_f16">; defm V_CVT_NORM_U16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x064, "v_cvt_norm_u16_f16">; diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td index ecee61daa1c8..48d4e259bc1c 100644 --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -111,8 +111,8 @@ class VOP2_Real <VOP2_Pseudo ps, int EncodingFamily, string real_name = ps.Mnemo class VOP2_Real_Gen <VOP2_Pseudo ps, GFXGen Gen, string real_name = ps.Mnemonic> : VOP2_Real <ps, Gen.Subtarget, real_name> { - let AssemblerPredicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, - Gen.AssemblerPredicate); + let AssemblerPredicate = Gen.AssemblerPredicate; + let OtherPredicates = !if(ps.Pfl.IsRealTrue16, [UseRealTrue16Insts], []); let DecoderNamespace = Gen.DecoderNamespace# !if(ps.Pfl.IsRealTrue16, "", "_FAKE16"); } @@ -437,7 +437,7 @@ class VOP_MAC <ValueType vt0, ValueType vt1=vt0> : VOPProfile <[vt0, vt1, vt1, v let InsDPP16 = !con(InsDPP, (ins FI:$fi)); let InsVOP3Base = getInsVOP3Base<Src0VOP3DPP, Src1VOP3DPP, RegisterOperand<VGPR_32>, 3, 0, HasModifiers, HasModifiers, HasOMod, - Src0ModVOP3DPP, Src1ModVOP3DPP, Src2Mod, HasOpSel, 0/*IsVOP3P*/>.ret; + Src0ModVOP3DPP, Src1ModVOP3DPP, Src2Mod, HasOpSel>.ret; // We need a dummy src2 tied to dst to track the use of that register for s_delay_alu let InsVOPDX = (ins Src0RC32:$src0X, Src1RC32:$vsrc1X, VGPRSrc_32:$src2X); let InsVOPDXDeferred = @@ -1275,8 +1275,8 @@ class VOP2_DPP16<bits<6> op, VOP2_DPP_Pseudo ps, int subtarget, class VOP2_DPP16_Gen<bits<6> op, VOP2_DPP_Pseudo ps, GFXGen Gen, string opName = ps.OpName, VOPProfile p = ps.Pfl> : VOP2_DPP16<op, ps, Gen.Subtarget, opName, p> { - let AssemblerPredicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, - Gen.AssemblerPredicate); + let AssemblerPredicate = Gen.AssemblerPredicate; + let OtherPredicates = !if(ps.Pfl.IsRealTrue16, [UseRealTrue16Insts], []); let DecoderNamespace = "DPP"#Gen.DecoderNamespace# !if(ps.Pfl.IsRealTrue16, "", "_FAKE16"); } @@ -1304,8 +1304,8 @@ class VOP2_DPP8<bits<6> op, VOP2_Pseudo ps, class VOP2_DPP8_Gen<bits<6> op, VOP2_Pseudo ps, GFXGen Gen, VOPProfile p = ps.Pfl> : VOP2_DPP8<op, ps, p> { - let AssemblerPredicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, - Gen.AssemblerPredicate); + let AssemblerPredicate = Gen.AssemblerPredicate; + let OtherPredicates = !if(ps.Pfl.IsRealTrue16, [UseRealTrue16Insts], []); let DecoderNamespace = "DPP8"#Gen.DecoderNamespace# !if(ps.Pfl.IsRealTrue16, "", "_FAKE16"); } diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td index fd4626d902ac..c4b9e7063093 100644 --- a/llvm/lib/Target/AMDGPU/VOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td @@ -208,8 +208,8 @@ class VOP3_Real <VOP_Pseudo ps, int EncodingFamily, string asm_name = ps.Mnemoni class VOP3_Real_Gen <VOP_Pseudo ps, GFXGen Gen, string asm_name = ps.Mnemonic> : VOP3_Real <ps, Gen.Subtarget, asm_name> { - let AssemblerPredicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, - Gen.AssemblerPredicate); + let AssemblerPredicate = Gen.AssemblerPredicate; + let OtherPredicates = !if(ps.Pfl.IsRealTrue16, [UseRealTrue16Insts], []); let DecoderNamespace = Gen.DecoderNamespace# !if(ps.Pfl.IsRealTrue16, "", "_FAKE16"); } @@ -1340,8 +1340,8 @@ class VOP3_DPP16<bits<10> op, VOP_DPP_Pseudo ps, int subtarget, class VOP3_DPP16_Gen<bits<10> op, VOP_DPP_Pseudo ps, GFXGen Gen, string opName = ps.OpName> : VOP3_DPP16 <op, ps, Gen.Subtarget, opName> { - let AssemblerPredicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, - Gen.AssemblerPredicate); + let AssemblerPredicate = Gen.AssemblerPredicate; + let OtherPredicates = !if(ps.Pfl.IsRealTrue16, [UseRealTrue16Insts], []); let DecoderNamespace = "DPP"#Gen.DecoderNamespace# !if(ps.Pfl.IsRealTrue16, "", "_FAKE16"); } @@ -1470,9 +1470,8 @@ multiclass VOP3_Real_dpp8_with_name<GFXGen Gen, bits<10> op, string opName, let AsmString = asmName # ps.Pfl.AsmVOP3DPP8, DecoderNamespace = "DPP8"#Gen.DecoderNamespace# !if(ps.Pfl.IsRealTrue16, "", "_FAKE16"), - AssemblerPredicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, - Gen.AssemblerPredicate) in { - + OtherPredicates = !if(ps.Pfl.IsRealTrue16, [UseRealTrue16Insts], + [TruePredicate]) in { defm NAME : VOP3_Real_dpp8_Base<Gen, op, opName>; } } diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 568085bd0ab3..f8a281032c77 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -9577,8 +9577,7 @@ static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) { SmallVector<SDValue, 8> Ops; SDLoc dl(N); for (unsigned i = 0; i != NumElts; ++i) { - ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i)); - const APInt &CInt = C->getAPIntValue(); + const APInt &CInt = N->getConstantOperandAPInt(i); // Element types smaller than 32 bits are not legal, so use i32 elements. // The values are implicitly truncated so sext vs. zext doesn't matter. Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32)); @@ -18080,8 +18079,7 @@ SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &D SDValue Op0 = CMOV->getOperand(0); SDValue Op1 = CMOV->getOperand(1); - auto CCNode = cast<ConstantSDNode>(CMOV->getOperand(2)); - auto CC = CCNode->getAPIntValue().getLimitedValue(); + auto CC = CMOV->getConstantOperandAPInt(2).getLimitedValue(); SDValue CmpZ = CMOV->getOperand(4); // The compare must be against zero. @@ -20109,8 +20107,7 @@ void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, // The operand to BFI is already a mask suitable for removing the bits it // sets. - ConstantSDNode *CI = cast<ConstantSDNode>(Op.getOperand(2)); - const APInt &Mask = CI->getAPIntValue(); + const APInt &Mask = Op.getConstantOperandAPInt(2); Known.Zero &= Mask; Known.One &= Mask; return; diff --git a/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp b/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp index e68904863cfc..fc066f001316 100644 --- a/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -1149,15 +1149,10 @@ SDValue MSP430TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { // but they are different from CMP. // FIXME: since we're doing a post-processing, use a pseudoinstr here, so // lowering & isel wouldn't diverge. - bool andCC = false; - if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) { - if (RHSC->isZero() && LHS.hasOneUse() && - (LHS.getOpcode() == ISD::AND || - (LHS.getOpcode() == ISD::TRUNCATE && - LHS.getOperand(0).getOpcode() == ISD::AND))) { - andCC = true; - } - } + bool andCC = isNullConstant(RHS) && LHS.hasOneUse() && + (LHS.getOpcode() == ISD::AND || + (LHS.getOpcode() == ISD::TRUNCATE && + LHS.getOperand(0).getOpcode() == ISD::AND)); ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); SDValue TargetCC; SDValue Flag = EmitCMP(LHS, RHS, TargetCC, CC, dl, DAG); diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index c65090d915ef..34c5569b8076 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -2019,9 +2019,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, DL, RetTy, Args, Outs, retAlignment, HasVAArgs ? std::optional<std::pair<unsigned, const APInt &>>(std::make_pair( - CLI.NumFixedArgs, - cast<ConstantSDNode>(VADeclareParam->getOperand(1)) - ->getAPIntValue())) + CLI.NumFixedArgs, VADeclareParam->getConstantOperandAPInt(1))) : std::nullopt, *CB, UniqueCallSite); const char *ProtoStr = nvTM->getStrPool().save(Proto).data(); @@ -2297,7 +2295,7 @@ SDValue NVPTXTargetLowering::LowerBUILD_VECTOR(SDValue Op, if (VT == MVT::v2f16 || VT == MVT::v2bf16) Value = cast<ConstantFPSDNode>(Operand)->getValueAPF().bitcastToAPInt(); else if (VT == MVT::v2i16 || VT == MVT::v4i8) - Value = cast<ConstantSDNode>(Operand)->getAPIntValue(); + Value = Operand->getAsAPIntVal(); else llvm_unreachable("Unsupported type"); // i8 values are carried around as i16, so we need to zero out upper bits, diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td index 13665985f52e..e1cced327544 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -164,6 +164,9 @@ def True : Predicate<"true">; class hasPTX<int version>: Predicate<"Subtarget->getPTXVersion() >= " # version>; class hasSM<int version>: Predicate<"Subtarget->getSmVersion() >= " # version>; +// Explicit records for arch-accelerated SM versions +def hasSM90a : Predicate<"Subtarget->getFullSmVersion() == 901">; + // non-sync shfl instructions are not available on sm_70+ in PTX6.4+ def hasSHFL : Predicate<"!(Subtarget->getSmVersion() >= 70" "&& Subtarget->getPTXVersion() >= 64)">; diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td index 85eae44f349a..6b062a7f3912 100644 --- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -6727,3 +6727,16 @@ def is_explicit_cluster: NVPTXInst<(outs Int1Regs:$d), (ins), "mov.pred\t$d, %is_explicit_cluster;", [(set Int1Regs:$d, (int_nvvm_is_explicit_cluster))]>, Requires<[hasSM<90>, hasPTX<78>]>; + +// setmaxnreg inc/dec intrinsics +let isConvergent = true in { +multiclass SET_MAXNREG<string Action, Intrinsic Intr> { + def : NVPTXInst<(outs), (ins i32imm:$reg_count), + "setmaxnreg." # Action # ".sync.aligned.u32 $reg_count;", + [(Intr timm:$reg_count)]>, + Requires<[hasSM90a, hasPTX<80>]>; +} + +defm INT_SET_MAXNREG_INC : SET_MAXNREG<"inc", int_nvvm_setmaxnreg_inc_sync_aligned_u32>; +defm INT_SET_MAXNREG_DEC : SET_MAXNREG<"dec", int_nvvm_setmaxnreg_dec_sync_aligned_u32>; +} // isConvergent diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 235df1880b37..4e164fda1d8d 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -16241,7 +16241,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, // Since we are doing this pre-legalize, the RHS can be a constant of // arbitrary bitwidth which may cause issues when trying to get the value // from the underlying APInt. - auto RHSAPInt = cast<ConstantSDNode>(RHS)->getAPIntValue(); + auto RHSAPInt = RHS->getAsAPIntVal(); if (!RHSAPInt.isIntN(64)) break; diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index b1601739fd45..bf756e39bd5d 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -1909,7 +1909,7 @@ def STWAT : X_RD5_RS5_IM5<31, 710, (outs), (ins gprc:$RST, gprc:$RA, u5imm:$RB), "stwat $RST, $RA, $RB", IIC_LdStStore>, Requires<[IsISA3_0]>; -let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in +let isTrap = 1, hasCtrlDep = 1 in def TRAP : XForm_24<31, 4, (outs), (ins), "trap", IIC_LdStLoad, [(trap)]>; def TWI : DForm_base<3, (outs), (ins u5imm:$RST, gprc:$RA, s16imm:$D, variable_ops), diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp index d616aaeddf41..7d42481db57f 100644 --- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -199,6 +199,8 @@ class RISCVAsmParser : public MCTargetAsmParser { ParseStatus parseInsnDirectiveOpcode(OperandVector &Operands); ParseStatus parseInsnCDirectiveOpcode(OperandVector &Operands); ParseStatus parseGPRAsFPR(OperandVector &Operands); + template <bool IsRV64Inst> ParseStatus parseGPRPair(OperandVector &Operands); + ParseStatus parseGPRPair(OperandVector &Operands, bool IsRV64Inst); ParseStatus parseFRMArg(OperandVector &Operands); ParseStatus parseFenceArg(OperandVector &Operands); ParseStatus parseReglist(OperandVector &Operands); @@ -466,6 +468,12 @@ public: bool isGPRAsFPR() const { return isGPR() && Reg.IsGPRAsFPR; } + bool isGPRPair() const { + return Kind == KindTy::Register && + RISCVMCRegisterClasses[RISCV::GPRPairRegClassID].contains( + Reg.RegNum); + } + static bool evaluateConstantImm(const MCExpr *Expr, int64_t &Imm, RISCVMCExpr::VariantKind &VK) { if (auto *RE = dyn_cast<RISCVMCExpr>(Expr)) { @@ -1295,11 +1303,15 @@ unsigned RISCVAsmParser::checkTargetMatchPredicate(MCInst &Inst) { const MCInstrDesc &MCID = MII.get(Inst.getOpcode()); for (unsigned I = 0; I < MCID.NumOperands; ++I) { - if (MCID.operands()[I].RegClass == RISCV::GPRPF64RegClassID) { + if (MCID.operands()[I].RegClass == RISCV::GPRPairRegClassID) { const auto &Op = Inst.getOperand(I); assert(Op.isReg()); MCRegister Reg = Op.getReg(); + if (RISCVMCRegisterClasses[RISCV::GPRPairRegClassID].contains(Reg)) + continue; + + // FIXME: We should form a paired register during parsing/matching. if (((Reg.id() - RISCV::X0) & 1) != 0) return Match_RequiresEvenGPRs; } @@ -2222,6 +2234,48 @@ ParseStatus RISCVAsmParser::parseGPRAsFPR(OperandVector &Operands) { return ParseStatus::Success; } +template <bool IsRV64> +ParseStatus RISCVAsmParser::parseGPRPair(OperandVector &Operands) { + return parseGPRPair(Operands, IsRV64); +} + +ParseStatus RISCVAsmParser::parseGPRPair(OperandVector &Operands, + bool IsRV64Inst) { + // If this is not an RV64 GPRPair instruction, don't parse as a GPRPair on + // RV64 as it will prevent matching the RV64 version of the same instruction + // that doesn't use a GPRPair. + // If this is an RV64 GPRPair instruction, there is no RV32 version so we can + // still parse as a pair. + if (!IsRV64Inst && isRV64()) + return ParseStatus::NoMatch; + + if (getLexer().isNot(AsmToken::Identifier)) + return ParseStatus::NoMatch; + + StringRef Name = getLexer().getTok().getIdentifier(); + MCRegister RegNo = matchRegisterNameHelper(isRVE(), Name); + + if (!RegNo) + return ParseStatus::NoMatch; + + if (!RISCVMCRegisterClasses[RISCV::GPRRegClassID].contains(RegNo)) + return ParseStatus::NoMatch; + + if ((RegNo - RISCV::X0) & 1) + return TokError("register must be even"); + + SMLoc S = getLoc(); + SMLoc E = SMLoc::getFromPointer(S.getPointer() + Name.size()); + getLexer().Lex(); + + const MCRegisterInfo *RI = getContext().getRegisterInfo(); + unsigned Pair = RI->getMatchingSuperReg( + RegNo, RISCV::sub_gpr_even, + &RISCVMCRegisterClasses[RISCV::GPRPairRegClassID]); + Operands.push_back(RISCVOperand::createReg(Pair, S, E)); + return ParseStatus::Success; +} + ParseStatus RISCVAsmParser::parseFRMArg(OperandVector &Operands) { if (getLexer().isNot(AsmToken::Identifier)) return TokError( @@ -3335,27 +3389,6 @@ bool RISCVAsmParser::validateInstruction(MCInst &Inst, return Error(Loc, "Operand must be constant 4."); } - bool IsAMOCAS_D = Opcode == RISCV::AMOCAS_D || Opcode == RISCV::AMOCAS_D_AQ || - Opcode == RISCV::AMOCAS_D_RL || - Opcode == RISCV::AMOCAS_D_AQ_RL; - bool IsAMOCAS_Q = Opcode == RISCV::AMOCAS_Q || Opcode == RISCV::AMOCAS_Q_AQ || - Opcode == RISCV::AMOCAS_Q_RL || - Opcode == RISCV::AMOCAS_Q_AQ_RL; - if ((!isRV64() && IsAMOCAS_D) || IsAMOCAS_Q) { - unsigned Rd = Inst.getOperand(0).getReg(); - unsigned Rs2 = Inst.getOperand(2).getReg(); - assert(Rd >= RISCV::X0 && Rd <= RISCV::X31); - if ((Rd - RISCV::X0) % 2 != 0) { - SMLoc Loc = Operands[1]->getStartLoc(); - return Error(Loc, "The destination register must be even."); - } - assert(Rs2 >= RISCV::X0 && Rs2 <= RISCV::X31); - if ((Rs2 - RISCV::X0) % 2 != 0) { - SMLoc Loc = Operands[2]->getStartLoc(); - return Error(Loc, "The source register must be even."); - } - } - const MCInstrDesc &MCID = MII.get(Opcode); if (!(MCID.TSFlags & RISCVII::ConstraintMask)) return false; diff --git a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp index ed80da14c795..4dd039159e29 100644 --- a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp +++ b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp @@ -171,7 +171,7 @@ static DecodeStatus DecodeGPRCRegisterClass(MCInst &Inst, uint32_t RegNo, return MCDisassembler::Success; } -static DecodeStatus DecodeGPRPF64RegisterClass(MCInst &Inst, uint32_t RegNo, +static DecodeStatus DecodeGPRPairRegisterClass(MCInst &Inst, uint32_t RegNo, uint64_t Address, const MCDisassembler *Decoder) { if (RegNo >= 32 || RegNo & 1) @@ -546,6 +546,10 @@ DecodeStatus RISCVDisassembler::getInstruction(MCInst &MI, uint64_t &Size, !STI.hasFeature(RISCV::Feature64Bit), DecoderTableRV32Zdinx32, "RV32Zdinx table (Double in Integer and rv32)"); + TRY_TO_DECODE(STI.hasFeature(RISCV::FeatureStdExtZacas) && + !STI.hasFeature(RISCV::Feature64Bit), + DecoderTableRV32Zacas32, + "RV32Zacas table (Compare-And-Swap and rv32)"); TRY_TO_DECODE_FEATURE(RISCV::FeatureStdExtZfinx, DecoderTableRVZfinx32, "RVZfinx table (Float in Integer)"); TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXVentanaCondOps, diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp index ab8070772fe5..ae02e86baf6e 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp @@ -47,10 +47,50 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST) const LLT s32 = LLT::scalar(32); const LLT s64 = LLT::scalar(64); + const LLT nxv1s8 = LLT::scalable_vector(1, s8); + const LLT nxv2s8 = LLT::scalable_vector(2, s8); + const LLT nxv4s8 = LLT::scalable_vector(4, s8); + const LLT nxv8s8 = LLT::scalable_vector(8, s8); + const LLT nxv16s8 = LLT::scalable_vector(16, s8); + const LLT nxv32s8 = LLT::scalable_vector(32, s8); + const LLT nxv64s8 = LLT::scalable_vector(64, s8); + + const LLT nxv1s16 = LLT::scalable_vector(1, s16); + const LLT nxv2s16 = LLT::scalable_vector(2, s16); + const LLT nxv4s16 = LLT::scalable_vector(4, s16); + const LLT nxv8s16 = LLT::scalable_vector(8, s16); + const LLT nxv16s16 = LLT::scalable_vector(16, s16); + const LLT nxv32s16 = LLT::scalable_vector(32, s16); + + const LLT nxv1s32 = LLT::scalable_vector(1, s32); + const LLT nxv2s32 = LLT::scalable_vector(2, s32); + const LLT nxv4s32 = LLT::scalable_vector(4, s32); + const LLT nxv8s32 = LLT::scalable_vector(8, s32); + const LLT nxv16s32 = LLT::scalable_vector(16, s32); + + const LLT nxv1s64 = LLT::scalable_vector(1, s64); + const LLT nxv2s64 = LLT::scalable_vector(2, s64); + const LLT nxv4s64 = LLT::scalable_vector(4, s64); + const LLT nxv8s64 = LLT::scalable_vector(8, s64); + using namespace TargetOpcode; + auto AllVecTys = {nxv1s8, nxv2s8, nxv4s8, nxv8s8, nxv16s8, nxv32s8, + nxv64s8, nxv1s16, nxv2s16, nxv4s16, nxv8s16, nxv16s16, + nxv32s16, nxv1s32, nxv2s32, nxv4s32, nxv8s32, nxv16s32, + nxv1s64, nxv2s64, nxv4s64, nxv8s64}; + getActionDefinitionsBuilder({G_ADD, G_SUB, G_AND, G_OR, G_XOR}) .legalFor({s32, sXLen}) + .legalIf(all( + typeInSet(0, AllVecTys), + LegalityPredicate([=, &ST](const LegalityQuery &Query) { + return ST.hasVInstructions() && + (Query.Types[0].getScalarSizeInBits() != 64 || + ST.hasVInstructionsI64()) && + (Query.Types[0].getElementCount().getKnownMinValue() != 1 || + ST.getELen() == 64); + }))) .widenScalarToNextPow2(0) .clampScalar(0, s32, sXLen); diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp index 0799267eaf7c..76e5b3ed4025 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp @@ -106,6 +106,8 @@ unsigned RISCVELFObjectWriter::getRelocType(MCContext &Ctx, if (Expr->getKind() == MCExpr::Target && cast<RISCVMCExpr>(Expr)->getKind() == RISCVMCExpr::VK_RISCV_32_PCREL) return ELF::R_RISCV_32_PCREL; + if (Target.getSymA()->getKind() == MCSymbolRefExpr::VK_GOTPCREL) + return ELF::R_RISCV_GOT32_PCREL; return ELF::R_RISCV_32; case FK_Data_8: return ELF::R_RISCV_64; diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp index 9db5148208b3..961b8f0afe22 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp @@ -37,6 +37,13 @@ RISCVTargetELFStreamer::RISCVTargetELFStreamer(MCStreamer &S, auto &MAB = static_cast<RISCVAsmBackend &>(MCA.getBackend()); setTargetABI(RISCVABI::computeTargetABI(STI.getTargetTriple(), Features, MAB.getTargetOptions().getABIName())); + // `j label` in `.option norelax; j label; .option relax; ...; label:` needs a + // relocation to ensure the jump target is correct after linking. This is due + // to a limitation that shouldForceRelocation has to make the decision upfront + // without knowing a possibly future .option relax. When RISCVAsmParser is used, + // its ParseInstruction may call setForceRelocs as well. + if (STI.hasFeature(RISCV::FeatureRelax)) + static_cast<RISCVAsmBackend &>(MAB).setForceRelocs(); } RISCVELFStreamer &RISCVTargetELFStreamer::getStreamer() { diff --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp index 103a2e2da7b9..ed2b1ceb7d6f 100644 --- a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp +++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp @@ -308,8 +308,10 @@ bool RISCVExpandPseudo::expandRV32ZdinxStore(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) { DebugLoc DL = MBBI->getDebugLoc(); const TargetRegisterInfo *TRI = STI->getRegisterInfo(); - Register Lo = TRI->getSubReg(MBBI->getOperand(0).getReg(), RISCV::sub_32); - Register Hi = TRI->getSubReg(MBBI->getOperand(0).getReg(), RISCV::sub_32_hi); + Register Lo = + TRI->getSubReg(MBBI->getOperand(0).getReg(), RISCV::sub_gpr_even); + Register Hi = + TRI->getSubReg(MBBI->getOperand(0).getReg(), RISCV::sub_gpr_odd); BuildMI(MBB, MBBI, DL, TII->get(RISCV::SW)) .addReg(Lo, getKillRegState(MBBI->getOperand(0).isKill())) .addReg(MBBI->getOperand(1).getReg()) @@ -342,8 +344,10 @@ bool RISCVExpandPseudo::expandRV32ZdinxLoad(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) { DebugLoc DL = MBBI->getDebugLoc(); const TargetRegisterInfo *TRI = STI->getRegisterInfo(); - Register Lo = TRI->getSubReg(MBBI->getOperand(0).getReg(), RISCV::sub_32); - Register Hi = TRI->getSubReg(MBBI->getOperand(0).getReg(), RISCV::sub_32_hi); + Register Lo = + TRI->getSubReg(MBBI->getOperand(0).getReg(), RISCV::sub_gpr_even); + Register Hi = + TRI->getSubReg(MBBI->getOperand(0).getReg(), RISCV::sub_gpr_odd); // If the register of operand 1 is equal to the Lo register, then swap the // order of loading the Lo and Hi statements. diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td index bb7a3291085d..279509575bb5 100644 --- a/llvm/lib/Target/RISCV/RISCVFeatures.td +++ b/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -736,6 +736,7 @@ def FeatureStdExtZacas def HasStdExtZacas : Predicate<"Subtarget->hasStdExtZacas()">, AssemblerPredicate<(all_of FeatureStdExtZacas), "'Zacas' (Atomic Compare-And-Swap Instructions)">; +def NoStdExtZacas : Predicate<"!Subtarget->hasStdExtZacas()">; //===----------------------------------------------------------------------===// // Vendor extensions diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 0a1a466af591..cb9ffabc4123 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -138,7 +138,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, if (Subtarget.is64Bit()) addRegisterClass(MVT::f64, &RISCV::GPRRegClass); else - addRegisterClass(MVT::f64, &RISCV::GPRPF64RegClass); + addRegisterClass(MVT::f64, &RISCV::GPRPairRegClass); } static const MVT::SimpleValueType BoolVecVTs[] = { @@ -814,8 +814,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT, Custom); setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom); - setOperationAction({ISD::AVGFLOORU, ISD::SADDSAT, ISD::UADDSAT, - ISD::SSUBSAT, ISD::USUBSAT}, + setOperationAction({ISD::AVGFLOORU, ISD::AVGCEILU, ISD::SADDSAT, + ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT}, VT, Legal); // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL" @@ -1185,8 +1185,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV()) setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Custom); - setOperationAction({ISD::AVGFLOORU, ISD::SADDSAT, ISD::UADDSAT, - ISD::SSUBSAT, ISD::USUBSAT}, + setOperationAction({ISD::AVGFLOORU, ISD::AVGCEILU, ISD::SADDSAT, + ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT}, VT, Custom); setOperationAction(ISD::VSELECT, VT, Custom); @@ -5466,6 +5466,7 @@ static unsigned getRISCVVLOp(SDValue Op) { OP_CASE(SSUBSAT) OP_CASE(USUBSAT) OP_CASE(AVGFLOORU) + OP_CASE(AVGCEILU) OP_CASE(FADD) OP_CASE(FSUB) OP_CASE(FMUL) @@ -5570,7 +5571,7 @@ static bool hasMergeOp(unsigned Opcode) { Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE && "not a RISC-V target specific op"); static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == - 125 && + 126 && RISCVISD::LAST_RISCV_STRICTFP_OPCODE - ISD::FIRST_TARGET_STRICTFP_OPCODE == 21 && @@ -5596,7 +5597,7 @@ static bool hasMaskOp(unsigned Opcode) { Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE && "not a RISC-V target specific op"); static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == - 125 && + 126 && RISCVISD::LAST_RISCV_STRICTFP_OPCODE - ISD::FIRST_TARGET_STRICTFP_OPCODE == 21 && @@ -6461,6 +6462,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, return SplitVectorOp(Op, DAG); [[fallthrough]]; case ISD::AVGFLOORU: + case ISD::AVGCEILU: case ISD::SADDSAT: case ISD::UADDSAT: case ISD::SSUBSAT: @@ -7023,8 +7025,7 @@ foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, if (!NewConstOp) return SDValue(); - const APInt &NewConstAPInt = - cast<ConstantSDNode>(NewConstOp)->getAPIntValue(); + const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal(); if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes()) return SDValue(); @@ -7154,8 +7155,8 @@ SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const { // is SETGE/SETLE to avoid an XORI. if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) && CCVal == ISD::SETLT) { - const APInt &TrueVal = cast<ConstantSDNode>(TrueV)->getAPIntValue(); - const APInt &FalseVal = cast<ConstantSDNode>(FalseV)->getAPIntValue(); + const APInt &TrueVal = TrueV->getAsAPIntVal(); + const APInt &FalseVal = FalseV->getAsAPIntVal(); if (TrueVal - 1 == FalseVal) return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV); if (TrueVal + 1 == FalseVal) @@ -16345,7 +16346,7 @@ static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI, Register SrcReg = MI.getOperand(2).getReg(); const TargetRegisterClass *SrcRC = MI.getOpcode() == RISCV::SplitF64Pseudo_INX - ? &RISCV::GPRPF64RegClass + ? &RISCV::GPRPairRegClass : &RISCV::FPR64RegClass; int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF); @@ -16384,7 +16385,7 @@ static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI, Register HiReg = MI.getOperand(2).getReg(); const TargetRegisterClass *DstRC = - MI.getOpcode() == RISCV::BuildPairF64Pseudo_INX ? &RISCV::GPRPF64RegClass + MI.getOpcode() == RISCV::BuildPairF64Pseudo_INX ? &RISCV::GPRPairRegClass : &RISCV::FPR64RegClass; int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF); @@ -18596,6 +18597,7 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(UREM_VL) NODE_NAME_CASE(XOR_VL) NODE_NAME_CASE(AVGFLOORU_VL) + NODE_NAME_CASE(AVGCEILU_VL) NODE_NAME_CASE(SADDSAT_VL) NODE_NAME_CASE(UADDSAT_VL) NODE_NAME_CASE(SSUBSAT_VL) @@ -18752,7 +18754,7 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, if (VT == MVT::f32 && Subtarget.hasStdExtZfinx()) return std::make_pair(0U, &RISCV::GPRF32RegClass); if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit()) - return std::make_pair(0U, &RISCV::GPRPF64RegClass); + return std::make_pair(0U, &RISCV::GPRPairRegClass); return std::make_pair(0U, &RISCV::GPRNoX0RegClass); case 'f': if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16) @@ -18934,7 +18936,7 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, // Subtarget into account. if (Res.second == &RISCV::GPRF16RegClass || Res.second == &RISCV::GPRF32RegClass || - Res.second == &RISCV::GPRPF64RegClass) + Res.second == &RISCV::GPRPairRegClass) return std::make_pair(Res.first, &RISCV::GPRRegClass); return Res; @@ -19362,6 +19364,11 @@ bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, return false; } +ISD::NodeType RISCVTargetLowering::getExtendForAtomicCmpSwapArg() const { + // Zacas will use amocas.w which does not require extension. + return Subtarget.hasStdExtZacas() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND; +} + Register RISCVTargetLowering::getExceptionPointerRegister( const Constant *PersonalityFn) const { return RISCV::X10; @@ -20017,8 +20024,13 @@ unsigned RISCVTargetLowering::getCustomCtpopCost(EVT VT, } bool RISCVTargetLowering::fallBackToDAGISel(const Instruction &Inst) const { - // At the moment, the only scalable instruction GISel knows how to lower is - // ret with scalable argument. + + // GISel support is in progress or complete for G_ADD, G_SUB, G_AND, G_OR, and + // G_XOR. + unsigned Op = Inst.getOpcode(); + if (Op == Instruction::Add || Op == Instruction::Sub || + Op == Instruction::And || Op == Instruction::Or || Op == Instruction::Xor) + return false; if (Inst.getType()->isScalableTy()) return true; diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index 5d51fe168b04..c65953e37b17 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -255,6 +255,8 @@ enum NodeType : unsigned { // Averaging adds of unsigned integers. AVGFLOORU_VL, + // Rounding averaging adds of unsigned integers. + AVGCEILU_VL, MULHS_VL, MULHU_VL, @@ -631,9 +633,7 @@ public: return ISD::SIGN_EXTEND; } - ISD::NodeType getExtendForAtomicCmpSwapArg() const override { - return ISD::SIGN_EXTEND; - } + ISD::NodeType getExtendForAtomicCmpSwapArg() const override; bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const override; diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp index e591aa935c0b..6c9e529e4bfb 100644 --- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp +++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp @@ -1464,20 +1464,6 @@ static void doUnion(DemandedFields &A, DemandedFields B) { A.MaskPolicy |= B.MaskPolicy; } -static bool isNonZeroAVL(const MachineOperand &MO, - const MachineRegisterInfo &MRI) { - if (MO.isReg()) { - if (MO.getReg() == RISCV::X0) - return true; - if (MachineInstr *MI = MRI.getVRegDef(MO.getReg()); - MI && isNonZeroLoadImmediate(*MI)) - return true; - return false; - } - assert(MO.isImm()); - return 0 != MO.getImm(); -} - // Return true if we can mutate PrevMI to match MI without changing any the // fields which would be observed. static bool canMutatePriorConfig(const MachineInstr &PrevMI, @@ -1491,21 +1477,26 @@ static bool canMutatePriorConfig(const MachineInstr &PrevMI, if (Used.VLAny) return false; - // We don't bother to handle the equally zero case here as it's largely - // uninteresting. if (Used.VLZeroness) { if (isVLPreservingConfig(PrevMI)) return false; - if (!isNonZeroAVL(MI.getOperand(1), MRI) || - !isNonZeroAVL(PrevMI.getOperand(1), MRI)) + if (!getInfoForVSETVLI(PrevMI).hasEquallyZeroAVL(getInfoForVSETVLI(MI), + MRI)) return false; } - // TODO: Track whether the register is defined between - // PrevMI and MI. - if (MI.getOperand(1).isReg() && - RISCV::X0 != MI.getOperand(1).getReg()) - return false; + auto &AVL = MI.getOperand(1); + auto &PrevAVL = PrevMI.getOperand(1); + assert(MRI.isSSA()); + + // If the AVL is a register, we need to make sure MI's AVL dominates PrevMI. + // For now just check that PrevMI uses the same virtual register. + if (AVL.isReg() && AVL.getReg() != RISCV::X0) { + if (AVL.getReg().isPhysical()) + return false; + if (!PrevAVL.isReg() || PrevAVL.getReg() != AVL.getReg()) + return false; + } } if (!PrevMI.getOperand(2).isImm() || !MI.getOperand(2).isImm()) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 351f48c1708e..9813c7a70dfc 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -414,15 +414,16 @@ void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB, return; } - if (RISCV::GPRPF64RegClass.contains(DstReg, SrcReg)) { - // Emit an ADDI for both parts of GPRPF64. + if (RISCV::GPRPairRegClass.contains(DstReg, SrcReg)) { + // Emit an ADDI for both parts of GPRPair. BuildMI(MBB, MBBI, DL, get(RISCV::ADDI), - TRI->getSubReg(DstReg, RISCV::sub_32)) - .addReg(TRI->getSubReg(SrcReg, RISCV::sub_32), getKillRegState(KillSrc)) + TRI->getSubReg(DstReg, RISCV::sub_gpr_even)) + .addReg(TRI->getSubReg(SrcReg, RISCV::sub_gpr_even), + getKillRegState(KillSrc)) .addImm(0); BuildMI(MBB, MBBI, DL, get(RISCV::ADDI), - TRI->getSubReg(DstReg, RISCV::sub_32_hi)) - .addReg(TRI->getSubReg(SrcReg, RISCV::sub_32_hi), + TRI->getSubReg(DstReg, RISCV::sub_gpr_odd)) + .addReg(TRI->getSubReg(SrcReg, RISCV::sub_gpr_odd), getKillRegState(KillSrc)) .addImm(0); return; @@ -607,7 +608,7 @@ void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ? RISCV::SW : RISCV::SD; IsScalableVector = false; - } else if (RISCV::GPRPF64RegClass.hasSubClassEq(RC)) { + } else if (RISCV::GPRPairRegClass.hasSubClassEq(RC)) { Opcode = RISCV::PseudoRV32ZdinxSD; IsScalableVector = false; } else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) { @@ -690,7 +691,7 @@ void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ? RISCV::LW : RISCV::LD; IsScalableVector = false; - } else if (RISCV::GPRPF64RegClass.hasSubClassEq(RC)) { + } else if (RISCV::GPRPairRegClass.hasSubClassEq(RC)) { Opcode = RISCV::PseudoRV32ZdinxLD; IsScalableVector = false; } else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) { diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td index 4d0567e41abc..44552c00c62e 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td @@ -157,7 +157,16 @@ defm : AMOPat<"atomic_load_min_32", "AMOMIN_W">; defm : AMOPat<"atomic_load_umax_32", "AMOMAXU_W">; defm : AMOPat<"atomic_load_umin_32", "AMOMINU_W">; -let Predicates = [HasStdExtA] in { +defm : AMOPat<"atomic_swap_64", "AMOSWAP_D", i64, [IsRV64]>; +defm : AMOPat<"atomic_load_add_64", "AMOADD_D", i64, [IsRV64]>; +defm : AMOPat<"atomic_load_and_64", "AMOAND_D", i64, [IsRV64]>; +defm : AMOPat<"atomic_load_or_64", "AMOOR_D", i64, [IsRV64]>; +defm : AMOPat<"atomic_load_xor_64", "AMOXOR_D", i64, [IsRV64]>; +defm : AMOPat<"atomic_load_max_64", "AMOMAX_D", i64, [IsRV64]>; +defm : AMOPat<"atomic_load_min_64", "AMOMIN_D", i64, [IsRV64]>; +defm : AMOPat<"atomic_load_umax_64", "AMOMAXU_D", i64, [IsRV64]>; +defm : AMOPat<"atomic_load_umin_64", "AMOMINU_D", i64, [IsRV64]>; + /// Pseudo AMOs @@ -169,21 +178,6 @@ class PseudoAMO : Pseudo<(outs GPR:$res, GPR:$scratch), let hasSideEffects = 0; } -let Size = 20 in -def PseudoAtomicLoadNand32 : PseudoAMO; -// Ordering constants must be kept in sync with the AtomicOrdering enum in -// AtomicOrdering.h. -def : Pat<(XLenVT (atomic_load_nand_32_monotonic GPR:$addr, GPR:$incr)), - (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 2)>; -def : Pat<(XLenVT (atomic_load_nand_32_acquire GPR:$addr, GPR:$incr)), - (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 4)>; -def : Pat<(XLenVT (atomic_load_nand_32_release GPR:$addr, GPR:$incr)), - (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 5)>; -def : Pat<(XLenVT (atomic_load_nand_32_acq_rel GPR:$addr, GPR:$incr)), - (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 6)>; -def : Pat<(XLenVT (atomic_load_nand_32_seq_cst GPR:$addr, GPR:$incr)), - (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 7)>; - class PseudoMaskedAMO : Pseudo<(outs GPR:$res, GPR:$scratch), (ins GPR:$addr, GPR:$incr, GPR:$mask, ixlenimm:$ordering), []> { @@ -224,6 +218,23 @@ class PseudoMaskedAMOMinMaxPat<Intrinsic intrin, Pseudo AMOInst> (AMOInst GPR:$addr, GPR:$incr, GPR:$mask, GPR:$shiftamt, timm:$ordering)>; +let Predicates = [HasStdExtA] in { + +let Size = 20 in +def PseudoAtomicLoadNand32 : PseudoAMO; +// Ordering constants must be kept in sync with the AtomicOrdering enum in +// AtomicOrdering.h. +def : Pat<(XLenVT (atomic_load_nand_32_monotonic GPR:$addr, GPR:$incr)), + (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 2)>; +def : Pat<(XLenVT (atomic_load_nand_32_acquire GPR:$addr, GPR:$incr)), + (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 4)>; +def : Pat<(XLenVT (atomic_load_nand_32_release GPR:$addr, GPR:$incr)), + (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 5)>; +def : Pat<(XLenVT (atomic_load_nand_32_acq_rel GPR:$addr, GPR:$incr)), + (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 6)>; +def : Pat<(XLenVT (atomic_load_nand_32_seq_cst GPR:$addr, GPR:$incr)), + (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 7)>; + let Size = 28 in def PseudoMaskedAtomicSwap32 : PseudoMaskedAMO; def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_xchg_i32, @@ -256,6 +267,43 @@ let Size = 36 in def PseudoMaskedAtomicLoadUMin32 : PseudoMaskedAMOUMinUMax; def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umin_i32, PseudoMaskedAtomicLoadUMin32>; +} // Predicates = [HasStdExtA] + +let Predicates = [HasStdExtA, IsRV64] in { + +let Size = 20 in +def PseudoAtomicLoadNand64 : PseudoAMO; +// Ordering constants must be kept in sync with the AtomicOrdering enum in +// AtomicOrdering.h. +def : Pat<(i64 (atomic_load_nand_64_monotonic GPR:$addr, GPR:$incr)), + (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 2)>; +def : Pat<(i64 (atomic_load_nand_64_acquire GPR:$addr, GPR:$incr)), + (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 4)>; +def : Pat<(i64 (atomic_load_nand_64_release GPR:$addr, GPR:$incr)), + (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 5)>; +def : Pat<(i64 (atomic_load_nand_64_acq_rel GPR:$addr, GPR:$incr)), + (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 6)>; +def : Pat<(i64 (atomic_load_nand_64_seq_cst GPR:$addr, GPR:$incr)), + (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 7)>; + +def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_xchg_i64, + PseudoMaskedAtomicSwap32>; +def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_add_i64, + PseudoMaskedAtomicLoadAdd32>; +def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_sub_i64, + PseudoMaskedAtomicLoadSub32>; +def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_nand_i64, + PseudoMaskedAtomicLoadNand32>; +def : PseudoMaskedAMOMinMaxPat<int_riscv_masked_atomicrmw_max_i64, + PseudoMaskedAtomicLoadMax32>; +def : PseudoMaskedAMOMinMaxPat<int_riscv_masked_atomicrmw_min_i64, + PseudoMaskedAtomicLoadMin32>; +def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umax_i64, + PseudoMaskedAtomicLoadUMax32>; +def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umin_i64, + PseudoMaskedAtomicLoadUMin32>; +} // Predicates = [HasStdExtA, IsRV64] + /// Compare and exchange @@ -285,9 +333,17 @@ multiclass PseudoCmpXchgPat<string Op, Pseudo CmpXchgInst, (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 7)>; } +let Predicates = [HasStdExtA, NoStdExtZacas] in { def PseudoCmpXchg32 : PseudoCmpXchg; defm : PseudoCmpXchgPat<"atomic_cmp_swap_32", PseudoCmpXchg32>; +} + +let Predicates = [HasStdExtA, NoStdExtZacas, IsRV64] in { +def PseudoCmpXchg64 : PseudoCmpXchg; +defm : PseudoCmpXchgPat<"atomic_cmp_swap_64", PseudoCmpXchg64, i64>; +} +let Predicates = [HasStdExtA] in { def PseudoMaskedCmpXchg32 : Pseudo<(outs GPR:$res, GPR:$scratch), (ins GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, @@ -303,60 +359,9 @@ def : Pat<(int_riscv_masked_cmpxchg_i32 GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering), (PseudoMaskedCmpXchg32 GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering)>; - } // Predicates = [HasStdExtA] -defm : AMOPat<"atomic_swap_64", "AMOSWAP_D", i64, [IsRV64]>; -defm : AMOPat<"atomic_load_add_64", "AMOADD_D", i64, [IsRV64]>; -defm : AMOPat<"atomic_load_and_64", "AMOAND_D", i64, [IsRV64]>; -defm : AMOPat<"atomic_load_or_64", "AMOOR_D", i64, [IsRV64]>; -defm : AMOPat<"atomic_load_xor_64", "AMOXOR_D", i64, [IsRV64]>; -defm : AMOPat<"atomic_load_max_64", "AMOMAX_D", i64, [IsRV64]>; -defm : AMOPat<"atomic_load_min_64", "AMOMIN_D", i64, [IsRV64]>; -defm : AMOPat<"atomic_load_umax_64", "AMOMAXU_D", i64, [IsRV64]>; -defm : AMOPat<"atomic_load_umin_64", "AMOMINU_D", i64, [IsRV64]>; - let Predicates = [HasStdExtA, IsRV64] in { - -/// 64-bit pseudo AMOs - -let Size = 20 in -def PseudoAtomicLoadNand64 : PseudoAMO; -// Ordering constants must be kept in sync with the AtomicOrdering enum in -// AtomicOrdering.h. -def : Pat<(i64 (atomic_load_nand_64_monotonic GPR:$addr, GPR:$incr)), - (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 2)>; -def : Pat<(i64 (atomic_load_nand_64_acquire GPR:$addr, GPR:$incr)), - (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 4)>; -def : Pat<(i64 (atomic_load_nand_64_release GPR:$addr, GPR:$incr)), - (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 5)>; -def : Pat<(i64 (atomic_load_nand_64_acq_rel GPR:$addr, GPR:$incr)), - (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 6)>; -def : Pat<(i64 (atomic_load_nand_64_seq_cst GPR:$addr, GPR:$incr)), - (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 7)>; - -def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_xchg_i64, - PseudoMaskedAtomicSwap32>; -def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_add_i64, - PseudoMaskedAtomicLoadAdd32>; -def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_sub_i64, - PseudoMaskedAtomicLoadSub32>; -def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_nand_i64, - PseudoMaskedAtomicLoadNand32>; -def : PseudoMaskedAMOMinMaxPat<int_riscv_masked_atomicrmw_max_i64, - PseudoMaskedAtomicLoadMax32>; -def : PseudoMaskedAMOMinMaxPat<int_riscv_masked_atomicrmw_min_i64, - PseudoMaskedAtomicLoadMin32>; -def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umax_i64, - PseudoMaskedAtomicLoadUMax32>; -def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umin_i64, - PseudoMaskedAtomicLoadUMin32>; - -/// 64-bit compare and exchange - -def PseudoCmpXchg64 : PseudoCmpXchg; -defm : PseudoCmpXchgPat<"atomic_cmp_swap_64", PseudoCmpXchg64, i64>; - def : Pat<(int_riscv_masked_cmpxchg_i64 GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering), (PseudoMaskedCmpXchg32 @@ -408,6 +413,7 @@ defm : AMOPat2<"atomic_load_min_32", "AMOMIN_W", i32>; defm : AMOPat2<"atomic_load_umax_32", "AMOMAXU_W", i32>; defm : AMOPat2<"atomic_load_umin_32", "AMOMINU_W", i32>; +let Predicates = [HasStdExtA, IsRV64] in defm : PseudoCmpXchgPat<"atomic_cmp_swap_32", PseudoCmpXchg32, i32>; let Predicates = [HasAtomicLdSt] in { diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td index 418421b2a556..fec43d814098 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td @@ -33,8 +33,8 @@ def AddrRegImmINX : ComplexPattern<iPTR, 2, "SelectAddrRegImmINX">; // Zdinx -def GPRPF64AsFPR : AsmOperandClass { - let Name = "GPRPF64AsFPR"; +def GPRPairAsFPR : AsmOperandClass { + let Name = "GPRPairAsFPR"; let ParserMethod = "parseGPRAsFPR"; let PredicateMethod = "isGPRAsFPR"; let RenderMethod = "addRegOperands"; @@ -52,8 +52,8 @@ def FPR64INX : RegisterOperand<GPR> { let DecoderMethod = "DecodeGPRRegisterClass"; } -def FPR64IN32X : RegisterOperand<GPRPF64> { - let ParserMatchClass = GPRPF64AsFPR; +def FPR64IN32X : RegisterOperand<GPRPair> { + let ParserMatchClass = GPRPairAsFPR; } def DExt : ExtInfo<"", "", [HasStdExtD], f64, FPR64, FPR32, FPR64, ?>; @@ -515,15 +515,15 @@ def PseudoFROUND_D_IN32X : PseudoFROUND<FPR64IN32X, f64>; /// Loads let isCall = 0, mayLoad = 1, mayStore = 0, Size = 8, isCodeGenOnly = 1 in -def PseudoRV32ZdinxLD : Pseudo<(outs GPRPF64:$dst), (ins GPR:$rs1, simm12:$imm12), []>; +def PseudoRV32ZdinxLD : Pseudo<(outs GPRPair:$dst), (ins GPR:$rs1, simm12:$imm12), []>; def : Pat<(f64 (load (AddrRegImmINX (XLenVT GPR:$rs1), simm12:$imm12))), (PseudoRV32ZdinxLD GPR:$rs1, simm12:$imm12)>; /// Stores let isCall = 0, mayLoad = 0, mayStore = 1, Size = 8, isCodeGenOnly = 1 in -def PseudoRV32ZdinxSD : Pseudo<(outs), (ins GPRPF64:$rs2, GPRNoX0:$rs1, simm12:$imm12), []>; -def : Pat<(store (f64 GPRPF64:$rs2), (AddrRegImmINX (XLenVT GPR:$rs1), simm12:$imm12)), - (PseudoRV32ZdinxSD GPRPF64:$rs2, GPR:$rs1, simm12:$imm12)>; +def PseudoRV32ZdinxSD : Pseudo<(outs), (ins GPRPair:$rs2, GPRNoX0:$rs1, simm12:$imm12), []>; +def : Pat<(store (f64 GPRPair:$rs2), (AddrRegImmINX (XLenVT GPR:$rs1), simm12:$imm12)), + (PseudoRV32ZdinxSD GPRPair:$rs2, GPR:$rs1, simm12:$imm12)>; /// Pseudo-instructions needed for the soft-float ABI with RV32D diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td index 4f87c36506e5..8ebd8b89c119 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td @@ -877,6 +877,23 @@ multiclass VPatMultiplyAddSDNode_VV_VX<SDNode op, string instruction_name> { } } +multiclass VPatAVGADD_VV_VX_RM<SDNode vop, int vxrm> { + foreach vti = AllIntegerVectors in { + let Predicates = GetVTypePredicates<vti>.Predicates in { + def : Pat<(vop (vti.Vector vti.RegClass:$rs1), + (vti.Vector vti.RegClass:$rs2)), + (!cast<Instruction>("PseudoVAADDU_VV_"#vti.LMul.MX) + (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, vti.RegClass:$rs2, + vxrm, vti.AVL, vti.Log2SEW, TA_MA)>; + def : Pat<(vop (vti.Vector vti.RegClass:$rs1), + (vti.Vector (SplatPat (XLenVT GPR:$rs2)))), + (!cast<Instruction>("PseudoVAADDU_VX_"#vti.LMul.MX) + (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, GPR:$rs2, + vxrm, vti.AVL, vti.Log2SEW, TA_MA)>; + } + } +} + //===----------------------------------------------------------------------===// // Patterns. //===----------------------------------------------------------------------===// @@ -1132,20 +1149,8 @@ defm : VPatBinarySDNode_VV_VX<ssubsat, "PseudoVSSUB">; defm : VPatBinarySDNode_VV_VX<usubsat, "PseudoVSSUBU">; // 12.2. Vector Single-Width Averaging Add and Subtract -foreach vti = AllIntegerVectors in { - let Predicates = GetVTypePredicates<vti>.Predicates in { - def : Pat<(avgflooru (vti.Vector vti.RegClass:$rs1), - (vti.Vector vti.RegClass:$rs2)), - (!cast<Instruction>("PseudoVAADDU_VV_"#vti.LMul.MX) - (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, vti.RegClass:$rs2, - 0b10, vti.AVL, vti.Log2SEW, TA_MA)>; - def : Pat<(avgflooru (vti.Vector vti.RegClass:$rs1), - (vti.Vector (SplatPat (XLenVT GPR:$rs2)))), - (!cast<Instruction>("PseudoVAADDU_VX_"#vti.LMul.MX) - (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, GPR:$rs2, - 0b10, vti.AVL, vti.Log2SEW, TA_MA)>; - } -} +defm : VPatAVGADD_VV_VX_RM<avgflooru, 0b10>; +defm : VPatAVGADD_VV_VX_RM<avgceilu, 0b00>; // 15. Vector Mask Instructions diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td index d60ff4b5fab0..1deb9a709463 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td @@ -112,6 +112,7 @@ def riscv_cttz_vl : SDNode<"RISCVISD::CTTZ_VL", SDT_RISCVIntUnOp_VL> def riscv_ctpop_vl : SDNode<"RISCVISD::CTPOP_VL", SDT_RISCVIntUnOp_VL>; def riscv_avgflooru_vl : SDNode<"RISCVISD::AVGFLOORU_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>; +def riscv_avgceilu_vl : SDNode<"RISCVISD::AVGCEILU_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>; def riscv_saddsat_vl : SDNode<"RISCVISD::SADDSAT_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>; def riscv_uaddsat_vl : SDNode<"RISCVISD::UADDSAT_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>; def riscv_ssubsat_vl : SDNode<"RISCVISD::SSUBSAT_VL", SDT_RISCVIntBinOp_VL>; @@ -2031,6 +2032,25 @@ multiclass VPatSlide1VL_VF<SDNode vop, string instruction_name> { } } +multiclass VPatAVGADDVL_VV_VX_RM<SDNode vop, int vxrm> { + foreach vti = AllIntegerVectors in { + let Predicates = GetVTypePredicates<vti>.Predicates in { + def : Pat<(vop (vti.Vector vti.RegClass:$rs1), + (vti.Vector vti.RegClass:$rs2), + vti.RegClass:$merge, (vti.Mask V0), VLOpFrag), + (!cast<Instruction>("PseudoVAADDU_VV_"#vti.LMul.MX#"_MASK") + vti.RegClass:$merge, vti.RegClass:$rs1, vti.RegClass:$rs2, + (vti.Mask V0), vxrm, GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + def : Pat<(vop (vti.Vector vti.RegClass:$rs1), + (vti.Vector (SplatPat (XLenVT GPR:$rs2))), + vti.RegClass:$merge, (vti.Mask V0), VLOpFrag), + (!cast<Instruction>("PseudoVAADDU_VX_"#vti.LMul.MX#"_MASK") + vti.RegClass:$merge, vti.RegClass:$rs1, GPR:$rs2, + (vti.Mask V0), vxrm, GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + } + } +} + //===----------------------------------------------------------------------===// // Patterns. //===----------------------------------------------------------------------===// @@ -2308,22 +2328,8 @@ defm : VPatBinaryVL_VV_VX<riscv_ssubsat_vl, "PseudoVSSUB">; defm : VPatBinaryVL_VV_VX<riscv_usubsat_vl, "PseudoVSSUBU">; // 12.2. Vector Single-Width Averaging Add and Subtract -foreach vti = AllIntegerVectors in { - let Predicates = GetVTypePredicates<vti>.Predicates in { - def : Pat<(riscv_avgflooru_vl (vti.Vector vti.RegClass:$rs1), - (vti.Vector vti.RegClass:$rs2), - vti.RegClass:$merge, (vti.Mask V0), VLOpFrag), - (!cast<Instruction>("PseudoVAADDU_VV_"#vti.LMul.MX#"_MASK") - vti.RegClass:$merge, vti.RegClass:$rs1, vti.RegClass:$rs2, - (vti.Mask V0), 0b10, GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; - def : Pat<(riscv_avgflooru_vl (vti.Vector vti.RegClass:$rs1), - (vti.Vector (SplatPat (XLenVT GPR:$rs2))), - vti.RegClass:$merge, (vti.Mask V0), VLOpFrag), - (!cast<Instruction>("PseudoVAADDU_VX_"#vti.LMul.MX#"_MASK") - vti.RegClass:$merge, vti.RegClass:$rs1, GPR:$rs2, - (vti.Mask V0), 0b10, GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; - } -} +defm : VPatAVGADDVL_VV_VX_RM<riscv_avgflooru_vl, 0b10>; +defm : VPatAVGADDVL_VV_VX_RM<riscv_avgceilu_vl, 0b00>; // 12.5. Vector Narrowing Fixed-Point Clip Instructions class VPatTruncSatClipMaxMinBase<string inst, diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZa.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZa.td index a09f5715b24f..ffcdd0010749 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZa.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZa.td @@ -17,15 +17,107 @@ // Zacas (Atomic Compare-and-Swap) //===----------------------------------------------------------------------===// +def GPRPairRV32Operand : AsmOperandClass { + let Name = "GPRPairRV32"; + let ParserMethod = "parseGPRPair<false>"; + let PredicateMethod = "isGPRPair"; + let RenderMethod = "addRegOperands"; +} + +def GPRPairRV64Operand : AsmOperandClass { + let Name = "GPRPairRV64"; + let ParserMethod = "parseGPRPair<true>"; + let PredicateMethod = "isGPRPair"; + let RenderMethod = "addRegOperands"; +} + +def GPRPairRV32 : RegisterOperand<GPRPair> { + let ParserMatchClass = GPRPairRV32Operand; +} + +def GPRPairRV64 : RegisterOperand<GPRPair> { + let ParserMatchClass = GPRPairRV64Operand; +} + +let hasSideEffects = 0, mayLoad = 1, mayStore = 1, Constraints = "$rd = $rd_wb" in +class AMO_cas<bits<5> funct5, bit aq, bit rl, bits<3> funct3, string opcodestr, + DAGOperand RC> + : RVInstRAtomic<funct5, aq, rl, funct3, OPC_AMO, + (outs RC:$rd_wb), (ins RC:$rd, GPRMemZeroOffset:$rs1, RC:$rs2), + opcodestr, "$rd, $rs2, $rs1">; + +multiclass AMO_cas_aq_rl<bits<5> funct5, bits<3> funct3, string opcodestr, + DAGOperand RC> { + def "" : AMO_cas<funct5, 0, 0, funct3, opcodestr, RC>; + def _AQ : AMO_cas<funct5, 1, 0, funct3, opcodestr # ".aq", RC>; + def _RL : AMO_cas<funct5, 0, 1, funct3, opcodestr # ".rl", RC>; + def _AQ_RL : AMO_cas<funct5, 1, 1, funct3, opcodestr # ".aqrl", RC>; +} + let Predicates = [HasStdExtZacas] in { -defm AMOCAS_W : AMO_rr_aq_rl<0b00101, 0b010, "amocas.w">; -defm AMOCAS_D : AMO_rr_aq_rl<0b00101, 0b011, "amocas.d">; +defm AMOCAS_W : AMO_cas_aq_rl<0b00101, 0b010, "amocas.w", GPR>; } // Predicates = [HasStdExtZacas] +let Predicates = [HasStdExtZacas, IsRV32], DecoderNamespace = "RV32Zacas" in { +defm AMOCAS_D_RV32 : AMO_cas_aq_rl<0b00101, 0b011, "amocas.d", GPRPairRV32>; +} // Predicates = [HasStdExtZacas, IsRV32] + let Predicates = [HasStdExtZacas, IsRV64] in { -defm AMOCAS_Q : AMO_rr_aq_rl<0b00101, 0b100, "amocas.q">; +defm AMOCAS_D_RV64 : AMO_cas_aq_rl<0b00101, 0b011, "amocas.d", GPR>; +defm AMOCAS_Q : AMO_cas_aq_rl<0b00101, 0b100, "amocas.q", GPRPairRV64>; } // Predicates = [HasStdExtZacas, IsRV64] +multiclass AMOCASPat<string AtomicOp, string BaseInst, ValueType vt = XLenVT, + list<Predicate> ExtraPreds = []> { + let Predicates = !listconcat([HasStdExtZacas, NotHasStdExtZtso], ExtraPreds) in { + def : Pat<(!cast<PatFrag>(AtomicOp#"_monotonic") (vt GPR:$addr), + (vt GPR:$cmp), + (vt GPR:$new)), + (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$addr, GPR:$new)>; + def : Pat<(!cast<PatFrag>(AtomicOp#"_acquire") (vt GPR:$addr), + (vt GPR:$cmp), + (vt GPR:$new)), + (!cast<RVInst>(BaseInst#"_AQ") GPR:$cmp, GPR:$addr, GPR:$new)>; + def : Pat<(!cast<PatFrag>(AtomicOp#"_release") (vt GPR:$addr), + (vt GPR:$cmp), + (vt GPR:$new)), + (!cast<RVInst>(BaseInst#"_RL") GPR:$cmp, GPR:$addr, GPR:$new)>; + def : Pat<(!cast<PatFrag>(AtomicOp#"_acq_rel") (vt GPR:$addr), + (vt GPR:$cmp), + (vt GPR:$new)), + (!cast<RVInst>(BaseInst#"_AQ_RL") GPR:$cmp, GPR:$addr, GPR:$new)>; + def : Pat<(!cast<PatFrag>(AtomicOp#"_seq_cst") (vt GPR:$addr), + (vt GPR:$cmp), + (vt GPR:$new)), + (!cast<RVInst>(BaseInst#"_AQ_RL") GPR:$cmp, GPR:$addr, GPR:$new)>; + } // Predicates = !listconcat([HasStdExtZacas, NotHasStdExtZtso], ExtraPreds) + let Predicates = !listconcat([HasStdExtZacas, HasStdExtZtso], ExtraPreds) in { + def : Pat<(!cast<PatFrag>(AtomicOp#"_monotonic") (vt GPR:$addr), + (vt GPR:$cmp), + (vt GPR:$new)), + (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$addr, GPR:$new)>; + def : Pat<(!cast<PatFrag>(AtomicOp#"_acquire") (vt GPR:$addr), + (vt GPR:$cmp), + (vt GPR:$new)), + (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$addr, GPR:$new)>; + def : Pat<(!cast<PatFrag>(AtomicOp#"_release") (vt GPR:$addr), + (vt GPR:$cmp), + (vt GPR:$new)), + (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$addr, GPR:$new)>; + def : Pat<(!cast<PatFrag>(AtomicOp#"_acq_rel") (vt GPR:$addr), + (vt GPR:$cmp), + (vt GPR:$new)), + (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$addr, GPR:$new)>; + def : Pat<(!cast<PatFrag>(AtomicOp#"_seq_cst") (vt GPR:$addr), + (vt GPR:$cmp), + (vt GPR:$new)), + (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$addr, GPR:$new)>; + } // Predicates = !listconcat([HasStdExtZacas, HasStdExtZtso], ExtraPreds) +} + +defm : AMOCASPat<"atomic_cmp_swap_32", "AMOCAS_W">; +defm : AMOCASPat<"atomic_cmp_swap_64", "AMOCAS_D_RV64", i64, [IsRV64]>; + //===----------------------------------------------------------------------===// // Zawrs (Wait-on-Reservation-Set) //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td index a59d058382fe..5a4d8c4cfece 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td @@ -63,7 +63,10 @@ def sub_vrm1_5 : ComposedSubRegIndex<sub_vrm2_2, sub_vrm1_1>; def sub_vrm1_6 : ComposedSubRegIndex<sub_vrm2_3, sub_vrm1_0>; def sub_vrm1_7 : ComposedSubRegIndex<sub_vrm2_3, sub_vrm1_1>; -def sub_32_hi : SubRegIndex<32, 32>; +// GPR sizes change with HwMode. +// FIXME: Support HwMode in SubRegIndex? +def sub_gpr_even : SubRegIndex<-1>; +def sub_gpr_odd : SubRegIndex<-1, -1>; } // Namespace = "RISCV" // Integer registers @@ -118,6 +121,8 @@ def XLenVT : ValueTypeByHwMode<[RV32, RV64], // Allow f64 in GPR for ZDINX on RV64. def XLenFVT : ValueTypeByHwMode<[RV64], [f64]>; +def XLenPairFVT : ValueTypeByHwMode<[RV32], + [f64]>; def XLenRI : RegInfoByHwMode< [RV32, RV64], [RegInfo<32,32,32>, RegInfo<64,64,64>]>; @@ -546,33 +551,37 @@ def DUMMY_REG_PAIR_WITH_X0 : RISCVReg<0, "0">; def GPRAll : GPRRegisterClass<(add GPR, DUMMY_REG_PAIR_WITH_X0)>; let RegAltNameIndices = [ABIRegAltName] in { - def X0_PD : RISCVRegWithSubRegs<0, X0.AsmName, - [X0, DUMMY_REG_PAIR_WITH_X0], - X0.AltNames> { - let SubRegIndices = [sub_32, sub_32_hi]; + def X0_Pair : RISCVRegWithSubRegs<0, X0.AsmName, + [X0, DUMMY_REG_PAIR_WITH_X0], + X0.AltNames> { + let SubRegIndices = [sub_gpr_even, sub_gpr_odd]; let CoveredBySubRegs = 1; } foreach I = 1-15 in { defvar Index = !shl(I, 1); + defvar IndexP1 = !add(Index, 1); defvar Reg = !cast<Register>("X"#Index); - defvar RegP1 = !cast<Register>("X"#!add(Index,1)); - def X#Index#_PD : RISCVRegWithSubRegs<Index, Reg.AsmName, - [Reg, RegP1], - Reg.AltNames> { - let SubRegIndices = [sub_32, sub_32_hi]; + defvar RegP1 = !cast<Register>("X"#IndexP1); + def "X" # Index #"_X" # IndexP1 : RISCVRegWithSubRegs<Index, + Reg.AsmName, + [Reg, RegP1], + Reg.AltNames> { + let SubRegIndices = [sub_gpr_even, sub_gpr_odd]; let CoveredBySubRegs = 1; } } } -let RegInfos = RegInfoByHwMode<[RV64], [RegInfo<64, 64, 64>]> in -def GPRPF64 : RegisterClass<"RISCV", [f64], 64, (add - X10_PD, X12_PD, X14_PD, X16_PD, - X6_PD, - X28_PD, X30_PD, - X8_PD, - X18_PD, X20_PD, X22_PD, X24_PD, X26_PD, - X0_PD, X2_PD, X4_PD +let RegInfos = RegInfoByHwMode<[RV32, RV64], + [RegInfo<64, 64, 64>, RegInfo<128, 128, 128>]>, + DecoderMethod = "DecodeGPRPairRegisterClass" in +def GPRPair : RegisterClass<"RISCV", [XLenPairFVT], 64, (add + X10_X11, X12_X13, X14_X15, X16_X17, + X6_X7, + X28_X29, X30_X31, + X8_X9, + X18_X19, X20_X21, X22_X23, X24_X25, X26_X27, + X0_Pair, X2_X3, X4_X5 )>; // The register class is added for inline assembly for vector mask types. diff --git a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index 320f91c76057..815eca1240d8 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -1649,7 +1649,7 @@ void SystemZDAGToDAGISel::Select(SDNode *Node) { } } if (Node->getValueType(0) == MVT::i128) { - const APInt &Val = cast<ConstantSDNode>(Node)->getAPIntValue(); + const APInt &Val = Node->getAsAPIntVal(); SystemZVectorConstantInfo VCI(Val); if (VCI.isVectorConstantLegal(*Subtarget)) { loadVectorConstant(VCI, Node); diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 2450c6801a66..7d387c7b9f2f 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -340,6 +340,13 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setLibcallName(RTLIB::SHL_I128, nullptr); setLibcallName(RTLIB::SRA_I128, nullptr); + // Also expand 256 bit shifts if i128 is a legal type. + if (isTypeLegal(MVT::i128)) { + setOperationAction(ISD::SRL_PARTS, MVT::i128, Expand); + setOperationAction(ISD::SHL_PARTS, MVT::i128, Expand); + setOperationAction(ISD::SRA_PARTS, MVT::i128, Expand); + } + // Handle bitcast from fp128 to i128. if (!isTypeLegal(MVT::i128)) setOperationAction(ISD::BITCAST, MVT::i128, Custom); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp index 15dc44a04395..7f0140a5e8c6 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp @@ -839,9 +839,9 @@ bool WebAssemblyFastISel::selectCall(const Instruction *I) { unsigned Reg; - if (Attrs.hasParamAttr(I, Attribute::SExt)) + if (Call->paramHasAttr(I, Attribute::SExt)) Reg = getRegForSignedValue(V); - else if (Attrs.hasParamAttr(I, Attribute::ZExt)) + else if (Call->paramHasAttr(I, Attribute::ZExt)) Reg = getRegForUnsignedValue(V); else Reg = getRegForValue(V); diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h index 304b998e1f26..e006dd877360 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h +++ b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h @@ -148,21 +148,25 @@ classifyFirstOpcodeInMacroFusion(unsigned Opcode) { case X86::AND16ri8: case X86::AND16rm: case X86::AND16rr: + case X86::AND16rr_REV: case X86::AND32i32: case X86::AND32ri: case X86::AND32ri8: case X86::AND32rm: case X86::AND32rr: + case X86::AND32rr_REV: case X86::AND64i32: case X86::AND64ri32: case X86::AND64ri8: case X86::AND64rm: case X86::AND64rr: + case X86::AND64rr_REV: case X86::AND8i8: case X86::AND8ri: case X86::AND8ri8: case X86::AND8rm: case X86::AND8rr: + case X86::AND8rr_REV: return FirstMacroFusionInstKind::And; // CMP case X86::CMP16i16: @@ -171,24 +175,28 @@ classifyFirstOpcodeInMacroFusion(unsigned Opcode) { case X86::CMP16ri8: case X86::CMP16rm: case X86::CMP16rr: + case X86::CMP16rr_REV: case X86::CMP32i32: case X86::CMP32mr: case X86::CMP32ri: case X86::CMP32ri8: case X86::CMP32rm: case X86::CMP32rr: + case X86::CMP32rr_REV: case X86::CMP64i32: case X86::CMP64mr: case X86::CMP64ri32: case X86::CMP64ri8: case X86::CMP64rm: case X86::CMP64rr: + case X86::CMP64rr_REV: case X86::CMP8i8: case X86::CMP8mr: case X86::CMP8ri: case X86::CMP8ri8: case X86::CMP8rm: case X86::CMP8rr: + case X86::CMP8rr_REV: return FirstMacroFusionInstKind::Cmp; // ADD case X86::ADD16i16: @@ -196,42 +204,50 @@ classifyFirstOpcodeInMacroFusion(unsigned Opcode) { case X86::ADD16ri8: case X86::ADD16rm: case X86::ADD16rr: + case X86::ADD16rr_REV: case X86::ADD32i32: case X86::ADD32ri: case X86::ADD32ri8: case X86::ADD32rm: case X86::ADD32rr: + case X86::ADD32rr_REV: case X86::ADD64i32: case X86::ADD64ri32: case X86::ADD64ri8: case X86::ADD64rm: case X86::ADD64rr: + case X86::ADD64rr_REV: case X86::ADD8i8: case X86::ADD8ri: case X86::ADD8ri8: case X86::ADD8rm: case X86::ADD8rr: + case X86::ADD8rr_REV: // SUB case X86::SUB16i16: case X86::SUB16ri: case X86::SUB16ri8: case X86::SUB16rm: case X86::SUB16rr: + case X86::SUB16rr_REV: case X86::SUB32i32: case X86::SUB32ri: case X86::SUB32ri8: case X86::SUB32rm: case X86::SUB32rr: + case X86::SUB32rr_REV: case X86::SUB64i32: case X86::SUB64ri32: case X86::SUB64ri8: case X86::SUB64rm: case X86::SUB64rr: + case X86::SUB64rr_REV: case X86::SUB8i8: case X86::SUB8ri: case X86::SUB8ri8: case X86::SUB8rm: case X86::SUB8rr: + case X86::SUB8rr_REV: return FirstMacroFusionInstKind::AddSub; // INC case X86::INC16r: diff --git a/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp b/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp index aad839b83ee1..b13bf361ab79 100644 --- a/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp +++ b/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp @@ -173,6 +173,7 @@ static FlagArithMnemonic getMnemonicFromOpcode(unsigned Opcode) { #define LLVM_EXPAND_ADC_SBB_INSTR(MNEMONIC) \ LLVM_EXPAND_INSTR_SIZES(MNEMONIC, rr) \ + LLVM_EXPAND_INSTR_SIZES(MNEMONIC, rr_REV) \ LLVM_EXPAND_INSTR_SIZES(MNEMONIC, rm) \ LLVM_EXPAND_INSTR_SIZES(MNEMONIC, mr) \ case X86::MNEMONIC##8ri: \ diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 5a28240ea9e2..700ab797b2f6 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -2444,6 +2444,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, ISD::SRL, ISD::OR, ISD::AND, + ISD::BITREVERSE, ISD::ADD, ISD::FADD, ISD::FSUB, @@ -4821,8 +4822,8 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits, APInt UndefSrcElts(NumSrcElts, 0); SmallVector<APInt, 64> SrcEltBits; - auto *CN = cast<ConstantSDNode>(Op.getOperand(0).getOperand(0)); - SrcEltBits.push_back(CN->getAPIntValue().zextOrTrunc(SrcEltSizeInBits)); + const APInt &C = Op.getOperand(0).getConstantOperandAPInt(0); + SrcEltBits.push_back(C.zextOrTrunc(SrcEltSizeInBits)); SrcEltBits.append(NumSrcElts - 1, APInt(SrcEltSizeInBits, 0)); return CastBitData(UndefSrcElts, SrcEltBits); } @@ -17223,6 +17224,7 @@ static SDValue lower1BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, "Cannot lower 512-bit vectors w/o basic ISA!"); int NumElts = Mask.size(); + int NumV2Elements = count_if(Mask, [NumElts](int M) { return M >= NumElts; }); // Try to recognize shuffles that are just padding a subvector with zeros. int SubvecElts = 0; @@ -17288,17 +17290,18 @@ static SDValue lower1BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, Offset += NumElts; // Increment for next iteration. } - // If we're broadcasting a SETCC result, try to broadcast the ops instead. + // If we're performing an unary shuffle on a SETCC result, try to shuffle the + // ops instead. // TODO: What other unary shuffles would benefit from this? - if (isBroadcastShuffleMask(Mask) && V1.getOpcode() == ISD::SETCC && - V1->hasOneUse()) { + if (NumV2Elements == 0 && V1.getOpcode() == ISD::SETCC && V1->hasOneUse()) { SDValue Op0 = V1.getOperand(0); SDValue Op1 = V1.getOperand(1); ISD::CondCode CC = cast<CondCodeSDNode>(V1.getOperand(2))->get(); EVT OpVT = Op0.getValueType(); - return DAG.getSetCC( - DL, VT, DAG.getVectorShuffle(OpVT, DL, Op0, DAG.getUNDEF(OpVT), Mask), - DAG.getVectorShuffle(OpVT, DL, Op1, DAG.getUNDEF(OpVT), Mask), CC); + if (OpVT.getScalarSizeInBits() >= 32 || isBroadcastShuffleMask(Mask)) + return DAG.getSetCC( + DL, VT, DAG.getVectorShuffle(OpVT, DL, Op0, DAG.getUNDEF(OpVT), Mask), + DAG.getVectorShuffle(OpVT, DL, Op1, DAG.getUNDEF(OpVT), Mask), CC); } MVT ExtVT; @@ -22551,7 +22554,7 @@ static SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, // FIXME: Do this for non-constant compares for constant on LHS? if (CmpVT == MVT::i64 && isa<ConstantSDNode>(Op1) && !isX86CCSigned(X86CC) && Op0.hasOneUse() && // Hacky way to not break CSE opportunities with sub. - cast<ConstantSDNode>(Op1)->getAPIntValue().getActiveBits() <= 32 && + Op1->getAsAPIntVal().getActiveBits() <= 32 && DAG.MaskedValueIsZero(Op0, APInt::getHighBitsSet(64, 32))) { CmpVT = MVT::i32; Op0 = DAG.getNode(ISD::TRUNCATE, dl, CmpVT, Op0); @@ -47029,8 +47032,8 @@ static SDValue combineShiftRightArithmetic(SDNode *N, SelectionDAG &DAG, SDValue N00 = N0.getOperand(0); SDValue N01 = N0.getOperand(1); - APInt ShlConst = (cast<ConstantSDNode>(N01))->getAPIntValue(); - APInt SarConst = (cast<ConstantSDNode>(N1))->getAPIntValue(); + APInt ShlConst = N01->getAsAPIntVal(); + APInt SarConst = N1->getAsAPIntVal(); EVT CVT = N1.getValueType(); if (SarConst.isNegative()) @@ -51835,6 +51838,33 @@ static SDValue combineXor(SDNode *N, SelectionDAG &DAG, return combineFneg(N, DAG, DCI, Subtarget); } +static SDValue combineBITREVERSE(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const X86Subtarget &Subtarget) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + // Convert a (iX bitreverse(bitcast(vXi1 X))) -> (iX bitcast(shuffle(X))) + if (VT.isInteger() && N0.getOpcode() == ISD::BITCAST && N0.hasOneUse()) { + SDValue Src = N0.getOperand(0); + EVT SrcVT = Src.getValueType(); + if (SrcVT.isVector() && SrcVT.getScalarType() == MVT::i1 && + (DCI.isBeforeLegalize() || + DAG.getTargetLoweringInfo().isTypeLegal(SrcVT)) && + Subtarget.hasSSSE3()) { + unsigned NumElts = SrcVT.getVectorNumElements(); + SmallVector<int, 32> ReverseMask(NumElts); + for (unsigned I = 0; I != NumElts; ++I) + ReverseMask[I] = (NumElts - 1) - I; + SDValue Rev = + DAG.getVectorShuffle(SrcVT, SDLoc(N), Src, Src, ReverseMask); + return DAG.getBitcast(VT, Rev); + } + } + + return SDValue(); +} + static SDValue combineBEXTR(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { @@ -56124,6 +56154,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::AND: return combineAnd(N, DAG, DCI, Subtarget); case ISD::OR: return combineOr(N, DAG, DCI, Subtarget); case ISD::XOR: return combineXor(N, DAG, DCI, Subtarget); + case ISD::BITREVERSE: return combineBITREVERSE(N, DAG, DCI, Subtarget); case X86ISD::BEXTR: case X86ISD::BEXTRI: return combineBEXTR(N, DAG, DCI, Subtarget); case ISD::LOAD: return combineLoad(N, DAG, DCI, Subtarget); diff --git a/llvm/lib/Target/X86/X86InstrArithmetic.td b/llvm/lib/Target/X86/X86InstrArithmetic.td index 5cfa95e085e3..76b0fe5f5cad 100644 --- a/llvm/lib/Target/X86/X86InstrArithmetic.td +++ b/llvm/lib/Target/X86/X86InstrArithmetic.td @@ -1107,43 +1107,85 @@ def : Pat<(store (X86adc_flag GR64:$src, (loadi64 addr:$dst), EFLAGS), // Patterns for basic arithmetic ops with relocImm for the immediate field. multiclass ArithBinOp_RF_relocImm_Pats<SDNode OpNodeFlag, SDNode OpNode> { - def : Pat<(OpNodeFlag GR8:$src1, relocImm8_su:$src2), - (!cast<Instruction>(NAME#"8ri") GR8:$src1, relocImm8_su:$src2)>; - def : Pat<(OpNodeFlag GR16:$src1, relocImm16_su:$src2), - (!cast<Instruction>(NAME#"16ri") GR16:$src1, relocImm16_su:$src2)>; - def : Pat<(OpNodeFlag GR32:$src1, relocImm32_su:$src2), - (!cast<Instruction>(NAME#"32ri") GR32:$src1, relocImm32_su:$src2)>; - def : Pat<(OpNodeFlag GR64:$src1, i64relocImmSExt32_su:$src2), - (!cast<Instruction>(NAME#"64ri32") GR64:$src1, i64relocImmSExt32_su:$src2)>; + let Predicates = [NoNDD] in { + def : Pat<(OpNodeFlag GR8:$src1, relocImm8_su:$src2), + (!cast<Instruction>(NAME#"8ri") GR8:$src1, relocImm8_su:$src2)>; + def : Pat<(OpNodeFlag GR16:$src1, relocImm16_su:$src2), + (!cast<Instruction>(NAME#"16ri") GR16:$src1, relocImm16_su:$src2)>; + def : Pat<(OpNodeFlag GR32:$src1, relocImm32_su:$src2), + (!cast<Instruction>(NAME#"32ri") GR32:$src1, relocImm32_su:$src2)>; + def : Pat<(OpNodeFlag GR64:$src1, i64relocImmSExt32_su:$src2), + (!cast<Instruction>(NAME#"64ri32") GR64:$src1, i64relocImmSExt32_su:$src2)>; + + def : Pat<(store (OpNode (load addr:$dst), relocImm8_su:$src), addr:$dst), + (!cast<Instruction>(NAME#"8mi") addr:$dst, relocImm8_su:$src)>; + def : Pat<(store (OpNode (load addr:$dst), relocImm16_su:$src), addr:$dst), + (!cast<Instruction>(NAME#"16mi") addr:$dst, relocImm16_su:$src)>; + def : Pat<(store (OpNode (load addr:$dst), relocImm32_su:$src), addr:$dst), + (!cast<Instruction>(NAME#"32mi") addr:$dst, relocImm32_su:$src)>; + def : Pat<(store (OpNode (load addr:$dst), i64relocImmSExt32_su:$src), addr:$dst), + (!cast<Instruction>(NAME#"64mi32") addr:$dst, i64relocImmSExt32_su:$src)>; + } + let Predicates = [HasNDD] in { + def : Pat<(OpNodeFlag GR8:$src1, relocImm8_su:$src2), + (!cast<Instruction>(NAME#"8ri_ND") GR8:$src1, relocImm8_su:$src2)>; + def : Pat<(OpNodeFlag GR16:$src1, relocImm16_su:$src2), + (!cast<Instruction>(NAME#"16ri_ND") GR16:$src1, relocImm16_su:$src2)>; + def : Pat<(OpNodeFlag GR32:$src1, relocImm32_su:$src2), + (!cast<Instruction>(NAME#"32ri_ND") GR32:$src1, relocImm32_su:$src2)>; + def : Pat<(OpNodeFlag GR64:$src1, i64relocImmSExt32_su:$src2), + (!cast<Instruction>(NAME#"64ri32_ND") GR64:$src1, i64relocImmSExt32_su:$src2)>; - def : Pat<(store (OpNode (load addr:$dst), relocImm8_su:$src), addr:$dst), - (!cast<Instruction>(NAME#"8mi") addr:$dst, relocImm8_su:$src)>; - def : Pat<(store (OpNode (load addr:$dst), relocImm16_su:$src), addr:$dst), - (!cast<Instruction>(NAME#"16mi") addr:$dst, relocImm16_su:$src)>; - def : Pat<(store (OpNode (load addr:$dst), relocImm32_su:$src), addr:$dst), - (!cast<Instruction>(NAME#"32mi") addr:$dst, relocImm32_su:$src)>; - def : Pat<(store (OpNode (load addr:$dst), i64relocImmSExt32_su:$src), addr:$dst), - (!cast<Instruction>(NAME#"64mi32") addr:$dst, i64relocImmSExt32_su:$src)>; + def : Pat<(OpNode (load addr:$dst), relocImm8_su:$src), + (!cast<Instruction>(NAME#"8mi_ND") addr:$dst, relocImm8_su:$src)>; + def : Pat<(OpNode (load addr:$dst), relocImm16_su:$src), + (!cast<Instruction>(NAME#"16mi_ND") addr:$dst, relocImm16_su:$src)>; + def : Pat<(OpNode (load addr:$dst), relocImm32_su:$src), + (!cast<Instruction>(NAME#"32mi_ND") addr:$dst, relocImm32_su:$src)>; + def : Pat<(OpNode (load addr:$dst), i64relocImmSExt32_su:$src), + (!cast<Instruction>(NAME#"64mi32_ND") addr:$dst, i64relocImmSExt32_su:$src)>; + } } multiclass ArithBinOp_RFF_relocImm_Pats<SDNode OpNodeFlag> { - def : Pat<(OpNodeFlag GR8:$src1, relocImm8_su:$src2, EFLAGS), - (!cast<Instruction>(NAME#"8ri") GR8:$src1, relocImm8_su:$src2)>; - def : Pat<(OpNodeFlag GR16:$src1, relocImm16_su:$src2, EFLAGS), - (!cast<Instruction>(NAME#"16ri") GR16:$src1, relocImm16_su:$src2)>; - def : Pat<(OpNodeFlag GR32:$src1, relocImm32_su:$src2, EFLAGS), - (!cast<Instruction>(NAME#"32ri") GR32:$src1, relocImm32_su:$src2)>; - def : Pat<(OpNodeFlag GR64:$src1, i64relocImmSExt32_su:$src2, EFLAGS), - (!cast<Instruction>(NAME#"64ri32") GR64:$src1, i64relocImmSExt32_su:$src2)>; + let Predicates = [NoNDD] in { + def : Pat<(OpNodeFlag GR8:$src1, relocImm8_su:$src2, EFLAGS), + (!cast<Instruction>(NAME#"8ri") GR8:$src1, relocImm8_su:$src2)>; + def : Pat<(OpNodeFlag GR16:$src1, relocImm16_su:$src2, EFLAGS), + (!cast<Instruction>(NAME#"16ri") GR16:$src1, relocImm16_su:$src2)>; + def : Pat<(OpNodeFlag GR32:$src1, relocImm32_su:$src2, EFLAGS), + (!cast<Instruction>(NAME#"32ri") GR32:$src1, relocImm32_su:$src2)>; + def : Pat<(OpNodeFlag GR64:$src1, i64relocImmSExt32_su:$src2, EFLAGS), + (!cast<Instruction>(NAME#"64ri32") GR64:$src1, i64relocImmSExt32_su:$src2)>; - def : Pat<(store (OpNodeFlag (load addr:$dst), relocImm8_su:$src, EFLAGS), addr:$dst), - (!cast<Instruction>(NAME#"8mi") addr:$dst, relocImm8_su:$src)>; - def : Pat<(store (OpNodeFlag (load addr:$dst), relocImm16_su:$src, EFLAGS), addr:$dst), - (!cast<Instruction>(NAME#"16mi") addr:$dst, relocImm16_su:$src)>; - def : Pat<(store (OpNodeFlag (load addr:$dst), relocImm32_su:$src, EFLAGS), addr:$dst), - (!cast<Instruction>(NAME#"32mi") addr:$dst, relocImm32_su:$src)>; - def : Pat<(store (OpNodeFlag (load addr:$dst), i64relocImmSExt32_su:$src, EFLAGS), addr:$dst), - (!cast<Instruction>(NAME#"64mi32") addr:$dst, i64relocImmSExt32_su:$src)>; + def : Pat<(store (OpNodeFlag (load addr:$dst), relocImm8_su:$src, EFLAGS), addr:$dst), + (!cast<Instruction>(NAME#"8mi") addr:$dst, relocImm8_su:$src)>; + def : Pat<(store (OpNodeFlag (load addr:$dst), relocImm16_su:$src, EFLAGS), addr:$dst), + (!cast<Instruction>(NAME#"16mi") addr:$dst, relocImm16_su:$src)>; + def : Pat<(store (OpNodeFlag (load addr:$dst), relocImm32_su:$src, EFLAGS), addr:$dst), + (!cast<Instruction>(NAME#"32mi") addr:$dst, relocImm32_su:$src)>; + def : Pat<(store (OpNodeFlag (load addr:$dst), i64relocImmSExt32_su:$src, EFLAGS), addr:$dst), + (!cast<Instruction>(NAME#"64mi32") addr:$dst, i64relocImmSExt32_su:$src)>; + } + let Predicates = [HasNDD] in { + def : Pat<(OpNodeFlag GR8:$src1, relocImm8_su:$src2, EFLAGS), + (!cast<Instruction>(NAME#"8ri_ND") GR8:$src1, relocImm8_su:$src2)>; + def : Pat<(OpNodeFlag GR16:$src1, relocImm16_su:$src2, EFLAGS), + (!cast<Instruction>(NAME#"16ri_ND") GR16:$src1, relocImm16_su:$src2)>; + def : Pat<(OpNodeFlag GR32:$src1, relocImm32_su:$src2, EFLAGS), + (!cast<Instruction>(NAME#"32ri_ND") GR32:$src1, relocImm32_su:$src2)>; + def : Pat<(OpNodeFlag GR64:$src1, i64relocImmSExt32_su:$src2, EFLAGS), + (!cast<Instruction>(NAME#"64ri32_ND") GR64:$src1, i64relocImmSExt32_su:$src2)>; + + def : Pat<(OpNodeFlag (load addr:$dst), relocImm8_su:$src, EFLAGS), + (!cast<Instruction>(NAME#"8mi_ND") addr:$dst, relocImm8_su:$src)>; + def : Pat<(OpNodeFlag (load addr:$dst), relocImm16_su:$src, EFLAGS), + (!cast<Instruction>(NAME#"16mi_ND") addr:$dst, relocImm16_su:$src)>; + def : Pat<(OpNodeFlag (load addr:$dst), relocImm32_su:$src, EFLAGS), + (!cast<Instruction>(NAME#"32mi_ND") addr:$dst, relocImm32_su:$src)>; + def : Pat<(OpNodeFlag (load addr:$dst), i64relocImmSExt32_su:$src, EFLAGS), + (!cast<Instruction>(NAME#"64mi32_ND") addr:$dst, i64relocImmSExt32_su:$src)>; + } } multiclass ArithBinOp_F_relocImm_Pats<SDNode OpNodeFlag> { diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td index c77c77ee4a3e..422391a6e02a 100644 --- a/llvm/lib/Target/X86/X86InstrCompiler.td +++ b/llvm/lib/Target/X86/X86InstrCompiler.td @@ -1550,13 +1550,24 @@ def : Pat<(X86add_flag_nocf GR64:$src1, 0x0000000080000000), // AddedComplexity is needed to give priority over i64immSExt8 and i64immSExt32. let AddedComplexity = 1 in { -def : Pat<(and GR64:$src, i64immZExt32:$imm), - (SUBREG_TO_REG - (i64 0), - (AND32ri - (EXTRACT_SUBREG GR64:$src, sub_32bit), - (i32 (GetLo32XForm imm:$imm))), - sub_32bit)>; + let Predicates = [NoNDD] in { + def : Pat<(and GR64:$src, i64immZExt32:$imm), + (SUBREG_TO_REG + (i64 0), + (AND32ri + (EXTRACT_SUBREG GR64:$src, sub_32bit), + (i32 (GetLo32XForm imm:$imm))), + sub_32bit)>; + } + let Predicates = [HasNDD] in { + def : Pat<(and GR64:$src, i64immZExt32:$imm), + (SUBREG_TO_REG + (i64 0), + (AND32ri_ND + (EXTRACT_SUBREG GR64:$src, sub_32bit), + (i32 (GetLo32XForm imm:$imm))), + sub_32bit)>; + } } // AddedComplexity = 1 @@ -1762,10 +1773,18 @@ def : Pat<(X86xor_flag (i8 (trunc GR32:$src)), // where the least significant bit is not 0. However, the probability of this // happening is considered low enough that this is officially not a // "real problem". -def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr GR8 :$src1, GR8 :$src1)>; -def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>; -def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>; -def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>; +let Predicates = [NoNDD] in { + def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr GR8 :$src1, GR8 :$src1)>; + def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>; + def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>; + def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>; +} +let Predicates = [HasNDD] in { + def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr_ND GR8 :$src1, GR8 :$src1)>; + def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr_ND GR16:$src1, GR16:$src1)>; + def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr_ND GR32:$src1, GR32:$src1)>; + def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr_ND GR64:$src1, GR64:$src1)>; +} // Shift amount is implicitly masked. multiclass MaskedShiftAmountPats<SDNode frag, string name> { @@ -1937,75 +1956,179 @@ defm : one_bit_patterns<GR64, i64, BTR64rr, BTS64rr, BTC64rr, shiftMask64>; // EFLAGS-defining Patterns //===----------------------------------------------------------------------===// -// add reg, reg -def : Pat<(add GR8 :$src1, GR8 :$src2), (ADD8rr GR8 :$src1, GR8 :$src2)>; -def : Pat<(add GR16:$src1, GR16:$src2), (ADD16rr GR16:$src1, GR16:$src2)>; -def : Pat<(add GR32:$src1, GR32:$src2), (ADD32rr GR32:$src1, GR32:$src2)>; -def : Pat<(add GR64:$src1, GR64:$src2), (ADD64rr GR64:$src1, GR64:$src2)>; +multiclass EFLAGSDefiningPats<string suffix, Predicate p> { + let Predicates = [p] in { + // add reg, reg + def : Pat<(add GR8 :$src1, GR8 :$src2), (!cast<Instruction>(ADD8rr#suffix) GR8 :$src1, GR8 :$src2)>; + def : Pat<(add GR16:$src1, GR16:$src2), (!cast<Instruction>(ADD16rr#suffix) GR16:$src1, GR16:$src2)>; + def : Pat<(add GR32:$src1, GR32:$src2), (!cast<Instruction>(ADD32rr#suffix) GR32:$src1, GR32:$src2)>; + def : Pat<(add GR64:$src1, GR64:$src2), (!cast<Instruction>(ADD64rr#suffix) GR64:$src1, GR64:$src2)>; + + // add reg, mem + def : Pat<(add GR8:$src1, (loadi8 addr:$src2)), + (!cast<Instruction>(ADD8rm#suffix) GR8:$src1, addr:$src2)>; + def : Pat<(add GR16:$src1, (loadi16 addr:$src2)), + (!cast<Instruction>(ADD16rm#suffix) GR16:$src1, addr:$src2)>; + def : Pat<(add GR32:$src1, (loadi32 addr:$src2)), + (!cast<Instruction>(ADD32rm#suffix) GR32:$src1, addr:$src2)>; + def : Pat<(add GR64:$src1, (loadi64 addr:$src2)), + (!cast<Instruction>(ADD64rm#suffix) GR64:$src1, addr:$src2)>; + + // add reg, imm + def : Pat<(add GR8 :$src1, imm:$src2), (!cast<Instruction>(ADD8ri#suffix) GR8:$src1 , imm:$src2)>; + def : Pat<(add GR16:$src1, imm:$src2), (!cast<Instruction>(ADD16ri#suffix) GR16:$src1, imm:$src2)>; + def : Pat<(add GR32:$src1, imm:$src2), (!cast<Instruction>(ADD32ri#suffix) GR32:$src1, imm:$src2)>; + def : Pat<(add GR64:$src1, i64immSExt32:$src2), (!cast<Instruction>(ADD64ri32#suffix) GR64:$src1, i64immSExt32:$src2)>; -// add reg, mem -def : Pat<(add GR8:$src1, (loadi8 addr:$src2)), - (ADD8rm GR8:$src1, addr:$src2)>; -def : Pat<(add GR16:$src1, (loadi16 addr:$src2)), - (ADD16rm GR16:$src1, addr:$src2)>; -def : Pat<(add GR32:$src1, (loadi32 addr:$src2)), - (ADD32rm GR32:$src1, addr:$src2)>; -def : Pat<(add GR64:$src1, (loadi64 addr:$src2)), - (ADD64rm GR64:$src1, addr:$src2)>; + // sub reg, reg + def : Pat<(sub GR8 :$src1, GR8 :$src2), (!cast<Instruction>(SUB8rr#suffix) GR8 :$src1, GR8 :$src2)>; + def : Pat<(sub GR16:$src1, GR16:$src2), (!cast<Instruction>(SUB16rr#suffix) GR16:$src1, GR16:$src2)>; + def : Pat<(sub GR32:$src1, GR32:$src2), (!cast<Instruction>(SUB32rr#suffix) GR32:$src1, GR32:$src2)>; + def : Pat<(sub GR64:$src1, GR64:$src2), (!cast<Instruction>(SUB64rr#suffix) GR64:$src1, GR64:$src2)>; -// add reg, imm -def : Pat<(add GR8 :$src1, imm:$src2), (ADD8ri GR8:$src1 , imm:$src2)>; -def : Pat<(add GR16:$src1, imm:$src2), (ADD16ri GR16:$src1, imm:$src2)>; -def : Pat<(add GR32:$src1, imm:$src2), (ADD32ri GR32:$src1, imm:$src2)>; -def : Pat<(add GR64:$src1, i64immSExt32:$src2), (ADD64ri32 GR64:$src1, i64immSExt32:$src2)>; + // sub reg, mem + def : Pat<(sub GR8:$src1, (loadi8 addr:$src2)), + (!cast<Instruction>(SUB8rm#suffix) GR8:$src1, addr:$src2)>; + def : Pat<(sub GR16:$src1, (loadi16 addr:$src2)), + (!cast<Instruction>(SUB16rm#suffix) GR16:$src1, addr:$src2)>; + def : Pat<(sub GR32:$src1, (loadi32 addr:$src2)), + (!cast<Instruction>(SUB32rm#suffix) GR32:$src1, addr:$src2)>; + def : Pat<(sub GR64:$src1, (loadi64 addr:$src2)), + (!cast<Instruction>(SUB64rm#suffix) GR64:$src1, addr:$src2)>; -// sub reg, reg -def : Pat<(sub GR8 :$src1, GR8 :$src2), (SUB8rr GR8 :$src1, GR8 :$src2)>; -def : Pat<(sub GR16:$src1, GR16:$src2), (SUB16rr GR16:$src1, GR16:$src2)>; -def : Pat<(sub GR32:$src1, GR32:$src2), (SUB32rr GR32:$src1, GR32:$src2)>; -def : Pat<(sub GR64:$src1, GR64:$src2), (SUB64rr GR64:$src1, GR64:$src2)>; + // sub reg, imm + def : Pat<(sub GR8:$src1, imm:$src2), + (!cast<Instruction>(SUB8ri#suffix) GR8:$src1, imm:$src2)>; + def : Pat<(sub GR16:$src1, imm:$src2), + (!cast<Instruction>(SUB16ri#suffix) GR16:$src1, imm:$src2)>; + def : Pat<(sub GR32:$src1, imm:$src2), + (!cast<Instruction>(SUB32ri#suffix) GR32:$src1, imm:$src2)>; + def : Pat<(sub GR64:$src1, i64immSExt32:$src2), + (!cast<Instruction>(SUB64ri32#suffix) GR64:$src1, i64immSExt32:$src2)>; -// sub reg, mem -def : Pat<(sub GR8:$src1, (loadi8 addr:$src2)), - (SUB8rm GR8:$src1, addr:$src2)>; -def : Pat<(sub GR16:$src1, (loadi16 addr:$src2)), - (SUB16rm GR16:$src1, addr:$src2)>; -def : Pat<(sub GR32:$src1, (loadi32 addr:$src2)), - (SUB32rm GR32:$src1, addr:$src2)>; -def : Pat<(sub GR64:$src1, (loadi64 addr:$src2)), - (SUB64rm GR64:$src1, addr:$src2)>; + // sub 0, reg + def : Pat<(X86sub_flag 0, GR8 :$src), (!cast<Instruction>(NEG8r#suffix) GR8 :$src)>; + def : Pat<(X86sub_flag 0, GR16:$src), (!cast<Instruction>(NEG16r#suffix) GR16:$src)>; + def : Pat<(X86sub_flag 0, GR32:$src), (!cast<Instruction>(NEG32r#suffix) GR32:$src)>; + def : Pat<(X86sub_flag 0, GR64:$src), (!cast<Instruction>(NEG64r#suffix) GR64:$src)>; -// sub reg, imm -def : Pat<(sub GR8:$src1, imm:$src2), - (SUB8ri GR8:$src1, imm:$src2)>; -def : Pat<(sub GR16:$src1, imm:$src2), - (SUB16ri GR16:$src1, imm:$src2)>; -def : Pat<(sub GR32:$src1, imm:$src2), - (SUB32ri GR32:$src1, imm:$src2)>; -def : Pat<(sub GR64:$src1, i64immSExt32:$src2), - (SUB64ri32 GR64:$src1, i64immSExt32:$src2)>; + // mul reg, reg + def : Pat<(mul GR16:$src1, GR16:$src2), + (!cast<Instruction>(IMUL16rr#suffix) GR16:$src1, GR16:$src2)>; + def : Pat<(mul GR32:$src1, GR32:$src2), + (!cast<Instruction>(IMUL32rr#suffix) GR32:$src1, GR32:$src2)>; + def : Pat<(mul GR64:$src1, GR64:$src2), + (!cast<Instruction>(IMUL64rr#suffix) GR64:$src1, GR64:$src2)>; -// sub 0, reg -def : Pat<(X86sub_flag 0, GR8 :$src), (NEG8r GR8 :$src)>; -def : Pat<(X86sub_flag 0, GR16:$src), (NEG16r GR16:$src)>; -def : Pat<(X86sub_flag 0, GR32:$src), (NEG32r GR32:$src)>; -def : Pat<(X86sub_flag 0, GR64:$src), (NEG64r GR64:$src)>; + // mul reg, mem + def : Pat<(mul GR16:$src1, (loadi16 addr:$src2)), + (!cast<Instruction>(IMUL16rm#suffix) GR16:$src1, addr:$src2)>; + def : Pat<(mul GR32:$src1, (loadi32 addr:$src2)), + (!cast<Instruction>(IMUL32rm#suffix) GR32:$src1, addr:$src2)>; + def : Pat<(mul GR64:$src1, (loadi64 addr:$src2)), + (!cast<Instruction>(IMUL64rm#suffix) GR64:$src1, addr:$src2)>; -// mul reg, reg -def : Pat<(mul GR16:$src1, GR16:$src2), - (IMUL16rr GR16:$src1, GR16:$src2)>; -def : Pat<(mul GR32:$src1, GR32:$src2), - (IMUL32rr GR32:$src1, GR32:$src2)>; -def : Pat<(mul GR64:$src1, GR64:$src2), - (IMUL64rr GR64:$src1, GR64:$src2)>; + // or reg/reg. + def : Pat<(or GR8 :$src1, GR8 :$src2), (!cast<Instruction>(OR8rr#suffix) GR8 :$src1, GR8 :$src2)>; + def : Pat<(or GR16:$src1, GR16:$src2), (!cast<Instruction>(OR16rr#suffix) GR16:$src1, GR16:$src2)>; + def : Pat<(or GR32:$src1, GR32:$src2), (!cast<Instruction>(OR32rr#suffix) GR32:$src1, GR32:$src2)>; + def : Pat<(or GR64:$src1, GR64:$src2), (!cast<Instruction>(OR64rr#suffix) GR64:$src1, GR64:$src2)>; + + // or reg/mem + def : Pat<(or GR8:$src1, (loadi8 addr:$src2)), + (!cast<Instruction>(OR8rm#suffix) GR8:$src1, addr:$src2)>; + def : Pat<(or GR16:$src1, (loadi16 addr:$src2)), + (!cast<Instruction>(OR16rm#suffix) GR16:$src1, addr:$src2)>; + def : Pat<(or GR32:$src1, (loadi32 addr:$src2)), + (!cast<Instruction>(OR32rm#suffix) GR32:$src1, addr:$src2)>; + def : Pat<(or GR64:$src1, (loadi64 addr:$src2)), + (!cast<Instruction>(OR64rm#suffix) GR64:$src1, addr:$src2)>; + + // or reg/imm + def : Pat<(or GR8:$src1 , imm:$src2), (!cast<Instruction>(OR8ri#suffix) GR8 :$src1, imm:$src2)>; + def : Pat<(or GR16:$src1, imm:$src2), (!cast<Instruction>(OR16ri#suffix) GR16:$src1, imm:$src2)>; + def : Pat<(or GR32:$src1, imm:$src2), (!cast<Instruction>(OR32ri#suffix) GR32:$src1, imm:$src2)>; + def : Pat<(or GR64:$src1, i64immSExt32:$src2), + (!cast<Instruction>(OR64ri32#suffix) GR64:$src1, i64immSExt32:$src2)>; + + // xor reg/reg + def : Pat<(xor GR8 :$src1, GR8 :$src2), (!cast<Instruction>(XOR8rr#suffix) GR8 :$src1, GR8 :$src2)>; + def : Pat<(xor GR16:$src1, GR16:$src2), (!cast<Instruction>(XOR16rr#suffix) GR16:$src1, GR16:$src2)>; + def : Pat<(xor GR32:$src1, GR32:$src2), (!cast<Instruction>(XOR32rr#suffix) GR32:$src1, GR32:$src2)>; + def : Pat<(xor GR64:$src1, GR64:$src2), (!cast<Instruction>(XOR64rr#suffix) GR64:$src1, GR64:$src2)>; + + // xor reg/mem + def : Pat<(xor GR8:$src1, (loadi8 addr:$src2)), + (!cast<Instruction>(XOR8rm#suffix) GR8:$src1, addr:$src2)>; + def : Pat<(xor GR16:$src1, (loadi16 addr:$src2)), + (!cast<Instruction>(XOR16rm#suffix) GR16:$src1, addr:$src2)>; + def : Pat<(xor GR32:$src1, (loadi32 addr:$src2)), + (!cast<Instruction>(XOR32rm#suffix) GR32:$src1, addr:$src2)>; + def : Pat<(xor GR64:$src1, (loadi64 addr:$src2)), + (!cast<Instruction>(XOR64rm#suffix) GR64:$src1, addr:$src2)>; + + // xor reg/imm + def : Pat<(xor GR8:$src1, imm:$src2), + (!cast<Instruction>(XOR8ri#suffix) GR8:$src1, imm:$src2)>; + def : Pat<(xor GR16:$src1, imm:$src2), + (!cast<Instruction>(XOR16ri#suffix) GR16:$src1, imm:$src2)>; + def : Pat<(xor GR32:$src1, imm:$src2), + (!cast<Instruction>(XOR32ri#suffix) GR32:$src1, imm:$src2)>; + def : Pat<(xor GR64:$src1, i64immSExt32:$src2), + (!cast<Instruction>(XOR64ri32#suffix) GR64:$src1, i64immSExt32:$src2)>; + + // and reg/reg + def : Pat<(and GR8 :$src1, GR8 :$src2), (!cast<Instruction>(AND8rr#suffix) GR8 :$src1, GR8 :$src2)>; + def : Pat<(and GR16:$src1, GR16:$src2), (!cast<Instruction>(AND16rr#suffix) GR16:$src1, GR16:$src2)>; + def : Pat<(and GR32:$src1, GR32:$src2), (!cast<Instruction>(AND32rr#suffix) GR32:$src1, GR32:$src2)>; + def : Pat<(and GR64:$src1, GR64:$src2), (!cast<Instruction>(AND64rr#suffix) GR64:$src1, GR64:$src2)>; + + // and reg/mem + def : Pat<(and GR8:$src1, (loadi8 addr:$src2)), + (!cast<Instruction>(AND8rm#suffix) GR8:$src1, addr:$src2)>; + def : Pat<(and GR16:$src1, (loadi16 addr:$src2)), + (!cast<Instruction>(AND16rm#suffix) GR16:$src1, addr:$src2)>; + def : Pat<(and GR32:$src1, (loadi32 addr:$src2)), + (!cast<Instruction>(AND32rm#suffix) GR32:$src1, addr:$src2)>; + def : Pat<(and GR64:$src1, (loadi64 addr:$src2)), + (!cast<Instruction>(AND64rm#suffix) GR64:$src1, addr:$src2)>; + + // and reg/imm + def : Pat<(and GR8:$src1, imm:$src2), + (!cast<Instruction>(AND8ri#suffix) GR8:$src1, imm:$src2)>; + def : Pat<(and GR16:$src1, imm:$src2), + (!cast<Instruction>(AND16ri#suffix) GR16:$src1, imm:$src2)>; + def : Pat<(and GR32:$src1, imm:$src2), + (!cast<Instruction>(AND32ri#suffix) GR32:$src1, imm:$src2)>; + def : Pat<(and GR64:$src1, i64immSExt32:$src2), + (!cast<Instruction>(AND64ri32#suffix) GR64:$src1, i64immSExt32:$src2)>; + } -// mul reg, mem -def : Pat<(mul GR16:$src1, (loadi16 addr:$src2)), - (IMUL16rm GR16:$src1, addr:$src2)>; -def : Pat<(mul GR32:$src1, (loadi32 addr:$src2)), - (IMUL32rm GR32:$src1, addr:$src2)>; -def : Pat<(mul GR64:$src1, (loadi64 addr:$src2)), - (IMUL64rm GR64:$src1, addr:$src2)>; + // Increment/Decrement reg. + // Do not make INC/DEC if it is slow + let Predicates = [UseIncDec, p] in { + def : Pat<(add GR8:$src, 1), (!cast<Instruction>(INC8r#suffix) GR8:$src)>; + def : Pat<(add GR16:$src, 1), (!cast<Instruction>(INC16r#suffix) GR16:$src)>; + def : Pat<(add GR32:$src, 1), (!cast<Instruction>(INC32r#suffix) GR32:$src)>; + def : Pat<(add GR64:$src, 1), (!cast<Instruction>(INC64r#suffix) GR64:$src)>; + def : Pat<(add GR8:$src, -1), (!cast<Instruction>(DEC8r#suffix) GR8:$src)>; + def : Pat<(add GR16:$src, -1), (!cast<Instruction>(DEC16r#suffix) GR16:$src)>; + def : Pat<(add GR32:$src, -1), (!cast<Instruction>(DEC32r#suffix) GR32:$src)>; + def : Pat<(add GR64:$src, -1), (!cast<Instruction>(DEC64r#suffix) GR64:$src)>; + + def : Pat<(X86add_flag_nocf GR8:$src, -1), (!cast<Instruction>(DEC8r#suffix) GR8:$src)>; + def : Pat<(X86add_flag_nocf GR16:$src, -1), (!cast<Instruction>(DEC16r#suffix) GR16:$src)>; + def : Pat<(X86add_flag_nocf GR32:$src, -1), (!cast<Instruction>(DEC32r#suffix) GR32:$src)>; + def : Pat<(X86add_flag_nocf GR64:$src, -1), (!cast<Instruction>(DEC64r#suffix) GR64:$src)>; + def : Pat<(X86sub_flag_nocf GR8:$src, -1), (!cast<Instruction>(INC8r#suffix) GR8:$src)>; + def : Pat<(X86sub_flag_nocf GR16:$src, -1), (!cast<Instruction>(INC16r#suffix) GR16:$src)>; + def : Pat<(X86sub_flag_nocf GR32:$src, -1), (!cast<Instruction>(INC32r#suffix) GR32:$src)>; + def : Pat<(X86sub_flag_nocf GR64:$src, -1), (!cast<Instruction>(INC64r#suffix) GR64:$src)>; + } +} + +defm : EFLAGSDefiningPats<"", NoNDD>; +defm : EFLAGSDefiningPats<"_ND", HasNDD>; // mul reg, imm def : Pat<(mul GR16:$src1, imm:$src2), @@ -2023,103 +2146,6 @@ def : Pat<(mul (loadi32 addr:$src1), imm:$src2), def : Pat<(mul (loadi64 addr:$src1), i64immSExt32:$src2), (IMUL64rmi32 addr:$src1, i64immSExt32:$src2)>; -// Increment/Decrement reg. -// Do not make INC/DEC if it is slow -let Predicates = [UseIncDec] in { - def : Pat<(add GR8:$src, 1), (INC8r GR8:$src)>; - def : Pat<(add GR16:$src, 1), (INC16r GR16:$src)>; - def : Pat<(add GR32:$src, 1), (INC32r GR32:$src)>; - def : Pat<(add GR64:$src, 1), (INC64r GR64:$src)>; - def : Pat<(add GR8:$src, -1), (DEC8r GR8:$src)>; - def : Pat<(add GR16:$src, -1), (DEC16r GR16:$src)>; - def : Pat<(add GR32:$src, -1), (DEC32r GR32:$src)>; - def : Pat<(add GR64:$src, -1), (DEC64r GR64:$src)>; - - def : Pat<(X86add_flag_nocf GR8:$src, -1), (DEC8r GR8:$src)>; - def : Pat<(X86add_flag_nocf GR16:$src, -1), (DEC16r GR16:$src)>; - def : Pat<(X86add_flag_nocf GR32:$src, -1), (DEC32r GR32:$src)>; - def : Pat<(X86add_flag_nocf GR64:$src, -1), (DEC64r GR64:$src)>; - def : Pat<(X86sub_flag_nocf GR8:$src, -1), (INC8r GR8:$src)>; - def : Pat<(X86sub_flag_nocf GR16:$src, -1), (INC16r GR16:$src)>; - def : Pat<(X86sub_flag_nocf GR32:$src, -1), (INC32r GR32:$src)>; - def : Pat<(X86sub_flag_nocf GR64:$src, -1), (INC64r GR64:$src)>; -} - -// or reg/reg. -def : Pat<(or GR8 :$src1, GR8 :$src2), (OR8rr GR8 :$src1, GR8 :$src2)>; -def : Pat<(or GR16:$src1, GR16:$src2), (OR16rr GR16:$src1, GR16:$src2)>; -def : Pat<(or GR32:$src1, GR32:$src2), (OR32rr GR32:$src1, GR32:$src2)>; -def : Pat<(or GR64:$src1, GR64:$src2), (OR64rr GR64:$src1, GR64:$src2)>; - -// or reg/mem -def : Pat<(or GR8:$src1, (loadi8 addr:$src2)), - (OR8rm GR8:$src1, addr:$src2)>; -def : Pat<(or GR16:$src1, (loadi16 addr:$src2)), - (OR16rm GR16:$src1, addr:$src2)>; -def : Pat<(or GR32:$src1, (loadi32 addr:$src2)), - (OR32rm GR32:$src1, addr:$src2)>; -def : Pat<(or GR64:$src1, (loadi64 addr:$src2)), - (OR64rm GR64:$src1, addr:$src2)>; - -// or reg/imm -def : Pat<(or GR8:$src1 , imm:$src2), (OR8ri GR8 :$src1, imm:$src2)>; -def : Pat<(or GR16:$src1, imm:$src2), (OR16ri GR16:$src1, imm:$src2)>; -def : Pat<(or GR32:$src1, imm:$src2), (OR32ri GR32:$src1, imm:$src2)>; -def : Pat<(or GR64:$src1, i64immSExt32:$src2), - (OR64ri32 GR64:$src1, i64immSExt32:$src2)>; - -// xor reg/reg -def : Pat<(xor GR8 :$src1, GR8 :$src2), (XOR8rr GR8 :$src1, GR8 :$src2)>; -def : Pat<(xor GR16:$src1, GR16:$src2), (XOR16rr GR16:$src1, GR16:$src2)>; -def : Pat<(xor GR32:$src1, GR32:$src2), (XOR32rr GR32:$src1, GR32:$src2)>; -def : Pat<(xor GR64:$src1, GR64:$src2), (XOR64rr GR64:$src1, GR64:$src2)>; - -// xor reg/mem -def : Pat<(xor GR8:$src1, (loadi8 addr:$src2)), - (XOR8rm GR8:$src1, addr:$src2)>; -def : Pat<(xor GR16:$src1, (loadi16 addr:$src2)), - (XOR16rm GR16:$src1, addr:$src2)>; -def : Pat<(xor GR32:$src1, (loadi32 addr:$src2)), - (XOR32rm GR32:$src1, addr:$src2)>; -def : Pat<(xor GR64:$src1, (loadi64 addr:$src2)), - (XOR64rm GR64:$src1, addr:$src2)>; - -// xor reg/imm -def : Pat<(xor GR8:$src1, imm:$src2), - (XOR8ri GR8:$src1, imm:$src2)>; -def : Pat<(xor GR16:$src1, imm:$src2), - (XOR16ri GR16:$src1, imm:$src2)>; -def : Pat<(xor GR32:$src1, imm:$src2), - (XOR32ri GR32:$src1, imm:$src2)>; -def : Pat<(xor GR64:$src1, i64immSExt32:$src2), - (XOR64ri32 GR64:$src1, i64immSExt32:$src2)>; - -// and reg/reg -def : Pat<(and GR8 :$src1, GR8 :$src2), (AND8rr GR8 :$src1, GR8 :$src2)>; -def : Pat<(and GR16:$src1, GR16:$src2), (AND16rr GR16:$src1, GR16:$src2)>; -def : Pat<(and GR32:$src1, GR32:$src2), (AND32rr GR32:$src1, GR32:$src2)>; -def : Pat<(and GR64:$src1, GR64:$src2), (AND64rr GR64:$src1, GR64:$src2)>; - -// and reg/mem -def : Pat<(and GR8:$src1, (loadi8 addr:$src2)), - (AND8rm GR8:$src1, addr:$src2)>; -def : Pat<(and GR16:$src1, (loadi16 addr:$src2)), - (AND16rm GR16:$src1, addr:$src2)>; -def : Pat<(and GR32:$src1, (loadi32 addr:$src2)), - (AND32rm GR32:$src1, addr:$src2)>; -def : Pat<(and GR64:$src1, (loadi64 addr:$src2)), - (AND64rm GR64:$src1, addr:$src2)>; - -// and reg/imm -def : Pat<(and GR8:$src1, imm:$src2), - (AND8ri GR8:$src1, imm:$src2)>; -def : Pat<(and GR16:$src1, imm:$src2), - (AND16ri GR16:$src1, imm:$src2)>; -def : Pat<(and GR32:$src1, imm:$src2), - (AND32ri GR32:$src1, imm:$src2)>; -def : Pat<(and GR64:$src1, i64immSExt32:$src2), - (AND64ri32 GR64:$src1, i64immSExt32:$src2)>; - // Bit scan instruction patterns to match explicit zero-undef behavior. def : Pat<(cttz_zero_undef GR16:$src), (BSF16rr GR16:$src)>; def : Pat<(cttz_zero_undef GR32:$src), (BSF32rr GR32:$src)>; diff --git a/llvm/lib/Target/X86/X86InstrMisc.td b/llvm/lib/Target/X86/X86InstrMisc.td index 97c625a64cfc..753cf62392a1 100644 --- a/llvm/lib/Target/X86/X86InstrMisc.td +++ b/llvm/lib/Target/X86/X86InstrMisc.td @@ -1523,28 +1523,28 @@ def MOVDIR64B64_EVEX : I<0xF8, MRMSrcMem, (outs), (ins GR64:$dst, i512mem_GR64:$ // ENQCMD/S - Enqueue 64-byte command as user with 64-byte write atomicity // let SchedRW = [WriteStore], Defs = [EFLAGS] in { - def ENQCMD16 : I<0xF8, MRMSrcMem, (outs), (ins GR16:$dst, i512mem:$src), + def ENQCMD16 : I<0xF8, MRMSrcMem, (outs), (ins GR16:$dst, i512mem_GR16:$src), "enqcmd\t{$src, $dst|$dst, $src}", [(set EFLAGS, (X86enqcmd GR16:$dst, addr:$src))]>, T8, XD, AdSize16, Requires<[HasENQCMD, Not64BitMode]>; - def ENQCMD32 : I<0xF8, MRMSrcMem, (outs), (ins GR32:$dst, i512mem:$src), + def ENQCMD32 : I<0xF8, MRMSrcMem, (outs), (ins GR32:$dst, i512mem_GR32:$src), "enqcmd\t{$src, $dst|$dst, $src}", [(set EFLAGS, (X86enqcmd GR32:$dst, addr:$src))]>, T8, XD, AdSize32, Requires<[HasENQCMD]>; - def ENQCMD64 : I<0xF8, MRMSrcMem, (outs), (ins GR64:$dst, i512mem:$src), + def ENQCMD64 : I<0xF8, MRMSrcMem, (outs), (ins GR64:$dst, i512mem_GR64:$src), "enqcmd\t{$src, $dst|$dst, $src}", [(set EFLAGS, (X86enqcmd GR64:$dst, addr:$src))]>, T8, XD, AdSize64, Requires<[HasENQCMD, In64BitMode]>; - def ENQCMDS16 : I<0xF8, MRMSrcMem, (outs), (ins GR16:$dst, i512mem:$src), + def ENQCMDS16 : I<0xF8, MRMSrcMem, (outs), (ins GR16:$dst, i512mem_GR16:$src), "enqcmds\t{$src, $dst|$dst, $src}", [(set EFLAGS, (X86enqcmds GR16:$dst, addr:$src))]>, T8, XS, AdSize16, Requires<[HasENQCMD, Not64BitMode]>; - def ENQCMDS32 : I<0xF8, MRMSrcMem, (outs), (ins GR32:$dst, i512mem:$src), + def ENQCMDS32 : I<0xF8, MRMSrcMem, (outs), (ins GR32:$dst, i512mem_GR32:$src), "enqcmds\t{$src, $dst|$dst, $src}", [(set EFLAGS, (X86enqcmds GR32:$dst, addr:$src))]>, T8, XS, AdSize32, Requires<[HasENQCMD]>; - def ENQCMDS64 : I<0xF8, MRMSrcMem, (outs), (ins GR64:$dst, i512mem:$src), + def ENQCMDS64 : I<0xF8, MRMSrcMem, (outs), (ins GR64:$dst, i512mem_GR64:$src), "enqcmds\t{$src, $dst|$dst, $src}", [(set EFLAGS, (X86enqcmds GR64:$dst, addr:$src))]>, T8, XS, AdSize64, Requires<[HasENQCMD, In64BitMode]>; diff --git a/llvm/lib/Target/X86/X86PfmCounters.td b/llvm/lib/Target/X86/X86PfmCounters.td index 49ef6efc6aec..48d689549709 100644 --- a/llvm/lib/Target/X86/X86PfmCounters.td +++ b/llvm/lib/Target/X86/X86PfmCounters.td @@ -18,6 +18,10 @@ def DefaultPfmCounters : ProcPfmCounters {} def : PfmCountersDefaultBinding<DefaultPfmCounters>; // Intel X86 Counters. +defvar DefaultIntelPfmValidationCounters = [ + PfmValidationCounter<InstructionRetired, "INSTRUCTIONS_RETIRED"> +]; + def PentiumPfmCounters : ProcPfmCounters { let CycleCounter = PfmCounter<"cpu_clk_unhalted">; let UopsCounter = PfmCounter<"uops_retired">; @@ -100,6 +104,7 @@ def SandyBridgePfmCounters : ProcPfmCounters { PfmIssueCounter<"SBPort4", "uops_dispatched_port:port_4">, PfmIssueCounter<"SBPort5", "uops_dispatched_port:port_5"> ]; + let ValidationCounters = DefaultIntelPfmValidationCounters; } def : PfmCountersBinding<"sandybridge", SandyBridgePfmCounters>; def : PfmCountersBinding<"ivybridge", SandyBridgePfmCounters>; @@ -117,6 +122,7 @@ def HaswellPfmCounters : ProcPfmCounters { PfmIssueCounter<"HWPort6", "uops_executed_port:port_6">, PfmIssueCounter<"HWPort7", "uops_executed_port:port_7"> ]; + let ValidationCounters = DefaultIntelPfmValidationCounters; } def : PfmCountersBinding<"haswell", HaswellPfmCounters>; @@ -133,6 +139,7 @@ def BroadwellPfmCounters : ProcPfmCounters { PfmIssueCounter<"BWPort6", "uops_executed_port:port_6">, PfmIssueCounter<"BWPort7", "uops_executed_port:port_7"> ]; + let ValidationCounters = DefaultIntelPfmValidationCounters; } def : PfmCountersBinding<"broadwell", BroadwellPfmCounters>; @@ -149,6 +156,7 @@ def SkylakeClientPfmCounters : ProcPfmCounters { PfmIssueCounter<"SKLPort6", "uops_dispatched_port:port_6">, PfmIssueCounter<"SKLPort7", "uops_dispatched_port:port_7"> ]; + let ValidationCounters = DefaultIntelPfmValidationCounters; } def : PfmCountersBinding<"skylake", SkylakeClientPfmCounters>; @@ -165,6 +173,7 @@ def SkylakeServerPfmCounters : ProcPfmCounters { PfmIssueCounter<"SKXPort6", "uops_dispatched_port:port_6">, PfmIssueCounter<"SKXPort7", "uops_dispatched_port:port_7"> ]; + let ValidationCounters = DefaultIntelPfmValidationCounters; } def : PfmCountersBinding<"skylake-avx512", SkylakeServerPfmCounters>; def : PfmCountersBinding<"cascadelake", SkylakeServerPfmCounters>; @@ -182,6 +191,7 @@ def IceLakePfmCounters : ProcPfmCounters { PfmIssueCounter<"ICXPort6", "uops_dispatched_port:port_6">, PfmIssueCounter<"ICXPort78", "uops_dispatched_port:port_7_8"> ]; + let ValidationCounters = DefaultIntelPfmValidationCounters; } def : PfmCountersBinding<"icelake-client", IceLakePfmCounters>; def : PfmCountersBinding<"icelake-server", IceLakePfmCounters>; @@ -189,6 +199,10 @@ def : PfmCountersBinding<"rocketlake", IceLakePfmCounters>; def : PfmCountersBinding<"tigerlake", IceLakePfmCounters>; // AMD X86 Counters. +defvar DefaultAMDPfmValidationCounters = [ + PfmValidationCounter<InstructionRetired, "RETIRED_INSTRUCTIONS"> +]; + // Set basic counters for AMD cpus that we know libpfm4 supports. def DefaultAMDPfmCounters : ProcPfmCounters { let CycleCounter = PfmCounter<"cpu_clk_unhalted">; @@ -265,6 +279,7 @@ def ZnVer1PfmCounters : ProcPfmCounters { PfmIssueCounter<"ZnAGU", "ls_dispatch:ld_st_dispatch + ls_dispatch:ld_dispatch + ls_dispatch:store_dispatch">, PfmIssueCounter<"ZnDivider", "div_op_count"> ]; + let ValidationCounters = DefaultAMDPfmValidationCounters; } def : PfmCountersBinding<"znver1", ZnVer1PfmCounters>; @@ -275,6 +290,7 @@ def ZnVer2PfmCounters : ProcPfmCounters { PfmIssueCounter<"Zn2AGU", "ls_dispatch:ld_st_dispatch + ls_dispatch:ld_dispatch + ls_dispatch:store_dispatch">, PfmIssueCounter<"Zn2Divider", "div_op_count"> ]; + let ValidationCounters = DefaultAMDPfmValidationCounters; } def : PfmCountersBinding<"znver2", ZnVer2PfmCounters>; @@ -288,6 +304,7 @@ def ZnVer3PfmCounters : ProcPfmCounters { PfmIssueCounter<"Zn3Store", "ls_dispatch:store_dispatch">, PfmIssueCounter<"Zn3Divider", "div_op_count"> ]; + let ValidationCounters = DefaultAMDPfmValidationCounters; } def : PfmCountersBinding<"znver3", ZnVer3PfmCounters>; @@ -302,5 +319,6 @@ def ZnVer4PfmCounters : ProcPfmCounters { PfmIssueCounter<"Zn4Divider", "div_op_count">, PfmIssueCounter<"Zn4AGU", "ls_dispatch:ld_st_dispatch + ls_dispatch:ld_dispatch + ls_dispatch:store_dispatch"> ]; + let ValidationCounters = DefaultAMDPfmValidationCounters; } def : PfmCountersBinding<"znver4", ZnVer4PfmCounters>; diff --git a/llvm/lib/TargetParser/ARMTargetParserCommon.cpp b/llvm/lib/TargetParser/ARMTargetParserCommon.cpp index 6d3a59d532fd..45d04f9bcbfb 100644 --- a/llvm/lib/TargetParser/ARMTargetParserCommon.cpp +++ b/llvm/lib/TargetParser/ARMTargetParserCommon.cpp @@ -140,13 +140,14 @@ ARM::EndianKind ARM::parseArchEndian(StringRef Arch) { // an erroneous part of the spec. bool ARM::parseBranchProtection(StringRef Spec, ParsedBranchProtection &PBP, StringRef &Err) { - PBP = {"none", "a_key", false, false}; + PBP = {"none", "a_key", false, false, false}; if (Spec == "none") return true; // defaults are ok if (Spec == "standard") { PBP.Scope = "non-leaf"; PBP.BranchTargetEnforcement = true; + PBP.GuardedControlStack = true; return true; } @@ -173,6 +174,10 @@ bool ARM::parseBranchProtection(StringRef Spec, ParsedBranchProtection &PBP, } continue; } + if (Opt == "gcs") { + PBP.GuardedControlStack = true; + continue; + } if (Opt == "") Err = "<empty>"; else diff --git a/llvm/lib/TextAPI/InterfaceFile.cpp b/llvm/lib/TextAPI/InterfaceFile.cpp index 3689ab919191..d712ed386825 100644 --- a/llvm/lib/TextAPI/InterfaceFile.cpp +++ b/llvm/lib/TextAPI/InterfaceFile.cpp @@ -24,17 +24,23 @@ void InterfaceFileRef::addTarget(const Target &Target) { void InterfaceFile::addAllowableClient(StringRef InstallName, const Target &Target) { + if (InstallName.empty()) + return; auto Client = addEntry(AllowableClients, InstallName); Client->addTarget(Target); } void InterfaceFile::addReexportedLibrary(StringRef InstallName, const Target &Target) { + if (InstallName.empty()) + return; auto Lib = addEntry(ReexportedLibraries, InstallName); Lib->addTarget(Target); } void InterfaceFile::addParentUmbrella(const Target &Target_, StringRef Parent) { + if (Parent.empty()) + return; auto Iter = lower_bound(ParentUmbrellas, Target_, [](const std::pair<Target, std::string> &LHS, Target RHS) { return LHS.first < RHS; }); @@ -48,6 +54,8 @@ void InterfaceFile::addParentUmbrella(const Target &Target_, StringRef Parent) { } void InterfaceFile::addRPath(const Target &InputTarget, StringRef RPath) { + if (RPath.empty()) + return; using RPathEntryT = const std::pair<Target, std::string>; RPathEntryT Entry(InputTarget, RPath); auto Iter = diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp index 70a3f3067d9d..0a6f69bc73d5 100644 --- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp +++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp @@ -77,6 +77,16 @@ STATISTIC(MaxAllocVersionsThinBackend, "allocation during ThinLTO backend"); STATISTIC(UnclonableAllocsThinBackend, "Number of unclonable ambigous allocations during ThinLTO backend"); +STATISTIC(RemovedEdgesWithMismatchedCallees, + "Number of edges removed due to mismatched callees (profiled vs IR)"); +STATISTIC(FoundProfiledCalleeCount, + "Number of profiled callees found via tail calls"); +STATISTIC(FoundProfiledCalleeDepth, + "Aggregate depth of profiled callees found via tail calls"); +STATISTIC(FoundProfiledCalleeMaxDepth, + "Maximum depth of profiled callees found via tail calls"); +STATISTIC(FoundProfiledCalleeNonUniquelyCount, + "Number of profiled callees found via multiple tail call chains"); static cl::opt<std::string> DotFilePathPrefix( "memprof-dot-file-path-prefix", cl::init(""), cl::Hidden, @@ -104,6 +114,12 @@ static cl::opt<std::string> MemProfImportSummary( cl::desc("Import summary to use for testing the ThinLTO backend via opt"), cl::Hidden); +static cl::opt<unsigned> + TailCallSearchDepth("memprof-tail-call-search-depth", cl::init(5), + cl::Hidden, + cl::desc("Max depth to recursively search for missing " + "frames through tail calls.")); + namespace llvm { // Indicate we are linking with an allocator that supports hot/cold operator // new interfaces. @@ -365,8 +381,7 @@ protected: /// Save lists of calls with MemProf metadata in each function, for faster /// iteration. - std::vector<std::pair<FuncTy *, std::vector<CallInfo>>> - FuncToCallsWithMetadata; + MapVector<FuncTy *, std::vector<CallInfo>> FuncToCallsWithMetadata; /// Map from callsite node to the enclosing caller function. std::map<const ContextNode *, const FuncTy *> NodeToCallingFunc; @@ -411,9 +426,25 @@ private: return static_cast<const DerivedCCG *>(this)->getStackId(IdOrIndex); } - /// Returns true if the given call targets the given function. - bool calleeMatchesFunc(CallTy Call, const FuncTy *Func) { - return static_cast<DerivedCCG *>(this)->calleeMatchesFunc(Call, Func); + /// Returns true if the given call targets the callee of the given edge, or if + /// we were able to identify the call chain through intermediate tail calls. + /// In the latter case new context nodes are added to the graph for the + /// identified tail calls, and their synthesized nodes are added to + /// TailCallToContextNodeMap. The EdgeIter is updated in either case to the + /// next element after the input position (either incremented or updated after + /// removing the old edge). + bool + calleesMatch(CallTy Call, EdgeIter &EI, + MapVector<CallInfo, ContextNode *> &TailCallToContextNodeMap); + + /// Returns true if the given call targets the given function, or if we were + /// able to identify the call chain through intermediate tail calls (in which + /// case FoundCalleeChain will be populated). + bool calleeMatchesFunc( + CallTy Call, const FuncTy *Func, const FuncTy *CallerFunc, + std::vector<std::pair<CallTy, FuncTy *>> &FoundCalleeChain) { + return static_cast<DerivedCCG *>(this)->calleeMatchesFunc( + Call, Func, CallerFunc, FoundCalleeChain); } /// Get a list of nodes corresponding to the stack ids in the given @@ -553,7 +584,13 @@ private: Instruction *>; uint64_t getStackId(uint64_t IdOrIndex) const; - bool calleeMatchesFunc(Instruction *Call, const Function *Func); + bool calleeMatchesFunc( + Instruction *Call, const Function *Func, const Function *CallerFunc, + std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain); + bool findProfiledCalleeThroughTailCalls( + const Function *ProfiledCallee, Value *CurCallee, unsigned Depth, + std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain, + bool &FoundMultipleCalleeChains); uint64_t getLastStackId(Instruction *Call); std::vector<uint64_t> getStackIdsWithContextNodesForCall(Instruction *Call); void updateAllocationCall(CallInfo &Call, AllocationType AllocType); @@ -606,12 +643,31 @@ public: function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)> isPrevailing); + ~IndexCallsiteContextGraph() { + // Now that we are done with the graph it is safe to add the new + // CallsiteInfo structs to the function summary vectors. The graph nodes + // point into locations within these vectors, so we don't want to add them + // any earlier. + for (auto &I : FunctionCalleesToSynthesizedCallsiteInfos) { + auto *FS = I.first; + for (auto &Callsite : I.second) + FS->addCallsite(*Callsite.second); + } + } + private: friend CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary, IndexCall>; uint64_t getStackId(uint64_t IdOrIndex) const; - bool calleeMatchesFunc(IndexCall &Call, const FunctionSummary *Func); + bool calleeMatchesFunc( + IndexCall &Call, const FunctionSummary *Func, + const FunctionSummary *CallerFunc, + std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain); + bool findProfiledCalleeThroughTailCalls( + ValueInfo ProfiledCallee, ValueInfo CurCallee, unsigned Depth, + std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain, + bool &FoundMultipleCalleeChains); uint64_t getLastStackId(IndexCall &Call); std::vector<uint64_t> getStackIdsWithContextNodesForCall(IndexCall &Call); void updateAllocationCall(CallInfo &Call, AllocationType AllocType); @@ -630,6 +686,16 @@ private: std::map<const FunctionSummary *, ValueInfo> FSToVIMap; const ModuleSummaryIndex &Index; + function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)> + isPrevailing; + + // Saves/owns the callsite info structures synthesized for missing tail call + // frames that we discover while building the graph. + // It maps from the summary of the function making the tail call, to a map + // of callee ValueInfo to corresponding synthesized callsite info. + std::unordered_map<FunctionSummary *, + std::map<ValueInfo, std::unique_ptr<CallsiteInfo>>> + FunctionCalleesToSynthesizedCallsiteInfos; }; } // namespace @@ -1493,7 +1559,7 @@ ModuleCallsiteContextGraph::ModuleCallsiteContextGraph( } } if (!CallsWithMetadata.empty()) - FuncToCallsWithMetadata.push_back({&F, CallsWithMetadata}); + FuncToCallsWithMetadata[&F] = CallsWithMetadata; } if (DumpCCG) { @@ -1518,7 +1584,7 @@ IndexCallsiteContextGraph::IndexCallsiteContextGraph( ModuleSummaryIndex &Index, function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)> isPrevailing) - : Index(Index) { + : Index(Index), isPrevailing(isPrevailing) { for (auto &I : Index) { auto VI = Index.getValueInfo(I); for (auto &S : VI.getSummaryList()) { @@ -1572,7 +1638,7 @@ IndexCallsiteContextGraph::IndexCallsiteContextGraph( CallsWithMetadata.push_back({&SN}); if (!CallsWithMetadata.empty()) - FuncToCallsWithMetadata.push_back({FS, CallsWithMetadata}); + FuncToCallsWithMetadata[FS] = CallsWithMetadata; if (!FS->allocs().empty() || !FS->callsites().empty()) FSToVIMap[FS] = VI; @@ -1604,6 +1670,11 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, // this transformation for regular LTO, and for ThinLTO we can simulate that // effect in the summary and perform the actual speculative devirtualization // while cloning in the ThinLTO backend. + + // Keep track of the new nodes synthesized for discovered tail calls missing + // from the profiled contexts. + MapVector<CallInfo, ContextNode *> TailCallToContextNodeMap; + for (auto Entry = NonAllocationCallToContextNodeMap.begin(); Entry != NonAllocationCallToContextNodeMap.end();) { auto *Node = Entry->second; @@ -1611,13 +1682,17 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, // Check all node callees and see if in the same function. bool Removed = false; auto Call = Node->Call.call(); - for (auto &Edge : Node->CalleeEdges) { - if (!Edge->Callee->hasCall()) + for (auto EI = Node->CalleeEdges.begin(); EI != Node->CalleeEdges.end();) { + auto Edge = *EI; + if (!Edge->Callee->hasCall()) { + ++EI; continue; + } assert(NodeToCallingFunc.count(Edge->Callee)); // Check if the called function matches that of the callee node. - if (calleeMatchesFunc(Call, NodeToCallingFunc[Edge->Callee])) + if (calleesMatch(Call, EI, TailCallToContextNodeMap)) continue; + RemovedEdgesWithMismatchedCallees++; // Work around by setting Node to have a null call, so it gets // skipped during cloning. Otherwise assignFunctions will assert // because its data structures are not designed to handle this case. @@ -1629,6 +1704,11 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, if (!Removed) Entry++; } + + // Add the new nodes after the above loop so that the iteration is not + // invalidated. + for (auto &[Call, Node] : TailCallToContextNodeMap) + NonAllocationCallToContextNodeMap[Call] = Node; } uint64_t ModuleCallsiteContextGraph::getStackId(uint64_t IdOrIndex) const { @@ -1642,8 +1722,173 @@ uint64_t IndexCallsiteContextGraph::getStackId(uint64_t IdOrIndex) const { return Index.getStackIdAtIndex(IdOrIndex); } -bool ModuleCallsiteContextGraph::calleeMatchesFunc(Instruction *Call, - const Function *Func) { +template <typename DerivedCCG, typename FuncTy, typename CallTy> +bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::calleesMatch( + CallTy Call, EdgeIter &EI, + MapVector<CallInfo, ContextNode *> &TailCallToContextNodeMap) { + auto Edge = *EI; + const FuncTy *ProfiledCalleeFunc = NodeToCallingFunc[Edge->Callee]; + const FuncTy *CallerFunc = NodeToCallingFunc[Edge->Caller]; + // Will be populated in order of callee to caller if we find a chain of tail + // calls between the profiled caller and callee. + std::vector<std::pair<CallTy, FuncTy *>> FoundCalleeChain; + if (!calleeMatchesFunc(Call, ProfiledCalleeFunc, CallerFunc, + FoundCalleeChain)) { + ++EI; + return false; + } + + // The usual case where the profiled callee matches that of the IR/summary. + if (FoundCalleeChain.empty()) { + ++EI; + return true; + } + + auto AddEdge = [Edge, &EI](ContextNode *Caller, ContextNode *Callee) { + auto *CurEdge = Callee->findEdgeFromCaller(Caller); + // If there is already an edge between these nodes, simply update it and + // return. + if (CurEdge) { + CurEdge->ContextIds.insert(Edge->ContextIds.begin(), + Edge->ContextIds.end()); + CurEdge->AllocTypes |= Edge->AllocTypes; + return; + } + // Otherwise, create a new edge and insert it into the caller and callee + // lists. + auto NewEdge = std::make_shared<ContextEdge>( + Callee, Caller, Edge->AllocTypes, Edge->ContextIds); + Callee->CallerEdges.push_back(NewEdge); + if (Caller == Edge->Caller) { + // If we are inserting the new edge into the current edge's caller, insert + // the new edge before the current iterator position, and then increment + // back to the current edge. + EI = Caller->CalleeEdges.insert(EI, NewEdge); + ++EI; + assert(*EI == Edge && + "Iterator position not restored after insert and increment"); + } else + Caller->CalleeEdges.push_back(NewEdge); + }; + + // Create new nodes for each found callee and connect in between the profiled + // caller and callee. + auto *CurCalleeNode = Edge->Callee; + for (auto &[NewCall, Func] : FoundCalleeChain) { + ContextNode *NewNode = nullptr; + // First check if we have already synthesized a node for this tail call. + if (TailCallToContextNodeMap.count(NewCall)) { + NewNode = TailCallToContextNodeMap[NewCall]; + NewNode->ContextIds.insert(Edge->ContextIds.begin(), + Edge->ContextIds.end()); + NewNode->AllocTypes |= Edge->AllocTypes; + } else { + FuncToCallsWithMetadata[Func].push_back({NewCall}); + // Create Node and record node info. + NodeOwner.push_back( + std::make_unique<ContextNode>(/*IsAllocation=*/false, NewCall)); + NewNode = NodeOwner.back().get(); + NodeToCallingFunc[NewNode] = Func; + TailCallToContextNodeMap[NewCall] = NewNode; + NewNode->ContextIds = Edge->ContextIds; + NewNode->AllocTypes = Edge->AllocTypes; + } + + // Hook up node to its callee node + AddEdge(NewNode, CurCalleeNode); + + CurCalleeNode = NewNode; + } + + // Hook up edge's original caller to new callee node. + AddEdge(Edge->Caller, CurCalleeNode); + + // Remove old edge + Edge->Callee->eraseCallerEdge(Edge.get()); + EI = Edge->Caller->CalleeEdges.erase(EI); + + return true; +} + +bool ModuleCallsiteContextGraph::findProfiledCalleeThroughTailCalls( + const Function *ProfiledCallee, Value *CurCallee, unsigned Depth, + std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain, + bool &FoundMultipleCalleeChains) { + // Stop recursive search if we have already explored the maximum specified + // depth. + if (Depth > TailCallSearchDepth) + return false; + + auto SaveCallsiteInfo = [&](Instruction *Callsite, Function *F) { + FoundCalleeChain.push_back({Callsite, F}); + }; + + auto *CalleeFunc = dyn_cast<Function>(CurCallee); + if (!CalleeFunc) { + auto *Alias = dyn_cast<GlobalAlias>(CurCallee); + assert(Alias); + CalleeFunc = dyn_cast<Function>(Alias->getAliasee()); + assert(CalleeFunc); + } + + // Look for tail calls in this function, and check if they either call the + // profiled callee directly, or indirectly (via a recursive search). + // Only succeed if there is a single unique tail call chain found between the + // profiled caller and callee, otherwise we could perform incorrect cloning. + bool FoundSingleCalleeChain = false; + for (auto &BB : *CalleeFunc) { + for (auto &I : BB) { + auto *CB = dyn_cast<CallBase>(&I); + if (!CB || !CB->isTailCall()) + continue; + auto *CalledValue = CB->getCalledOperand(); + auto *CalledFunction = CB->getCalledFunction(); + if (CalledValue && !CalledFunction) { + CalledValue = CalledValue->stripPointerCasts(); + // Stripping pointer casts can reveal a called function. + CalledFunction = dyn_cast<Function>(CalledValue); + } + // Check if this is an alias to a function. If so, get the + // called aliasee for the checks below. + if (auto *GA = dyn_cast<GlobalAlias>(CalledValue)) { + assert(!CalledFunction && + "Expected null called function in callsite for alias"); + CalledFunction = dyn_cast<Function>(GA->getAliaseeObject()); + } + if (!CalledFunction) + continue; + if (CalledFunction == ProfiledCallee) { + if (FoundSingleCalleeChain) { + FoundMultipleCalleeChains = true; + return false; + } + FoundSingleCalleeChain = true; + FoundProfiledCalleeCount++; + FoundProfiledCalleeDepth += Depth; + if (Depth > FoundProfiledCalleeMaxDepth) + FoundProfiledCalleeMaxDepth = Depth; + SaveCallsiteInfo(&I, CalleeFunc); + } else if (findProfiledCalleeThroughTailCalls( + ProfiledCallee, CalledFunction, Depth + 1, + FoundCalleeChain, FoundMultipleCalleeChains)) { + if (FoundMultipleCalleeChains) + return false; + if (FoundSingleCalleeChain) { + FoundMultipleCalleeChains = true; + return false; + } + FoundSingleCalleeChain = true; + SaveCallsiteInfo(&I, CalleeFunc); + } + } + } + + return FoundSingleCalleeChain; +} + +bool ModuleCallsiteContextGraph::calleeMatchesFunc( + Instruction *Call, const Function *Func, const Function *CallerFunc, + std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain) { auto *CB = dyn_cast<CallBase>(Call); if (!CB->getCalledOperand()) return false; @@ -1652,11 +1897,117 @@ bool ModuleCallsiteContextGraph::calleeMatchesFunc(Instruction *Call, if (CalleeFunc == Func) return true; auto *Alias = dyn_cast<GlobalAlias>(CalleeVal); - return Alias && Alias->getAliasee() == Func; + if (Alias && Alias->getAliasee() == Func) + return true; + + // Recursively search for the profiled callee through tail calls starting with + // the actual Callee. The discovered tail call chain is saved in + // FoundCalleeChain, and we will fixup the graph to include these callsites + // after returning. + // FIXME: We will currently redo the same recursive walk if we find the same + // mismatched callee from another callsite. We can improve this with more + // bookkeeping of the created chain of new nodes for each mismatch. + unsigned Depth = 1; + bool FoundMultipleCalleeChains = false; + if (!findProfiledCalleeThroughTailCalls(Func, CalleeVal, Depth, + FoundCalleeChain, + FoundMultipleCalleeChains)) { + LLVM_DEBUG(dbgs() << "Not found through unique tail call chain: " + << Func->getName() << " from " << CallerFunc->getName() + << " that actually called " << CalleeVal->getName() + << (FoundMultipleCalleeChains + ? " (found multiple possible chains)" + : "") + << "\n"); + if (FoundMultipleCalleeChains) + FoundProfiledCalleeNonUniquelyCount++; + return false; + } + + return true; } -bool IndexCallsiteContextGraph::calleeMatchesFunc(IndexCall &Call, - const FunctionSummary *Func) { +bool IndexCallsiteContextGraph::findProfiledCalleeThroughTailCalls( + ValueInfo ProfiledCallee, ValueInfo CurCallee, unsigned Depth, + std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain, + bool &FoundMultipleCalleeChains) { + // Stop recursive search if we have already explored the maximum specified + // depth. + if (Depth > TailCallSearchDepth) + return false; + + auto CreateAndSaveCallsiteInfo = [&](ValueInfo Callee, FunctionSummary *FS) { + // Make a CallsiteInfo for each discovered callee, if one hasn't already + // been synthesized. + if (!FunctionCalleesToSynthesizedCallsiteInfos.count(FS) || + !FunctionCalleesToSynthesizedCallsiteInfos[FS].count(Callee)) + // StackIds is empty (we don't have debug info available in the index for + // these callsites) + FunctionCalleesToSynthesizedCallsiteInfos[FS][Callee] = + std::make_unique<CallsiteInfo>(Callee, SmallVector<unsigned>()); + CallsiteInfo *NewCallsiteInfo = + FunctionCalleesToSynthesizedCallsiteInfos[FS][Callee].get(); + FoundCalleeChain.push_back({NewCallsiteInfo, FS}); + }; + + // Look for tail calls in this function, and check if they either call the + // profiled callee directly, or indirectly (via a recursive search). + // Only succeed if there is a single unique tail call chain found between the + // profiled caller and callee, otherwise we could perform incorrect cloning. + bool FoundSingleCalleeChain = false; + for (auto &S : CurCallee.getSummaryList()) { + if (!GlobalValue::isLocalLinkage(S->linkage()) && + !isPrevailing(CurCallee.getGUID(), S.get())) + continue; + auto *FS = dyn_cast<FunctionSummary>(S->getBaseObject()); + if (!FS) + continue; + auto FSVI = CurCallee; + auto *AS = dyn_cast<AliasSummary>(S.get()); + if (AS) + FSVI = AS->getAliaseeVI(); + for (auto &CallEdge : FS->calls()) { + if (!CallEdge.second.hasTailCall()) + continue; + if (CallEdge.first == ProfiledCallee) { + if (FoundSingleCalleeChain) { + FoundMultipleCalleeChains = true; + return false; + } + FoundSingleCalleeChain = true; + FoundProfiledCalleeCount++; + FoundProfiledCalleeDepth += Depth; + if (Depth > FoundProfiledCalleeMaxDepth) + FoundProfiledCalleeMaxDepth = Depth; + CreateAndSaveCallsiteInfo(CallEdge.first, FS); + // Add FS to FSToVIMap in case it isn't already there. + assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI); + FSToVIMap[FS] = FSVI; + } else if (findProfiledCalleeThroughTailCalls( + ProfiledCallee, CallEdge.first, Depth + 1, + FoundCalleeChain, FoundMultipleCalleeChains)) { + if (FoundMultipleCalleeChains) + return false; + if (FoundSingleCalleeChain) { + FoundMultipleCalleeChains = true; + return false; + } + FoundSingleCalleeChain = true; + CreateAndSaveCallsiteInfo(CallEdge.first, FS); + // Add FS to FSToVIMap in case it isn't already there. + assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI); + FSToVIMap[FS] = FSVI; + } + } + } + + return FoundSingleCalleeChain; +} + +bool IndexCallsiteContextGraph::calleeMatchesFunc( + IndexCall &Call, const FunctionSummary *Func, + const FunctionSummary *CallerFunc, + std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain) { ValueInfo Callee = dyn_cast_if_present<CallsiteInfo *>(Call.getBase())->Callee; // If there is no summary list then this is a call to an externally defined @@ -1666,11 +2017,38 @@ bool IndexCallsiteContextGraph::calleeMatchesFunc(IndexCall &Call, ? nullptr : dyn_cast<AliasSummary>(Callee.getSummaryList()[0].get()); assert(FSToVIMap.count(Func)); - return Callee == FSToVIMap[Func] || - // If callee is an alias, check the aliasee, since only function - // summary base objects will contain the stack node summaries and thus - // get a context node. - (Alias && Alias->getAliaseeVI() == FSToVIMap[Func]); + auto FuncVI = FSToVIMap[Func]; + if (Callee == FuncVI || + // If callee is an alias, check the aliasee, since only function + // summary base objects will contain the stack node summaries and thus + // get a context node. + (Alias && Alias->getAliaseeVI() == FuncVI)) + return true; + + // Recursively search for the profiled callee through tail calls starting with + // the actual Callee. The discovered tail call chain is saved in + // FoundCalleeChain, and we will fixup the graph to include these callsites + // after returning. + // FIXME: We will currently redo the same recursive walk if we find the same + // mismatched callee from another callsite. We can improve this with more + // bookkeeping of the created chain of new nodes for each mismatch. + unsigned Depth = 1; + bool FoundMultipleCalleeChains = false; + if (!findProfiledCalleeThroughTailCalls( + FuncVI, Callee, Depth, FoundCalleeChain, FoundMultipleCalleeChains)) { + LLVM_DEBUG(dbgs() << "Not found through unique tail call chain: " << FuncVI + << " from " << FSToVIMap[CallerFunc] + << " that actually called " << Callee + << (FoundMultipleCalleeChains + ? " (found multiple possible chains)" + : "") + << "\n"); + if (FoundMultipleCalleeChains) + FoundProfiledCalleeNonUniquelyCount++; + return false; + } + + return true; } static std::string getAllocTypeString(uint8_t AllocTypes) { @@ -2533,6 +2911,9 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() { // that were previously assigned to call PreviousAssignedFuncClone, // to record that they now call NewFuncClone. for (auto CE : Clone->CallerEdges) { + // Skip any that have been removed on an earlier iteration. + if (!CE) + continue; // Ignore any caller that does not have a recorded callsite Call. if (!CE->Caller->hasCall()) continue; @@ -2945,6 +3326,42 @@ bool MemProfContextDisambiguation::applyImport(Module &M) { NumClonesCreated = NumClones; }; + auto CloneCallsite = [&](const CallsiteInfo &StackNode, CallBase *CB, + Function *CalledFunction) { + // Perform cloning if not yet done. + CloneFuncIfNeeded(/*NumClones=*/StackNode.Clones.size()); + + // Should have skipped indirect calls via mayHaveMemprofSummary. + assert(CalledFunction); + assert(!IsMemProfClone(*CalledFunction)); + + // Update the calls per the summary info. + // Save orig name since it gets updated in the first iteration + // below. + auto CalleeOrigName = CalledFunction->getName(); + for (unsigned J = 0; J < StackNode.Clones.size(); J++) { + // Do nothing if this version calls the original version of its + // callee. + if (!StackNode.Clones[J]) + continue; + auto NewF = M.getOrInsertFunction( + getMemProfFuncName(CalleeOrigName, StackNode.Clones[J]), + CalledFunction->getFunctionType()); + CallBase *CBClone; + // Copy 0 is the original function. + if (!J) + CBClone = CB; + else + CBClone = cast<CallBase>((*VMaps[J - 1])[CB]); + CBClone->setCalledFunction(NewF); + ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofCall", CBClone) + << ore::NV("Call", CBClone) << " in clone " + << ore::NV("Caller", CBClone->getFunction()) + << " assigned to call function clone " + << ore::NV("Callee", NewF.getCallee())); + } + }; + // Locate the summary for F. ValueInfo TheFnVI = findValueInfoForFunc(F, M, ImportSummary); // If not found, this could be an imported local (see comment in @@ -2974,6 +3391,23 @@ bool MemProfContextDisambiguation::applyImport(Module &M) { auto SI = FS->callsites().begin(); auto AI = FS->allocs().begin(); + // To handle callsite infos synthesized for tail calls which have missing + // frames in the profiled context, map callee VI to the synthesized callsite + // info. + DenseMap<ValueInfo, CallsiteInfo> MapTailCallCalleeVIToCallsite; + // Iterate the callsites for this function in reverse, since we place all + // those synthesized for tail calls at the end. + for (auto CallsiteIt = FS->callsites().rbegin(); + CallsiteIt != FS->callsites().rend(); CallsiteIt++) { + auto &Callsite = *CallsiteIt; + // Stop as soon as we see a non-synthesized callsite info (see comment + // above loop). All the entries added for discovered tail calls have empty + // stack ids. + if (!Callsite.StackIdIndices.empty()) + break; + MapTailCallCalleeVIToCallsite.insert({Callsite.Callee, Callsite}); + } + // Assume for now that the instructions are in the exact same order // as when the summary was created, but confirm this is correct by // matching the stack ids. @@ -3126,37 +3560,16 @@ bool MemProfContextDisambiguation::applyImport(Module &M) { } #endif - // Perform cloning if not yet done. - CloneFuncIfNeeded(/*NumClones=*/StackNode.Clones.size()); - - // Should have skipped indirect calls via mayHaveMemprofSummary. - assert(CalledFunction); - assert(!IsMemProfClone(*CalledFunction)); - - // Update the calls per the summary info. - // Save orig name since it gets updated in the first iteration - // below. - auto CalleeOrigName = CalledFunction->getName(); - for (unsigned J = 0; J < StackNode.Clones.size(); J++) { - // Do nothing if this version calls the original version of its - // callee. - if (!StackNode.Clones[J]) - continue; - auto NewF = M.getOrInsertFunction( - getMemProfFuncName(CalleeOrigName, StackNode.Clones[J]), - CalledFunction->getFunctionType()); - CallBase *CBClone; - // Copy 0 is the original function. - if (!J) - CBClone = CB; - else - CBClone = cast<CallBase>((*VMaps[J - 1])[CB]); - CBClone->setCalledFunction(NewF); - ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofCall", CBClone) - << ore::NV("Call", CBClone) << " in clone " - << ore::NV("Caller", CBClone->getFunction()) - << " assigned to call function clone " - << ore::NV("Callee", NewF.getCallee())); + CloneCallsite(StackNode, CB, CalledFunction); + } else if (CB->isTailCall()) { + // Locate the synthesized callsite info for the callee VI, if any was + // created, and use that for cloning. + ValueInfo CalleeVI = + findValueInfoForFunc(*CalledFunction, M, ImportSummary); + if (CalleeVI && MapTailCallCalleeVIToCallsite.count(CalleeVI)) { + auto Callsite = MapTailCallCalleeVIToCallsite.find(CalleeVI); + assert(Callsite != MapTailCallCalleeVIToCallsite.end()); + CloneCallsite(Callsite->second, CB, CalledFunction); } } // Memprof and callsite metadata on memory allocations no longer needed. diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 96b612254ca5..c7e6f32c5406 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -1723,6 +1723,30 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) { I, Builder.CreateIntrinsic(Intrinsic::ctpop, {I.getType()}, {Builder.CreateOr(A, B)})); + // Fold the log2_ceil idiom: + // zext(ctpop(A) >u/!= 1) + (ctlz(A, true) ^ (BW - 1)) + // --> + // BW - ctlz(A - 1, false) + const APInt *XorC; + if (match(&I, + m_c_Add( + m_ZExt(m_ICmp(Pred, m_Intrinsic<Intrinsic::ctpop>(m_Value(A)), + m_One())), + m_OneUse(m_ZExtOrSelf(m_OneUse(m_Xor( + m_OneUse(m_TruncOrSelf(m_OneUse( + m_Intrinsic<Intrinsic::ctlz>(m_Deferred(A), m_One())))), + m_APInt(XorC))))))) && + (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_NE) && + *XorC == A->getType()->getScalarSizeInBits() - 1) { + Value *Sub = Builder.CreateAdd(A, Constant::getAllOnesValue(A->getType())); + Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {A->getType()}, + {Sub, Builder.getFalse()}); + Value *Ret = Builder.CreateSub( + ConstantInt::get(A->getType(), A->getType()->getScalarSizeInBits()), + Ctlz, "", /*HasNUW*/ true, /*HasNSW*/ true); + return replaceInstUsesWith(I, Builder.CreateZExtOrTrunc(Ret, I.getType())); + } + if (Instruction *Res = foldSquareSumInt(I)) return Res; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index c03f50d75814..0620752e3213 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -46,44 +46,6 @@ static Value *getFCmpValue(unsigned Code, Value *LHS, Value *RHS, return Builder.CreateFCmp(NewPred, LHS, RHS); } -/// Transform BITWISE_OP(BSWAP(A),BSWAP(B)) or -/// BITWISE_OP(BSWAP(A), Constant) to BSWAP(BITWISE_OP(A, B)) -/// \param I Binary operator to transform. -/// \return Pointer to node that must replace the original binary operator, or -/// null pointer if no transformation was made. -static Value *SimplifyBSwap(BinaryOperator &I, - InstCombiner::BuilderTy &Builder) { - assert(I.isBitwiseLogicOp() && "Unexpected opcode for bswap simplifying"); - - Value *OldLHS = I.getOperand(0); - Value *OldRHS = I.getOperand(1); - - Value *NewLHS; - if (!match(OldLHS, m_BSwap(m_Value(NewLHS)))) - return nullptr; - - Value *NewRHS; - const APInt *C; - - if (match(OldRHS, m_BSwap(m_Value(NewRHS)))) { - // OP( BSWAP(x), BSWAP(y) ) -> BSWAP( OP(x, y) ) - if (!OldLHS->hasOneUse() && !OldRHS->hasOneUse()) - return nullptr; - // NewRHS initialized by the matcher. - } else if (match(OldRHS, m_APInt(C))) { - // OP( BSWAP(x), CONSTANT ) -> BSWAP( OP(x, BSWAP(CONSTANT) ) ) - if (!OldLHS->hasOneUse()) - return nullptr; - NewRHS = ConstantInt::get(I.getType(), C->byteSwap()); - } else - return nullptr; - - Value *BinOp = Builder.CreateBinOp(I.getOpcode(), NewLHS, NewRHS); - Function *F = Intrinsic::getDeclaration(I.getModule(), Intrinsic::bswap, - I.getType()); - return Builder.CreateCall(F, BinOp); -} - /// Emit a computation of: (V >= Lo && V < Hi) if Inside is true, otherwise /// (V < Lo || V >= Hi). This method expects that Lo < Hi. IsSigned indicates /// whether to treat V, Lo, and Hi as signed or not. @@ -2159,6 +2121,64 @@ Instruction *InstCombinerImpl::foldBinOpOfDisplacedShifts(BinaryOperator &I) { return BinaryOperator::Create(ShiftOp, NewC, ShAmt); } +// Fold and/or/xor with two equal intrinsic IDs: +// bitwise(fshl (A, B, ShAmt), fshl(C, D, ShAmt)) +// -> fshl(bitwise(A, C), bitwise(B, D), ShAmt) +// bitwise(fshr (A, B, ShAmt), fshr(C, D, ShAmt)) +// -> fshr(bitwise(A, C), bitwise(B, D), ShAmt) +// bitwise(bswap(A), bswap(B)) -> bswap(bitwise(A, B)) +// bitwise(bswap(A), C) -> bswap(bitwise(A, bswap(C))) +// bitwise(bitreverse(A), bitreverse(B)) -> bitreverse(bitwise(A, B)) +// bitwise(bitreverse(A), C) -> bitreverse(bitwise(A, bitreverse(C))) +static Instruction * +foldBitwiseLogicWithIntrinsics(BinaryOperator &I, + InstCombiner::BuilderTy &Builder) { + assert(I.isBitwiseLogicOp() && "Should and/or/xor"); + if (!I.getOperand(0)->hasOneUse()) + return nullptr; + IntrinsicInst *X = dyn_cast<IntrinsicInst>(I.getOperand(0)); + if (!X) + return nullptr; + + IntrinsicInst *Y = dyn_cast<IntrinsicInst>(I.getOperand(1)); + if (Y && (!Y->hasOneUse() || X->getIntrinsicID() != Y->getIntrinsicID())) + return nullptr; + + Intrinsic::ID IID = X->getIntrinsicID(); + const APInt *RHSC; + // Try to match constant RHS. + if (!Y && (!(IID == Intrinsic::bswap || IID == Intrinsic::bitreverse) || + !match(I.getOperand(1), m_APInt(RHSC)))) + return nullptr; + + switch (IID) { + case Intrinsic::fshl: + case Intrinsic::fshr: { + if (X->getOperand(2) != Y->getOperand(2)) + return nullptr; + Value *NewOp0 = + Builder.CreateBinOp(I.getOpcode(), X->getOperand(0), Y->getOperand(0)); + Value *NewOp1 = + Builder.CreateBinOp(I.getOpcode(), X->getOperand(1), Y->getOperand(1)); + Function *F = Intrinsic::getDeclaration(I.getModule(), IID, I.getType()); + return CallInst::Create(F, {NewOp0, NewOp1, X->getOperand(2)}); + } + case Intrinsic::bswap: + case Intrinsic::bitreverse: { + Value *NewOp0 = Builder.CreateBinOp( + I.getOpcode(), X->getOperand(0), + Y ? Y->getOperand(0) + : ConstantInt::get(I.getType(), IID == Intrinsic::bswap + ? RHSC->byteSwap() + : RHSC->reverseBits())); + Function *F = Intrinsic::getDeclaration(I.getModule(), IID, I.getType()); + return CallInst::Create(F, {NewOp0}); + } + default: + return nullptr; + } +} + // FIXME: We use commutative matchers (m_c_*) for some, but not all, matches // here. We should standardize that construct where it is needed or choose some // other way to ensure that commutated variants of patterns are not missed. @@ -2194,9 +2214,6 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) { if (Value *V = foldUsingDistributiveLaws(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifyBSwap(I, Builder)) - return replaceInstUsesWith(I, V); - if (Instruction *R = foldBinOpShiftWithShift(I)) return R; @@ -2688,6 +2705,9 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) { if (Instruction *Res = foldBinOpOfDisplacedShifts(I)) return Res; + if (Instruction *Res = foldBitwiseLogicWithIntrinsics(I, Builder)) + return Res; + return nullptr; } @@ -3347,9 +3367,6 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) { if (Value *V = foldUsingDistributiveLaws(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifyBSwap(I, Builder)) - return replaceInstUsesWith(I, V); - Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); Type *Ty = I.getType(); if (Ty->isIntOrIntVectorTy(1)) { @@ -3884,6 +3901,9 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) { return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, *C1 | *C2)); } + if (Instruction *Res = foldBitwiseLogicWithIntrinsics(I, Builder)) + return Res; + return nullptr; } @@ -4507,9 +4527,6 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) { if (SimplifyDemandedInstructionBits(I)) return &I; - if (Value *V = SimplifyBSwap(I, Builder)) - return replaceInstUsesWith(I, V); - if (Instruction *R = foldNot(I)) return R; @@ -4799,5 +4816,8 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) { if (Instruction *Res = foldBinOpOfDisplacedShifts(I)) return Res; + if (Instruction *Res = foldBitwiseLogicWithIntrinsics(I, Builder)) + return Res; + return nullptr; } diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 40b48699f758..64fbd5543a9e 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1884,6 +1884,10 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { return crossLogicOpFold; } + // Try to fold into bitreverse if bswap is the root of the expression tree. + if (Instruction *BitOp = matchBSwapOrBitReverse(*II, /*MatchBSwaps*/ false, + /*MatchBitReversals*/ true)) + return BitOp; break; } case Intrinsic::masked_load: diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index ab55f235920a..21bfc91148bf 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -1704,11 +1704,11 @@ Instruction *InstCombinerImpl::foldSelectInstWithICmp(SelectInst &SI, if (CmpRHS != CmpLHS && isa<Constant>(CmpRHS) && !isa<Constant>(CmpLHS)) { if (CmpLHS == TrueVal && Pred == ICmpInst::ICMP_EQ) { // Transform (X == C) ? X : Y -> (X == C) ? C : Y - SI.setOperand(1, CmpRHS); + replaceOperand(SI, 1, CmpRHS); Changed = true; } else if (CmpLHS == FalseVal && Pred == ICmpInst::ICMP_NE) { // Transform (X != C) ? Y : X -> (X != C) ? Y : C - SI.setOperand(2, CmpRHS); + replaceOperand(SI, 2, CmpRHS); Changed = true; } } diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index e3deafa49bd9..5e7e08eaa997 100644 --- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -216,7 +216,7 @@ static cl::opt<bool> ClInstrumentWrites( cl::Hidden, cl::init(true)); static cl::opt<bool> - ClUseStackSafety("asan-use-stack-safety", cl::Hidden, cl::init(false), + ClUseStackSafety("asan-use-stack-safety", cl::Hidden, cl::init(true), cl::Hidden, cl::desc("Use Stack Safety analysis results"), cl::Optional); diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index 6b95c7028d93..c20fc942eaf0 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -617,9 +617,7 @@ void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() { std::vector<uint8_t> Indexes; JamCRC JC; for (auto &BB : F) { - const Instruction *TI = BB.getTerminator(); - for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) { - BasicBlock *Succ = TI->getSuccessor(I); + for (BasicBlock *Succ : successors(&BB)) { auto BI = findBBInfo(Succ); if (BI == nullptr) continue; @@ -658,10 +656,10 @@ void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() { << " CRC = " << JC.getCRC() << ", Selects = " << SIVisitor.getNumOfSelectInsts() << ", Edges = " << MST.numEdges() << ", ICSites = " - << ValueSites[IPVK_IndirectCallTarget].size()); - LLVM_DEBUG(dbgs() << ", Memops = " << ValueSites[IPVK_MemOPSize].size() - << ", High32 CRC = " << JCH.getCRC()); - LLVM_DEBUG(dbgs() << ", Hash = " << FunctionHash << "\n";); + << ValueSites[IPVK_IndirectCallTarget].size() + << ", Memops = " << ValueSites[IPVK_MemOPSize].size() + << ", High32 CRC = " << JCH.getCRC() + << ", Hash = " << FunctionHash << "\n";); if (PGOTraceFuncHash != "-" && F.getName().contains(PGOTraceFuncHash)) dbgs() << "Funcname=" << F.getName() << ", Hash=" << FunctionHash diff --git a/llvm/lib/Transforms/Scalar/LoopFlatten.cpp b/llvm/lib/Transforms/Scalar/LoopFlatten.cpp index eef94636578d..533cefaf1061 100644 --- a/llvm/lib/Transforms/Scalar/LoopFlatten.cpp +++ b/llvm/lib/Transforms/Scalar/LoopFlatten.cpp @@ -207,6 +207,12 @@ struct FlattenInfo { match(MatchedMul, m_c_Mul(m_Trunc(m_Specific(OuterInductionPHI)), m_Value(MatchedItCount))); + // Matches the pattern ptr+i*M+j, with the two additions being done via GEP. + bool IsGEP = match(U, m_GEP(m_GEP(m_Value(), m_Value(MatchedMul)), + m_Specific(InnerInductionPHI))) && + match(MatchedMul, m_c_Mul(m_Specific(OuterInductionPHI), + m_Value(MatchedItCount))); + if (!MatchedItCount) return false; @@ -224,7 +230,7 @@ struct FlattenInfo { // Look through extends if the IV has been widened. Don't look through // extends if we already looked through a trunc. - if (Widened && IsAdd && + if (Widened && (IsAdd || IsGEP) && (isa<SExtInst>(MatchedItCount) || isa<ZExtInst>(MatchedItCount))) { assert(MatchedItCount->getType() == InnerInductionPHI->getType() && "Unexpected type mismatch in types after widening"); @@ -236,7 +242,7 @@ struct FlattenInfo { LLVM_DEBUG(dbgs() << "Looking for inner trip count: "; InnerTripCount->dump()); - if ((IsAdd || IsAddTrunc) && MatchedItCount == InnerTripCount) { + if ((IsAdd || IsAddTrunc || IsGEP) && MatchedItCount == InnerTripCount) { LLVM_DEBUG(dbgs() << "Found. This sse is optimisable\n"); ValidOuterPHIUses.insert(MatchedMul); LinearIVUses.insert(U); @@ -646,33 +652,40 @@ static OverflowResult checkOverflow(FlattenInfo &FI, DominatorTree *DT, if (OR != OverflowResult::MayOverflow) return OR; - for (Value *V : FI.LinearIVUses) { - for (Value *U : V->users()) { - if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) { - for (Value *GEPUser : U->users()) { - auto *GEPUserInst = cast<Instruction>(GEPUser); - if (!isa<LoadInst>(GEPUserInst) && - !(isa<StoreInst>(GEPUserInst) && - GEP == GEPUserInst->getOperand(1))) - continue; - if (!isGuaranteedToExecuteForEveryIteration(GEPUserInst, - FI.InnerLoop)) - continue; - // The IV is used as the operand of a GEP which dominates the loop - // latch, and the IV is at least as wide as the address space of the - // GEP. In this case, the GEP would wrap around the address space - // before the IV increment wraps, which would be UB. - if (GEP->isInBounds() && - V->getType()->getIntegerBitWidth() >= - DL.getPointerTypeSizeInBits(GEP->getType())) { - LLVM_DEBUG( - dbgs() << "use of linear IV would be UB if overflow occurred: "; - GEP->dump()); - return OverflowResult::NeverOverflows; - } - } + auto CheckGEP = [&](GetElementPtrInst *GEP, Value *GEPOperand) { + for (Value *GEPUser : GEP->users()) { + auto *GEPUserInst = cast<Instruction>(GEPUser); + if (!isa<LoadInst>(GEPUserInst) && + !(isa<StoreInst>(GEPUserInst) && GEP == GEPUserInst->getOperand(1))) + continue; + if (!isGuaranteedToExecuteForEveryIteration(GEPUserInst, FI.InnerLoop)) + continue; + // The IV is used as the operand of a GEP which dominates the loop + // latch, and the IV is at least as wide as the address space of the + // GEP. In this case, the GEP would wrap around the address space + // before the IV increment wraps, which would be UB. + if (GEP->isInBounds() && + GEPOperand->getType()->getIntegerBitWidth() >= + DL.getPointerTypeSizeInBits(GEP->getType())) { + LLVM_DEBUG( + dbgs() << "use of linear IV would be UB if overflow occurred: "; + GEP->dump()); + return true; } } + return false; + }; + + // Check if any IV user is, or is used by, a GEP that would cause UB if the + // multiply overflows. + for (Value *V : FI.LinearIVUses) { + if (auto *GEP = dyn_cast<GetElementPtrInst>(V)) + if (GEP->getNumIndices() == 1 && CheckGEP(GEP, GEP->getOperand(1))) + return OverflowResult::NeverOverflows; + for (Value *U : V->users()) + if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) + if (CheckGEP(GEP, V)) + return OverflowResult::NeverOverflows; } return OverflowResult::MayOverflow; @@ -778,6 +791,18 @@ static bool DoFlattenLoopPair(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI, OuterValue = Builder.CreateTrunc(FI.OuterInductionPHI, V->getType(), "flatten.trunciv"); + if (auto *GEP = dyn_cast<GetElementPtrInst>(V)) { + // Replace the GEP with one that uses OuterValue as the offset. + auto *InnerGEP = cast<GetElementPtrInst>(GEP->getOperand(0)); + Value *Base = InnerGEP->getOperand(0); + // When the base of the GEP doesn't dominate the outer induction phi then + // we need to insert the new GEP where the old GEP was. + if (!DT->dominates(Base, &*Builder.GetInsertPoint())) + Builder.SetInsertPoint(cast<Instruction>(V)); + OuterValue = Builder.CreateGEP(GEP->getSourceElementType(), Base, + OuterValue, "flatten." + V->getName()); + } + LLVM_DEBUG(dbgs() << "Replacing: "; V->dump(); dbgs() << "with: "; OuterValue->dump()); V->replaceAllUsesWith(OuterValue); diff --git a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index 3f02441b74ba..b98f823ab00b 100644 --- a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -1975,19 +1975,10 @@ insertRelocationStores(iterator_range<Value::user_iterator> GCRelocs, assert(AllocaMap.count(OriginalValue)); Value *Alloca = AllocaMap[OriginalValue]; - // Emit store into the related alloca - // All gc_relocates are i8 addrspace(1)* typed, and it must be bitcasted to - // the correct type according to alloca. + // Emit store into the related alloca. assert(Relocate->getNextNode() && "Should always have one since it's not a terminator"); - IRBuilder<> Builder(Relocate->getNextNode()); - Value *CastedRelocatedValue = - Builder.CreateBitCast(Relocate, - cast<AllocaInst>(Alloca)->getAllocatedType(), - suffixed_name_or(Relocate, ".casted", "")); - - new StoreInst(CastedRelocatedValue, Alloca, - cast<Instruction>(CastedRelocatedValue)->getNextNode()); + new StoreInst(Relocate, Alloca, Relocate->getNextNode()); #ifndef NDEBUG VisitedLiveValues.insert(OriginalValue); @@ -2620,13 +2611,9 @@ static bool inlineGetBaseAndOffset(Function &F, Value *Base = findBasePointer(Callsite->getOperand(0), DVCache, KnownBases); assert(!DVCache.count(Callsite)); - auto *BaseBC = IRBuilder<>(Callsite).CreateBitCast( - Base, Callsite->getType(), suffixed_name_or(Base, ".cast", "")); - if (BaseBC != Base) - DVCache[BaseBC] = Base; - Callsite->replaceAllUsesWith(BaseBC); - if (!BaseBC->hasName()) - BaseBC->takeName(Callsite); + Callsite->replaceAllUsesWith(Base); + if (!Base->hasName()) + Base->takeName(Callsite); Callsite->eraseFromParent(); break; } diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp index 225dd454068c..d2fed11445e4 100644 --- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp +++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -1093,67 +1093,25 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) { // => add the offset // // %gep2 ; clone of %gep - // %new.gep = gep %gep2, <offset / sizeof(*%gep)> + // %new.gep = gep i8, %gep2, %offset // %gep ; will be removed // ... %gep ... // // => replace all uses of %gep with %new.gep and remove %gep // // %gep2 ; clone of %gep - // %new.gep = gep %gep2, <offset / sizeof(*%gep)> - // ... %new.gep ... - // - // If AccumulativeByteOffset is not a multiple of sizeof(*%gep), we emit an - // uglygep (http://llvm.org/docs/GetElementPtr.html#what-s-an-uglygep): - // bitcast %gep2 to i8*, add the offset, and bitcast the result back to the - // type of %gep. - // - // %gep2 ; clone of %gep - // %0 = bitcast %gep2 to i8* - // %uglygep = gep %0, <offset> - // %new.gep = bitcast %uglygep to <type of %gep> + // %new.gep = gep i8, %gep2, %offset // ... %new.gep ... Instruction *NewGEP = GEP->clone(); NewGEP->insertBefore(GEP); - // Per ANSI C standard, signed / unsigned = unsigned and signed % unsigned = - // unsigned.. Therefore, we cast ElementTypeSizeOfGEP to signed because it is - // used with unsigned integers later. - int64_t ElementTypeSizeOfGEP = static_cast<int64_t>( - DL->getTypeAllocSize(GEP->getResultElementType())); Type *PtrIdxTy = DL->getIndexType(GEP->getType()); - if (AccumulativeByteOffset % ElementTypeSizeOfGEP == 0) { - // Very likely. As long as %gep is naturally aligned, the byte offset we - // extracted should be a multiple of sizeof(*%gep). - int64_t Index = AccumulativeByteOffset / ElementTypeSizeOfGEP; - NewGEP = GetElementPtrInst::Create(GEP->getResultElementType(), NewGEP, - ConstantInt::get(PtrIdxTy, Index, true), - GEP->getName(), GEP); - NewGEP->copyMetadata(*GEP); - // Inherit the inbounds attribute of the original GEP. - cast<GetElementPtrInst>(NewGEP)->setIsInBounds(GEPWasInBounds); - } else { - // Unlikely but possible. For example, - // #pragma pack(1) - // struct S { - // int a[3]; - // int64 b[8]; - // }; - // #pragma pack() - // - // Suppose the gep before extraction is &s[i + 1].b[j + 3]. After - // extraction, it becomes &s[i].b[j] and AccumulativeByteOffset is - // sizeof(S) + 3 * sizeof(int64) = 100, which is not a multiple of - // sizeof(int64). - // - // Emit an uglygep in this case. - IRBuilder<> Builder(GEP); - NewGEP = cast<Instruction>(Builder.CreateGEP( - Builder.getInt8Ty(), NewGEP, - {ConstantInt::get(PtrIdxTy, AccumulativeByteOffset, true)}, "uglygep", - GEPWasInBounds)); - NewGEP->copyMetadata(*GEP); - } + IRBuilder<> Builder(GEP); + NewGEP = cast<Instruction>(Builder.CreateGEP( + Builder.getInt8Ty(), NewGEP, + {ConstantInt::get(PtrIdxTy, AccumulativeByteOffset, true)}, + GEP->getName(), GEPWasInBounds)); + NewGEP->copyMetadata(*GEP); GEP->replaceAllUsesWith(NewGEP); GEP->eraseFromParent(); diff --git a/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp index ca1f3a0c0ae3..2cce6eb22341 100644 --- a/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp +++ b/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp @@ -233,13 +233,9 @@ private: void factorArrayIndex(Value *ArrayIdx, const SCEV *Base, uint64_t ElementSize, GetElementPtrInst *GEP); - // Emit code that computes the "bump" from Basis to C. If the candidate is a - // GEP and the bump is not divisible by the element size of the GEP, this - // function sets the BumpWithUglyGEP flag to notify its caller to bump the - // basis using an ugly GEP. + // Emit code that computes the "bump" from Basis to C. static Value *emitBump(const Candidate &Basis, const Candidate &C, - IRBuilder<> &Builder, const DataLayout *DL, - bool &BumpWithUglyGEP); + IRBuilder<> &Builder, const DataLayout *DL); const DataLayout *DL = nullptr; DominatorTree *DT = nullptr; @@ -581,26 +577,11 @@ static void unifyBitWidth(APInt &A, APInt &B) { Value *StraightLineStrengthReduce::emitBump(const Candidate &Basis, const Candidate &C, IRBuilder<> &Builder, - const DataLayout *DL, - bool &BumpWithUglyGEP) { + const DataLayout *DL) { APInt Idx = C.Index->getValue(), BasisIdx = Basis.Index->getValue(); unifyBitWidth(Idx, BasisIdx); APInt IndexOffset = Idx - BasisIdx; - BumpWithUglyGEP = false; - if (Basis.CandidateKind == Candidate::GEP) { - APInt ElementSize( - IndexOffset.getBitWidth(), - DL->getTypeAllocSize( - cast<GetElementPtrInst>(Basis.Ins)->getResultElementType())); - APInt Q, R; - APInt::sdivrem(IndexOffset, ElementSize, Q, R); - if (R == 0) - IndexOffset = Q; - else - BumpWithUglyGEP = true; - } - // Compute Bump = C - Basis = (i' - i) * S. // Common case 1: if (i' - i) is 1, Bump = S. if (IndexOffset == 1) @@ -645,8 +626,7 @@ void StraightLineStrengthReduce::rewriteCandidateWithBasis( return; IRBuilder<> Builder(C.Ins); - bool BumpWithUglyGEP; - Value *Bump = emitBump(Basis, C, Builder, DL, BumpWithUglyGEP); + Value *Bump = emitBump(Basis, C, Builder, DL); Value *Reduced = nullptr; // equivalent to but weaker than C.Ins switch (C.CandidateKind) { case Candidate::Add: @@ -673,28 +653,13 @@ void StraightLineStrengthReduce::rewriteCandidateWithBasis( } break; } - case Candidate::GEP: - { - Type *OffsetTy = DL->getIndexType(C.Ins->getType()); + case Candidate::GEP: { bool InBounds = cast<GetElementPtrInst>(C.Ins)->isInBounds(); - if (BumpWithUglyGEP) { - // C = (char *)Basis + Bump - unsigned AS = Basis.Ins->getType()->getPointerAddressSpace(); - Type *CharTy = PointerType::get(Basis.Ins->getContext(), AS); - Reduced = Builder.CreateBitCast(Basis.Ins, CharTy); - Reduced = - Builder.CreateGEP(Builder.getInt8Ty(), Reduced, Bump, "", InBounds); - Reduced = Builder.CreateBitCast(Reduced, C.Ins->getType()); - } else { - // C = gep Basis, Bump - // Canonicalize bump to pointer size. - Bump = Builder.CreateSExtOrTrunc(Bump, OffsetTy); - Reduced = Builder.CreateGEP( - cast<GetElementPtrInst>(Basis.Ins)->getResultElementType(), Basis.Ins, - Bump, "", InBounds); - } - break; - } + // C = (char *)Basis + Bump + Reduced = + Builder.CreateGEP(Builder.getInt8Ty(), Basis.Ins, Bump, "", InBounds); + break; + } default: llvm_unreachable("C.CandidateKind is invalid"); }; diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index c76cc9db16d7..b9cad764aaef 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -3905,7 +3905,8 @@ bool llvm::recognizeBSwapOrBitReverseIdiom( SmallVectorImpl<Instruction *> &InsertedInsts) { if (!match(I, m_Or(m_Value(), m_Value())) && !match(I, m_FShl(m_Value(), m_Value(), m_Value())) && - !match(I, m_FShr(m_Value(), m_Value(), m_Value()))) + !match(I, m_FShr(m_Value(), m_Value(), m_Value())) && + !match(I, m_BSwap(m_Value()))) return false; if (!MatchBSwaps && !MatchBitReversals) return false; diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 61d891d65346..7515e539e7fb 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -6919,18 +6919,17 @@ static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder, auto *Ty = cast<IntegerType>(SI->getCondition()->getType()); Builder.SetInsertPoint(SI); - auto *ShiftC = ConstantInt::get(Ty, Shift); - auto *Sub = Builder.CreateSub(SI->getCondition(), ConstantInt::get(Ty, Base)); - auto *LShr = Builder.CreateLShr(Sub, ShiftC); - auto *Shl = Builder.CreateShl(Sub, Ty->getBitWidth() - Shift); - auto *Rot = Builder.CreateOr(LShr, Shl); + Value *Sub = + Builder.CreateSub(SI->getCondition(), ConstantInt::get(Ty, Base)); + Value *Rot = Builder.CreateIntrinsic( + Ty, Intrinsic::fshl, + {Sub, Sub, ConstantInt::get(Ty, Ty->getBitWidth() - Shift)}); SI->replaceUsesOfWith(SI->getCondition(), Rot); for (auto Case : SI->cases()) { auto *Orig = Case.getCaseValue(); auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base); - Case.setValue( - cast<ConstantInt>(ConstantInt::get(Ty, Sub.lshr(ShiftC->getValue())))); + Case.setValue(cast<ConstantInt>(ConstantInt::get(Ty, Sub.lshr(Shift)))); } return true; } diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 51ce88480c08..9743fa0e7402 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -5004,9 +5004,8 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor( VectorizationFactor Candidate(i, C.first, ScalarCost.ScalarCost); #ifndef NDEBUG - unsigned AssumedMinimumVscale = 1; - if (std::optional<unsigned> VScale = getVScaleForTuning(OrigLoop, TTI)) - AssumedMinimumVscale = *VScale; + unsigned AssumedMinimumVscale = + getVScaleForTuning(OrigLoop, TTI).value_or(1); unsigned Width = Candidate.Width.isScalable() ? Candidate.Width.getKnownMinValue() * AssumedMinimumVscale @@ -8031,6 +8030,7 @@ void VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlan &Plan) { VPValue *EdgeMask = createEdgeMask(Predecessor, BB, Plan); if (!EdgeMask) { // Mask of predecessor is all-one so mask of block is too. BlockMaskCache[BB] = EdgeMask; + return; } if (!BlockMask) { // BlockMask has its initialized nullptr value. diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 8e22b54f002d..055fbb00871f 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -6894,6 +6894,31 @@ protected: }; } // namespace +/// Returns the cost of the shuffle instructions with the given \p Kind, vector +/// type \p Tp and optional \p Mask. Adds SLP-specifc cost estimation for insert +/// subvector pattern. +static InstructionCost +getShuffleCost(const TargetTransformInfo &TTI, TTI::ShuffleKind Kind, + VectorType *Tp, ArrayRef<int> Mask = std::nullopt, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, + int Index = 0, VectorType *SubTp = nullptr, + ArrayRef<const Value *> Args = std::nullopt) { + if (Kind != TTI::SK_PermuteTwoSrc) + return TTI.getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args); + int NumSrcElts = Tp->getElementCount().getKnownMinValue(); + int NumSubElts; + if (Mask.size() > 2 && ShuffleVectorInst::isInsertSubvectorMask( + Mask, NumSrcElts, NumSubElts, Index)) { + if (Index + NumSubElts > NumSrcElts && + Index + NumSrcElts <= static_cast<int>(Mask.size())) + return TTI.getShuffleCost( + TTI::SK_InsertSubvector, + FixedVectorType::get(Tp->getElementType(), Mask.size()), std::nullopt, + TTI::TCK_RecipThroughput, Index, Tp); + } + return TTI.getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args); +} + /// Merges shuffle masks and emits final shuffle instruction, if required. It /// supports shuffling of 2 input vectors. It implements lazy shuffles emission, /// when the actual shuffle instruction is generated only if this is actually @@ -7141,15 +7166,15 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis { std::optional<TTI::ShuffleKind> RegShuffleKind = CheckPerRegistersShuffle(SubMask); if (!RegShuffleKind) { - Cost += TTI.getShuffleCost( - *ShuffleKinds[Part], + Cost += ::getShuffleCost( + TTI, *ShuffleKinds[Part], FixedVectorType::get(VL.front()->getType(), NumElts), MaskSlice); continue; } if (*RegShuffleKind != TTI::SK_PermuteSingleSrc || !ShuffleVectorInst::isIdentityMask(SubMask, EltsPerVector)) { - Cost += TTI.getShuffleCost( - *RegShuffleKind, + Cost += ::getShuffleCost( + TTI, *RegShuffleKind, FixedVectorType::get(VL.front()->getType(), EltsPerVector), SubMask); } @@ -7222,8 +7247,8 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis { cast<VectorType>(V1->getType())->getElementCount().getKnownMinValue(); if (isEmptyOrIdentity(Mask, VF)) return TTI::TCC_Free; - return TTI.getShuffleCost(TTI::SK_PermuteTwoSrc, - cast<VectorType>(V1->getType()), Mask); + return ::getShuffleCost(TTI, TTI::SK_PermuteTwoSrc, + cast<VectorType>(V1->getType()), Mask); } InstructionCost createShuffleVector(Value *V1, ArrayRef<int> Mask) const { // Empty mask or identity mask are free. @@ -8101,7 +8126,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals, for (unsigned I = OffsetEnd + 1 - Offset; I < VecSz; ++I) Mask[I] = ((I >= InMask.size()) || InMask.test(I)) ? PoisonMaskElem : I; - Cost += TTI->getShuffleCost(TTI::SK_PermuteTwoSrc, InsertVecTy, Mask); + Cost += + ::getShuffleCost(*TTI, TTI::SK_PermuteTwoSrc, InsertVecTy, Mask); } } return Cost; @@ -8428,8 +8454,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals, return I->getOpcode() == E->getAltOpcode(); }, Mask); - VecCost += TTIRef.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, - FinalVecTy, Mask); + VecCost += ::getShuffleCost(TTIRef, TargetTransformInfo::SK_PermuteTwoSrc, + FinalVecTy, Mask); // Patterns like [fadd,fsub] can be combined into a single instruction // in x86. Reordering them into [fsub,fadd] blocks this pattern. So we // need to take into account their order when looking for the most used @@ -9133,7 +9159,7 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) { auto *FTy = FixedVectorType::get(TEs.back()->Scalars.front()->getType(), VF); InstructionCost C = - TTI->getShuffleCost(TTI::SK_PermuteTwoSrc, FTy, Mask); + ::getShuffleCost(*TTI, TTI::SK_PermuteTwoSrc, FTy, Mask); LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C << " for final shuffle of vector node and external " "insertelement users.\n"; @@ -11991,8 +12017,12 @@ Value *BoUpSLP::vectorizeTree( IRBuilder<>::InsertPointGuard Guard(Builder); if (auto *IVec = dyn_cast<Instruction>(Vec)) Builder.SetInsertPoint(IVec->getNextNonDebugInstruction()); - Vec = Builder.CreateIntCast(Vec, VU->getType(), - BWIt->second.second); + Vec = Builder.CreateIntCast( + Vec, + FixedVectorType::get( + cast<VectorType>(VU->getType())->getElementType(), + cast<FixedVectorType>(Vec->getType())->getNumElements()), + BWIt->second.second); VectorCasts.try_emplace(Scalar, Vec); } else { Vec = VecIt->second; @@ -13070,10 +13100,14 @@ bool BoUpSLP::collectValuesToDemote( if (isa<Constant>(V)) return true; - // If the value is not a vectorized instruction in the expression with only - // one use, it cannot be demoted. + // If the value is not a vectorized instruction in the expression and not used + // by the insertelement instruction and not used in multiple vector nodes, it + // cannot be demoted. auto *I = dyn_cast<Instruction>(V); - if (!I || !I->hasOneUse() || !getTreeEntry(I) || !Visited.insert(I).second) + if (!I || !getTreeEntry(I) || MultiNodeScalars.contains(I) || + !Visited.insert(I).second || all_of(I->users(), [&](User *U) { + return isa<InsertElementInst>(U) && !getTreeEntry(U); + })) return false; unsigned Start = 0; @@ -13144,11 +13178,6 @@ bool BoUpSLP::collectValuesToDemote( } void BoUpSLP::computeMinimumValueSizes() { - // If there are no external uses, the expression tree must be rooted by a - // store. We can't demote in-memory values, so there is nothing to do here. - if (ExternalUses.empty()) - return; - // We only attempt to truncate integer expressions. auto &TreeRoot = VectorizableTree[0]->Scalars; auto *TreeRootIT = dyn_cast<IntegerType>(TreeRoot[0]->getType()); diff --git a/llvm/utils/TableGen/CodeGenDAGPatterns.cpp b/llvm/utils/TableGen/CodeGenDAGPatterns.cpp index e481f7e38e6a..f88e25ea1d16 100644 --- a/llvm/utils/TableGen/CodeGenDAGPatterns.cpp +++ b/llvm/utils/TableGen/CodeGenDAGPatterns.cpp @@ -1368,7 +1368,7 @@ std::string TreePredicateFn::getCodeToRunOnSDNode() const { if (immCodeUsesAPFloat()) Result += "cast<ConstantFPSDNode>(Node)->getValueAPF();\n"; else if (immCodeUsesAPInt()) - Result += "cast<ConstantSDNode>(Node)->getAPIntValue();\n"; + Result += "Node->getAsAPIntVal();\n"; else Result += "cast<ConstantSDNode>(Node)->getSExtValue();\n"; return Result + ImmCode; diff --git a/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp b/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp index 6fd5698e7372..a3e2facf948e 100644 --- a/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp +++ b/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp @@ -60,10 +60,8 @@ class MatcherTableEmitter { // all the patterns with "identical" predicates. StringMap<TinyPtrVector<TreePattern *>> NodePredicatesByCodeToRun; - StringMap<unsigned> PatternPredicateMap; std::vector<std::string> PatternPredicates; - DenseMap<const ComplexPattern*, unsigned> ComplexPatternMap; std::vector<const ComplexPattern*> ComplexPatterns; @@ -84,8 +82,50 @@ class MatcherTableEmitter { } public: - MatcherTableEmitter(const CodeGenDAGPatterns &cgp) - : CGP(cgp), OpcodeCounts(Matcher::HighestKind + 1, 0) {} + MatcherTableEmitter(const Matcher *TheMatcher, const CodeGenDAGPatterns &cgp) + : CGP(cgp), OpcodeCounts(Matcher::HighestKind + 1, 0) { + // Record the usage of ComplexPattern. + DenseMap<const ComplexPattern *, unsigned> ComplexPatternUsage; + // Record the usage of PatternPredicate. + std::map<StringRef, unsigned> PatternPredicateUsage; + + // Iterate the whole MatcherTable once and do some statistics. + std::function<void(const Matcher *)> Statistic = [&](const Matcher *N) { + while (N) { + if (auto *SM = dyn_cast<ScopeMatcher>(N)) + for (unsigned I = 0; I < SM->getNumChildren(); I++) + Statistic(SM->getChild(I)); + else if (auto *SOM = dyn_cast<SwitchOpcodeMatcher>(N)) + for (unsigned I = 0; I < SOM->getNumCases(); I++) + Statistic(SOM->getCaseMatcher(I)); + else if (auto *STM = dyn_cast<SwitchTypeMatcher>(N)) + for (unsigned I = 0; I < STM->getNumCases(); I++) + Statistic(STM->getCaseMatcher(I)); + else if (auto *CPM = dyn_cast<CheckComplexPatMatcher>(N)) + ++ComplexPatternUsage[&CPM->getPattern()]; + else if (auto *CPPM = dyn_cast<CheckPatternPredicateMatcher>(N)) + ++PatternPredicateUsage[CPPM->getPredicate()]; + N = N->getNext(); + } + }; + Statistic(TheMatcher); + + // Sort ComplexPatterns by usage. + std::vector<std::pair<const ComplexPattern *, unsigned>> ComplexPatternList( + ComplexPatternUsage.begin(), ComplexPatternUsage.end()); + sort(ComplexPatternList, + [](const auto &A, const auto &B) { return A.second > B.second; }); + for (const auto &ComplexPattern : ComplexPatternList) + ComplexPatterns.push_back(ComplexPattern.first); + + // Sort PatternPredicates by usage. + std::vector<std::pair<std::string, unsigned>> PatternPredicateList( + PatternPredicateUsage.begin(), PatternPredicateUsage.end()); + sort(PatternPredicateList, + [](const auto &A, const auto &B) { return A.second > B.second; }); + for (const auto &PatternPredicate : PatternPredicateList) + PatternPredicates.push_back(PatternPredicate.first); + } unsigned EmitMatcherList(const Matcher *N, const unsigned Indent, unsigned StartIdx, raw_ostream &OS); @@ -138,20 +178,10 @@ private: } unsigned getPatternPredicate(StringRef PredName) { - unsigned &Entry = PatternPredicateMap[PredName]; - if (Entry == 0) { - PatternPredicates.push_back(PredName.str()); - Entry = PatternPredicates.size(); - } - return Entry-1; + return llvm::find(PatternPredicates, PredName) - PatternPredicates.begin(); } unsigned getComplexPat(const ComplexPattern &P) { - unsigned &Entry = ComplexPatternMap[&P]; - if (Entry == 0) { - ComplexPatterns.push_back(&P); - Entry = ComplexPatterns.size(); - } - return Entry-1; + return llvm::find(ComplexPatterns, &P) - ComplexPatterns.begin(); } unsigned getNodeXFormID(Record *Rec) { @@ -486,13 +516,15 @@ EmitMatcher(const Matcher *N, const unsigned Indent, unsigned CurrentIdx, StringRef Pred = cast<CheckPatternPredicateMatcher>(N)->getPredicate(); unsigned PredNo = getPatternPredicate(Pred); if (PredNo > 255) - OS << "OPC_CheckPatternPredicate2, TARGET_VAL(" << PredNo << "),"; + OS << "OPC_CheckPatternPredicateTwoByte, TARGET_VAL(" << PredNo << "),"; + else if (PredNo < 8) + OS << "OPC_CheckPatternPredicate" << PredNo << ','; else OS << "OPC_CheckPatternPredicate, " << PredNo << ','; if (!OmitComments) OS << " // " << Pred; OS << '\n'; - return 2 + (PredNo > 255); + return 2 + (PredNo > 255) - (PredNo < 8); } case Matcher::CheckPredicate: { TreePredicateFn Pred = cast<CheckPredicateMatcher>(N)->getPredicate(); @@ -652,8 +684,13 @@ EmitMatcher(const Matcher *N, const unsigned Indent, unsigned CurrentIdx, case Matcher::CheckComplexPat: { const CheckComplexPatMatcher *CCPM = cast<CheckComplexPatMatcher>(N); const ComplexPattern &Pattern = CCPM->getPattern(); - OS << "OPC_CheckComplexPat, /*CP*/" << getComplexPat(Pattern) << ", /*#*/" - << CCPM->getMatchNumber() << ','; + unsigned PatternNo = getComplexPat(Pattern); + if (PatternNo < 8) + OS << "OPC_CheckComplexPat" << PatternNo << ", /*#*/" + << CCPM->getMatchNumber() << ','; + else + OS << "OPC_CheckComplexPat, /*CP*/" << PatternNo << ", /*#*/" + << CCPM->getMatchNumber() << ','; if (!OmitComments) { OS << " // " << Pattern.getSelectFunc(); @@ -665,7 +702,7 @@ EmitMatcher(const Matcher *N, const unsigned Indent, unsigned CurrentIdx, OS << " + chain result"; } OS << '\n'; - return 3; + return PatternNo < 8 ? 2 : 3; } case Matcher::CheckAndImm: { @@ -1267,7 +1304,7 @@ void llvm::EmitMatcherTable(Matcher *TheMatcher, OS << "#endif\n\n"; BeginEmitFunction(OS, "void", "SelectCode(SDNode *N)", false/*AddOverride*/); - MatcherTableEmitter MatcherEmitter(CGP); + MatcherTableEmitter MatcherEmitter(TheMatcher, CGP); // First we size all the children of the three kinds of matchers that have // them. This is done by sharing the code in EmitMatcher(). but we don't diff --git a/llvm/utils/TableGen/ExegesisEmitter.cpp b/llvm/utils/TableGen/ExegesisEmitter.cpp index 736f1220be14..d48c7f3a480f 100644 --- a/llvm/utils/TableGen/ExegesisEmitter.cpp +++ b/llvm/utils/TableGen/ExegesisEmitter.cpp @@ -81,6 +81,11 @@ collectPfmCounters(const RecordKeeper &Records) { "duplicate ResourceName " + ResourceName); AddPfmCounterName(IssueCounter); } + + for (const Record *ValidationCounter : + Def->getValueAsListOfDefs("ValidationCounters")) + AddPfmCounterName(ValidationCounter); + AddPfmCounterName(Def->getValueAsDef("CycleCounter")); AddPfmCounterName(Def->getValueAsDef("UopsCounter")); } @@ -100,6 +105,17 @@ ExegesisEmitter::ExegesisEmitter(RecordKeeper &RK) Target = std::string(Targets[0]->getName()); } +struct ValidationCounterInfo { + int64_t EventNumber; + StringRef EventName; + unsigned PfmCounterID; +}; + +bool EventNumberLess(const ValidationCounterInfo &LHS, + const ValidationCounterInfo &RHS) { + return LHS.EventNumber < RHS.EventNumber; +} + void ExegesisEmitter::emitPfmCountersInfo(const Record &Def, unsigned &IssueCountersTableOffset, raw_ostream &OS) const { @@ -109,6 +125,31 @@ void ExegesisEmitter::emitPfmCountersInfo(const Record &Def, Def.getValueAsDef("UopsCounter")->getValueAsString("Counter"); const size_t NumIssueCounters = Def.getValueAsListOfDefs("IssueCounters").size(); + const size_t NumValidationCounters = + Def.getValueAsListOfDefs("ValidationCounters").size(); + + // Emit Validation Counters Array + if (NumValidationCounters != 0) { + std::vector<ValidationCounterInfo> ValidationCounters; + ValidationCounters.reserve(NumValidationCounters); + for (const Record *ValidationCounter : + Def.getValueAsListOfDefs("ValidationCounters")) { + ValidationCounters.push_back( + {ValidationCounter->getValueAsDef("EventType") + ->getValueAsInt("EventNumber"), + ValidationCounter->getValueAsDef("EventType")->getName(), + getPfmCounterId(ValidationCounter->getValueAsString("Counter"))}); + } + std::sort(ValidationCounters.begin(), ValidationCounters.end(), + EventNumberLess); + OS << "\nstatic const std::pair<ValidationEvent, const char*> " << Target + << Def.getName() << "ValidationCounters[] = {\n"; + for (const ValidationCounterInfo &VCI : ValidationCounters) { + OS << " { " << VCI.EventName << ", " << Target << "PfmCounterNames[" + << VCI.PfmCounterID << "]},\n"; + } + OS << "};\n"; + } OS << "\nstatic const PfmCountersInfo " << Target << Def.getName() << " = {\n"; @@ -129,10 +170,17 @@ void ExegesisEmitter::emitPfmCountersInfo(const Record &Def, // Issue Counters if (NumIssueCounters == 0) - OS << " nullptr, // No issue counters.\n 0\n"; + OS << " nullptr, 0, // No issue counters\n"; else OS << " " << Target << "PfmIssueCounters + " << IssueCountersTableOffset - << ", " << NumIssueCounters << " // Issue counters.\n"; + << ", " << NumIssueCounters << ", // Issue counters.\n"; + + // Validation Counters + if (NumValidationCounters == 0) + OS << " nullptr, 0 // No validation counters.\n"; + else + OS << " " << Target << Def.getName() << "ValidationCounters, " + << NumValidationCounters << " // Validation counters.\n"; OS << "};\n"; IssueCountersTableOffset += NumIssueCounters; diff --git a/llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp b/llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp index 348b3b3e0898..c092772386ec 100644 --- a/llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp +++ b/llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp @@ -2318,7 +2318,7 @@ bool CombineRuleBuilder::emitInstructionApplyPattern( M.actions_begin(), getLLTCodeGenOrTempType(Ty, M), TempRegID); } - DstMI.addRenderer<TempRegRenderer>(TempRegID); + DstMI.addRenderer<TempRegRenderer>(TempRegID, /*IsDef=*/true); } // Render MIFlags |
