author     Dimitry Andric <dim@FreeBSD.org>  2022-07-14 18:50:02 +0000
committer  Dimitry Andric <dim@FreeBSD.org>  2022-07-14 18:50:02 +0000
commit     1f917f69ff07f09b6dbb670971f57f8efe718b84 (patch)
tree       99293cbc1411737cd995dac10a99b2c40ef0944c
parent     145449b1e420787bb99721a429341fa6be3adfb6 (diff)

1054 files changed, 26865 insertions, 10460 deletions
diff --git a/clang/include/clang/APINotes/Types.h b/clang/include/clang/APINotes/Types.h
index d79586931995..0e5b43080e4b 100644
--- a/clang/include/clang/APINotes/Types.h
+++ b/clang/include/clang/APINotes/Types.h
@@ -76,7 +76,7 @@ public:
   }
 
   void setSwiftPrivate(llvm::Optional<bool> Private) {
-    SwiftPrivateSpecified = Private.hasValue();
+    SwiftPrivateSpecified = Private.has_value();
     SwiftPrivate = Private ? *Private : 0;
   }
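Much of this import is the mechanical migration of llvm::Optional from the old hasValue()/getValue() accessors to the std::optional-style has_value()/value() spellings, as in the hunk above. A minimal sketch of the new spelling (the function and its argument are invented for illustration):

    #include "llvm/ADT/Optional.h"

    // Illustration only: std::optional-style accessors on llvm::Optional.
    bool flagValue(llvm::Optional<bool> Flag) {
      if (!Flag.has_value()) // previously: Flag.hasValue()
        return false;
      return Flag.value();   // previously: Flag.getValue()
    }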
diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h
index 7db6af9cb87d..85eba45e4de6 100644
--- a/clang/include/clang/AST/ASTContext.h
+++ b/clang/include/clang/AST/ASTContext.h
@@ -130,6 +130,7 @@ class TemplateDecl;
 class TemplateParameterList;
 class TemplateTemplateParmDecl;
 class TemplateTypeParmDecl;
+class TypeConstraint;
 class UnresolvedSetIterator;
 class UsingShadowDecl;
 class VarTemplateDecl;
@@ -260,7 +261,7 @@ class ASTContext : public RefCountedBase<ASTContext> {
   mutable llvm::FoldingSet<DeducedTemplateSpecializationType>
       DeducedTemplateSpecializationTypes;
   mutable llvm::FoldingSet<AtomicType> AtomicTypes;
-  llvm::FoldingSet<AttributedType> AttributedTypes;
+  mutable llvm::FoldingSet<AttributedType> AttributedTypes;
   mutable llvm::FoldingSet<PipeType> PipeTypes;
   mutable llvm::FoldingSet<BitIntType> BitIntTypes;
   mutable llvm::FoldingSet<DependentBitIntType> DependentBitIntTypes;
@@ -1306,11 +1307,11 @@ public:
   /// declaration of a function with an exception specification is permitted
   /// and preserved. Other type sugar (for instance, typedefs) is not.
   QualType getFunctionTypeWithExceptionSpec(
-      QualType Orig, const FunctionProtoType::ExceptionSpecInfo &ESI);
+      QualType Orig, const FunctionProtoType::ExceptionSpecInfo &ESI) const;
 
   /// Determine whether two function types are the same, ignoring
   /// exception specifications in cases where they're part of the type.
-  bool hasSameFunctionTypeIgnoringExceptionSpec(QualType T, QualType U);
+  bool hasSameFunctionTypeIgnoringExceptionSpec(QualType T, QualType U) const;
 
   /// Change the exception specification on a function once it is
   /// delay-parsed, instantiated, or computed.
@@ -1597,9 +1598,8 @@ public:
   QualType getInjectedClassNameType(CXXRecordDecl *Decl, QualType TST) const;
 
-  QualType getAttributedType(attr::Kind attrKind,
-                             QualType modifiedType,
-                             QualType equivalentType);
+  QualType getAttributedType(attr::Kind attrKind, QualType modifiedType,
+                             QualType equivalentType) const;
 
   QualType getBTFTagAttributedType(const BTFTypeTagAttr *BTFAttr,
                                    QualType Wrapped);
@@ -2654,25 +2654,33 @@ public:
   bool hasSameTemplateName(const TemplateName &X, const TemplateName &Y) const;
 
   /// Determine whether the two declarations refer to the same entity.
-  ///
-  /// FIXME: isSameEntity is not const due to its implementation calls
-  /// hasSameFunctionTypeIgnoringExceptionSpec which may alter this.
-  bool isSameEntity(const NamedDecl *X, const NamedDecl *Y);
+  bool isSameEntity(const NamedDecl *X, const NamedDecl *Y) const;
 
   /// Determine whether two template parameter lists are similar enough
   /// that they may be used in declarations of the same template.
-  ///
-  /// FIXME: isSameTemplateParameterList is not const since it calls
-  /// isSameTemplateParameter.
   bool isSameTemplateParameterList(const TemplateParameterList *X,
-                                   const TemplateParameterList *Y);
+                                   const TemplateParameterList *Y) const;
 
   /// Determine whether two template parameters are similar enough
   /// that they may be used in declarations of the same template.
+  bool isSameTemplateParameter(const NamedDecl *X, const NamedDecl *Y) const;
+
+  /// Determine whether two 'requires' expressions are similar enough that they
+  /// may be used in re-declarations.
   ///
-  /// FIXME: isSameTemplateParameterList is not const since it calls
-  /// isSameEntity.
-  bool isSameTemplateParameter(const NamedDecl *X, const NamedDecl *Y);
+  /// Use of 'requires' isn't mandatory, works with constraints expressed in
+  /// other ways too.
+  bool isSameConstraintExpr(const Expr *XCE, const Expr *YCE) const;
+
+  /// Determine whether two type contraint are similar enough that they could
+  /// used in declarations of the same template.
+  bool isSameTypeConstraint(const TypeConstraint *XTC,
+                            const TypeConstraint *YTC) const;
+
+  /// Determine whether two default template arguments are similar enough
+  /// that they may be used in declarations of the same template.
+  bool isSameDefaultTemplateArgument(const NamedDecl *X,
+                                     const NamedDecl *Y) const;
 
   /// Retrieve the "canonical" template argument.
   ///
diff --git a/clang/include/clang/AST/ASTImportError.h b/clang/include/clang/AST/ASTImportError.h
index 405790b6ded3..728314ca0936 100644
--- a/clang/include/clang/AST/ASTImportError.h
+++ b/clang/include/clang/AST/ASTImportError.h
@@ -19,7 +19,6 @@ namespace clang {
 
 class ASTImportError : public llvm::ErrorInfo<ASTImportError> {
-
 public:
   /// \brief Kind of error when importing an AST component.
   enum ErrorKind {
diff --git a/clang/include/clang/AST/DeclTemplate.h b/clang/include/clang/AST/DeclTemplate.h
index 3e4ccda73111..725bb0bced9c 100644
--- a/clang/include/clang/AST/DeclTemplate.h
+++ b/clang/include/clang/AST/DeclTemplate.h
@@ -15,6 +15,7 @@
 #define LLVM_CLANG_AST_DECLTEMPLATE_H
 
 #include "clang/AST/ASTConcept.h"
+#include "clang/AST/ASTContext.h"
 #include "clang/AST/Decl.h"
 #include "clang/AST/DeclBase.h"
 #include "clang/AST/DeclCXX.h"
@@ -373,11 +374,19 @@ public:
   /// Set that the default argument was inherited from another parameter.
   void setInherited(const ASTContext &C, ParmDecl *InheritedFrom) {
-    assert(!isInherited() && "default argument already inherited");
     InheritedFrom = getParmOwningDefaultArg(InheritedFrom);
     if (!isSet())
       ValueOrInherited = InheritedFrom;
-    else
+    else if (auto *D = ValueOrInherited.template dyn_cast<ParmDecl *>()) {
+      assert(C.isSameDefaultTemplateArgument(D, InheritedFrom));
+      ValueOrInherited =
+          new (allocateDefaultArgStorageChain(C)) Chain{InheritedFrom, get()};
+    } else if (auto *Inherited =
+                   ValueOrInherited.template dyn_cast<Chain *>()) {
+      assert(C.isSameDefaultTemplateArgument(Inherited->PrevDeclWithDefaultArg,
+                                             InheritedFrom));
+      Inherited->PrevDeclWithDefaultArg = InheritedFrom;
+    } else
       ValueOrInherited = new (allocateDefaultArgStorageChain(C))
           Chain{InheritedFrom, ValueOrInherited.template get<ArgType>()};
   }
diff --git a/clang/include/clang/AST/PropertiesBase.td b/clang/include/clang/AST/PropertiesBase.td
index 559f29edcf0f..ec310a459927 100644
--- a/clang/include/clang/AST/PropertiesBase.td
+++ b/clang/include/clang/AST/PropertiesBase.td
@@ -520,15 +520,15 @@ let Class = PropertyTypeCase<APValue, "LValue"> in {
     if (hasBase) {
       if (isTypeInfo) {
         base = APValue::LValueBase::getTypeInfo(
-            TypeInfoLValue(typeInfo.getValue().getTypePtr()), type.getValue());
+            TypeInfoLValue(typeInfo.value().getTypePtr()), type.value());
         elemTy = base.getTypeInfoType();
       } else if (isExpr) {
-        base = APValue::LValueBase(cast<Expr>(stmt.getValue()),
-                                   callIndex.getValue(), version.getValue());
+        base = APValue::LValueBase(cast<Expr>(stmt.value()),
+                                   callIndex.value(), version.value());
         elemTy = base.get<const Expr *>()->getType();
       } else {
-        base = APValue::LValueBase(cast<ValueDecl>(decl.getValue()),
-                                   callIndex.getValue(), version.getValue());
+        base = APValue::LValueBase(cast<ValueDecl>(decl.value()),
+                                   callIndex.value(), version.value());
         elemTy = base.get<const ValueDecl *>()->getType();
       }
     }
diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h
index c1100d8474aa..358ace0430f6 100644
--- a/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h
+++ b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h
@@ -155,6 +155,7 @@ public:
   /// Returns a pointer value that represents a null pointer. Calls with
   /// `PointeeType` that are canonically equivalent will return the same result.
+  /// A null `PointeeType` can be used for the pointee of `std::nullptr_t`.
   PointerValue &getOrCreateNullPointerValue(QualType PointeeType);
 
   /// Returns a symbolic boolean value that models a boolean literal equal to
@@ -251,6 +252,17 @@ public:
   bool equivalentBoolValues(BoolValue &Val1, BoolValue &Val2);
 
 private:
+  struct NullableQualTypeDenseMapInfo : private llvm::DenseMapInfo<QualType> {
+    static QualType getEmptyKey() {
+      // Allow a NULL `QualType` by using a different value as the empty key.
+      return QualType::getFromOpaquePtr(reinterpret_cast<Type *>(1));
+    }
+
+    using DenseMapInfo::getHashValue;
+    using DenseMapInfo::getTombstoneKey;
+    using DenseMapInfo::isEqual;
+  };
+
   /// Adds all constraints of the flow condition identified by `Token` and all
   /// of its transitive dependencies to `Constraints`. `VisitedTokens` is used
   /// to track tokens of flow conditions that were already visited by recursive
@@ -259,17 +271,18 @@ private:
                                   AtomicBoolValue &Token,
                                   llvm::DenseSet<BoolValue *> &Constraints,
                                   llvm::DenseSet<AtomicBoolValue *> &VisitedTokens);
 
-  /// Returns the result of satisfiability checking on `Constraints`.
-  /// Possible return values are:
-  /// - `Satisfiable`: There exists a satisfying assignment for `Constraints`.
-  /// - `Unsatisfiable`: There is no satisfying assignment for `Constraints`.
-  /// - `TimedOut`: The solver gives up on finding a satisfying assignment.
+  /// Returns the outcome of satisfiability checking on `Constraints`.
+  /// Possible outcomes are:
+  /// - `Satisfiable`: A satisfying assignment exists and is returned.
+  /// - `Unsatisfiable`: A satisfying assignment does not exist.
+  /// - `TimedOut`: The search for a satisfying assignment was not completed.
   Solver::Result querySolver(llvm::DenseSet<BoolValue *> Constraints);
 
   /// Returns true if the solver is able to prove that there is no satisfying
   /// assignment for `Constraints`
   bool isUnsatisfiable(llvm::DenseSet<BoolValue *> Constraints) {
-    return querySolver(std::move(Constraints)) == Solver::Result::Unsatisfiable;
+    return querySolver(std::move(Constraints)).getStatus() ==
+           Solver::Result::Status::Unsatisfiable;
   }
 
   /// Returns a boolean value as a result of substituting `Val` and its sub
@@ -311,7 +324,8 @@ private:
   // required to initialize the `PointeeLoc` field in `PointerValue`. Consider
   // creating a type-independent `NullPointerValue` without a `PointeeLoc`
   // field.
-  llvm::DenseMap<QualType, PointerValue *> NullPointerVals;
+  llvm::DenseMap<QualType, PointerValue *, NullableQualTypeDenseMapInfo>
+      NullPointerVals;
 
   AtomicBoolValue &TrueVal;
   AtomicBoolValue &FalseVal;
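The NullableQualTypeDenseMapInfo added above is needed because llvm::DenseMap reserves two key values (an "empty" key and a "tombstone" key), and the default DenseMapInfo<QualType> uses the null QualType as the empty key; moving the empty key to opaque value 1 makes a genuinely null QualType (used above for the pointee of std::nullptr_t) an ordinary key. A sketch of the general pattern, with a hypothetical key type standing in for QualType:

    #include "llvm/ADT/DenseMap.h"
    #include "llvm/ADT/DenseMapInfo.h"

    // Hypothetical key type for illustration; QualType plays this role above.
    struct Key { void *Ptr; };

    struct KeyInfo {
      // Reserve two impossible key values for DenseMap's internal use.
      static Key getEmptyKey() { return {reinterpret_cast<void *>(1)}; }
      static Key getTombstoneKey() { return {reinterpret_cast<void *>(2)}; }
      static unsigned getHashValue(const Key &K) {
        return llvm::DenseMapInfo<void *>::getHashValue(K.Ptr);
      }
      static bool isEqual(const Key &A, const Key &B) { return A.Ptr == B.Ptr; }
    };

    void example() {
      llvm::DenseMap<Key, int, KeyInfo> Map;
      Map[Key{nullptr}] = 0; // legal: nullptr is no longer the empty key
    }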
diff --git a/clang/include/clang/Analysis/FlowSensitive/DebugSupport.h b/clang/include/clang/Analysis/FlowSensitive/DebugSupport.h
new file mode 100644
index 000000000000..ef903d807e12
--- /dev/null
+++ b/clang/include/clang/Analysis/FlowSensitive/DebugSupport.h
@@ -0,0 +1,63 @@
+//===-- DebugSupport.h ------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines functions which generate more readable forms of data
+// structures used in the dataflow analyses, for debugging purposes.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DEBUGSUPPORT_H_
+#define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DEBUGSUPPORT_H_
+
+#include <string>
+#include <vector>
+
+#include "clang/Analysis/FlowSensitive/Solver.h"
+#include "clang/Analysis/FlowSensitive/Value.h"
+#include "llvm/ADT/DenseMap.h"
+
+namespace clang {
+namespace dataflow {
+/// Returns a string representation for the boolean value `B`.
+///
+/// Atomic booleans appearing in the boolean value `B` are assigned to labels
+/// either specified in `AtomNames` or created by default rules as B0, B1, ...
+///
+/// Requirements:
+///
+/// Names assigned to atoms should not be repeated in `AtomNames`.
+std::string debugString(
+    const BoolValue &B,
+    llvm::DenseMap<const AtomicBoolValue *, std::string> AtomNames = {{}});
+
+/// Returns a string representation for `Constraints` - a collection of boolean
+/// formulas and the `Result` of satisfiability checking.
+///
+/// Atomic booleans appearing in `Constraints` and `Result` are assigned to
+/// labels either specified in `AtomNames` or created by default rules as B0,
+/// B1, ...
+///
+/// Requirements:
+///
+/// Names assigned to atoms should not be repeated in `AtomNames`.
+std::string debugString(
+    const std::vector<BoolValue *> &Constraints, const Solver::Result &Result,
+    llvm::DenseMap<const AtomicBoolValue *, std::string> AtomNames = {{}});
+inline std::string debugString(
+    const llvm::DenseSet<BoolValue *> &Constraints,
+    const Solver::Result &Result,
+    llvm::DenseMap<const AtomicBoolValue *, std::string> AtomNames = {{}}) {
+  std::vector<BoolValue *> ConstraintsVec(Constraints.begin(),
+                                          Constraints.end());
+  return debugString(ConstraintsVec, Result, std::move(AtomNames));
+}
+
+} // namespace dataflow
+} // namespace clang
+
+#endif // LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DEBUGSUPPORT_H_
diff --git a/clang/include/clang/Analysis/FlowSensitive/Solver.h b/clang/include/clang/Analysis/FlowSensitive/Solver.h
index 6b685b9b3c9a..93568b119793 100644
--- a/clang/include/clang/Analysis/FlowSensitive/Solver.h
+++ b/clang/include/clang/Analysis/FlowSensitive/Solver.h
@@ -15,7 +15,9 @@
 #define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_SOLVER_H
 
 #include "clang/Analysis/FlowSensitive/Value.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/Optional.h"
 
 namespace clang {
 namespace dataflow {
@@ -23,17 +25,58 @@ namespace dataflow {
 /// An interface for a SAT solver that can be used by dataflow analyses.
 class Solver {
 public:
-  enum class Result {
-    /// Indicates that there exists a satisfying assignment for a boolean
+  struct Result {
+    enum class Status {
+      /// Indicates that there exists a satisfying assignment for a boolean
+      /// formula.
+      Satisfiable,
+
+      /// Indicates that there is no satisfying assignment for a boolean
+      /// formula.
+      Unsatisfiable,
+
+      /// Indicates that the solver gave up trying to find a satisfying
+      /// assignment for a boolean formula.
+      TimedOut,
+    };
+
+    /// A boolean value is set to true or false in a truth assignment.
+    enum class Assignment : uint8_t { AssignedFalse = 0, AssignedTrue = 1 };
+
+    /// Constructs a result indicating that the queried boolean formula is
+    /// satisfiable. The result will hold a solution found by the solver.
+    static Result
+    Satisfiable(llvm::DenseMap<AtomicBoolValue *, Assignment> Solution) {
+      return Result(Status::Satisfiable, std::move(Solution));
+    }
+
+    /// Constructs a result indicating that the queried boolean formula is
+    /// unsatisfiable.
+    static Result Unsatisfiable() { return Result(Status::Unsatisfiable, {}); }
+
+    /// Constructs a result indicating that satisfiability checking on the
+    /// queried boolean formula was not completed.
+    static Result TimedOut() { return Result(Status::TimedOut, {}); }
+
+    /// Returns the status of satisfiability checking on the queried boolean
     /// formula.
-    Satisfiable,
+    Status getStatus() const { return SATCheckStatus; }
 
-    /// Indicates that there is no satisfying assignment for a boolean formula.
-    Unsatisfiable,
+    /// Returns a truth assignment to boolean values that satisfies the queried
+    /// boolean formula if available. Otherwise, an empty optional is returned.
+    llvm::Optional<llvm::DenseMap<AtomicBoolValue *, Assignment>>
+    getSolution() const {
+      return Solution;
+    }
 
-    /// Indicates that the solver gave up trying to find a satisfying assignment
-    /// for a boolean formula.
-    TimedOut,
+  private:
+    Result(
+        enum Status SATCheckStatus,
+        llvm::Optional<llvm::DenseMap<AtomicBoolValue *, Assignment>> Solution)
+        : SATCheckStatus(SATCheckStatus), Solution(std::move(Solution)) {}
+
+    Status SATCheckStatus;
+    llvm::Optional<llvm::DenseMap<AtomicBoolValue *, Assignment>> Solution;
   };
 
   virtual ~Solver() = default;
@@ -44,9 +87,6 @@ public:
   /// Requirements:
   ///
   /// All elements in `Vals` must not be null.
-  ///
-  /// FIXME: Consider returning a model in case the conjunction of `Vals` is
-  /// satisfiable so that it can be used to generate warning messages.
   virtual Result solve(llvm::DenseSet<BoolValue *> Vals) = 0;
 };
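With Result widened from an enum to a struct, callers now compare statuses and can retrieve a model when one exists. A minimal sketch of consuming the new API declared above (the function and variable names are invented):

    #include "clang/Analysis/FlowSensitive/Solver.h"

    using namespace clang::dataflow;

    // Sketch: checking satisfiability and, if satisfiable, reading the model.
    void report(Solver &S, llvm::DenseSet<BoolValue *> Vals) {
      Solver::Result R = S.solve(std::move(Vals));
      if (R.getStatus() == Solver::Result::Status::Satisfiable) {
        // The satisfying assignment, previously unavailable from the enum.
        auto Solution = R.getSolution();
        (void)Solution;
      }
    }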
diff --git a/clang/include/clang/Analysis/SelectorExtras.h b/clang/include/clang/Analysis/SelectorExtras.h
index 278f20e87cc6..1e1daf5706bb 100644
--- a/clang/include/clang/Analysis/SelectorExtras.h
+++ b/clang/include/clang/Analysis/SelectorExtras.h
@@ -16,7 +16,7 @@ namespace clang {
 template <typename... IdentifierInfos>
 static inline Selector getKeywordSelector(ASTContext &Ctx,
                                           IdentifierInfos *... IIs) {
-  static_assert(sizeof...(IdentifierInfos),
+  static_assert(sizeof...(IdentifierInfos) > 0,
                 "keyword selectors must have at least one argument");
   SmallVector<IdentifierInfo *, 10> II({&Ctx.Idents.get(IIs)...});
diff --git a/clang/include/clang/Basic/AlignedAllocation.h b/clang/include/clang/Basic/AlignedAllocation.h
index c1187b81420b..949e54c8c030 100644
--- a/clang/include/clang/Basic/AlignedAllocation.h
+++ b/clang/include/clang/Basic/AlignedAllocation.h
@@ -26,8 +26,8 @@ inline llvm::VersionTuple alignedAllocMinVersion(llvm::Triple::OSType OS) {
   default:
     break;
   case llvm::Triple::Darwin:
-  case llvm::Triple::MacOSX: // Earliest supporting version is 10.14.
-    return llvm::VersionTuple(10U, 14U);
+  case llvm::Triple::MacOSX: // Earliest supporting version is 10.13.
+    return llvm::VersionTuple(10U, 13U);
   case llvm::Triple::IOS:
   case llvm::Triple::TvOS: // Earliest supporting version is 11.0.0.
     return llvm::VersionTuple(11U);
diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
index cb47215f7e1d..78e0fce917a0 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -4036,3 +4036,14 @@ def NoRandomizeLayout : InheritableAttr {
   let LangOpts = [COnly];
 }
 def : MutualExclusions<[RandomizeLayout, NoRandomizeLayout]>;
+
+def FunctionReturnThunks : InheritableAttr,
+    TargetSpecificAttr<TargetAnyX86> {
+  let Spellings = [GCC<"function_return">];
+  let Args = [EnumArgument<"ThunkType", "Kind",
+              ["keep", "thunk-extern"],
+              ["Keep", "Extern"]
+              >];
+  let Subjects = SubjectList<[Function]>;
+  let Documentation = [FunctionReturnThunksDocs];
+}
diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
index 4e4d871a58a7..aff0dbbdd94d 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -3088,8 +3088,8 @@ def FormatDocs : Documentation {
   let Content = [{
 
 Clang supports the ``format`` attribute, which indicates that the function
-accepts a ``printf`` or ``scanf``-like format string and corresponding
-arguments or a ``va_list`` that contains these arguments.
+accepts (among other possibilities) a ``printf`` or ``scanf``-like format string
+and corresponding arguments or a ``va_list`` that contains these arguments.
 
 Please see `GCC documentation about format attribute
 <http://gcc.gnu.org/onlinedocs/gcc/Function-Attributes.html>`_ to find details
@@ -3143,6 +3143,27 @@ Clang implements two kinds of checks with this attribute.
    In this case Clang does not warn because the format string ``s`` and
    the corresponding arguments are annotated.  If the arguments are
    incorrect, the caller of ``foo`` will receive a warning.
+
+As an extension to GCC's behavior, Clang accepts the ``format`` attribute on
+non-variadic functions. Clang checks non-variadic format functions for the same
+classes of issues that can be found on variadic functions, as controlled by the
+same warning flags, except that the types of formatted arguments is forced by
+the function signature. For example:
+
+.. code-block:: c
+
+  __attribute__((__format__(__printf__, 1, 2)))
+  void fmt(const char *s, const char *a, int b);
+
+  void bar(void) {
+    fmt("%s %i", "hello", 123); // OK
+    fmt("%i %g", "hello", 123); // warning: arguments don't match format
+    extern const char *fmt;
+    fmt(fmt, "hello", 123); // warning: format string is not a string literal
+  }
+
+Using the ``format`` attribute on a non-variadic function emits a GCC
+compatibility diagnostic.
 }];
 }
 
@@ -6585,6 +6606,28 @@ evaluate to NULL.
       }
       return 0;
     }
+  }];
+}
+
+def FunctionReturnThunksDocs : Documentation {
+  let Category = DocCatFunction;
+  let Content = [{
+The attribute ``function_return`` can replace return instructions with jumps to
+target-specific symbols. This attribute supports 2 possible values,
+corresponding to the values supported by the ``-mfunction-return=`` command
+line flag:
+
+* ``__attribute__((function_return("keep")))`` to disable related transforms.
+  This is useful for undoing global setting from ``-mfunction-return=`` locally
+  for individual functions.
+* ``__attribute__((function_return("thunk-extern")))`` to replace returns with
+  jumps, while NOT emitting the thunk.
+
+The values ``thunk`` and ``thunk-inline`` from GCC are not supported.
+
+The symbol used for ``thunk-extern`` is target specific:
+* X86: ``__x86_return_thunk``
+
+As such, this function attribute is currently only supported on X86 targets.
 }];
 }
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index 3e5c376f9bc1..6bf35c340c2d 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -825,6 +825,7 @@ BUILTIN(__rdtsc, "UOi", "")
 BUILTIN(__builtin_ia32_rdtscp, "UOiUi*", "")
 
 TARGET_BUILTIN(__builtin_ia32_rdpid, "Ui", "n", "rdpid")
+TARGET_BUILTIN(__builtin_ia32_rdpru, "ULLii", "n", "rdpru")
 
 // PKU
 TARGET_BUILTIN(__builtin_ia32_rdpkru, "Ui", "n", "pku")
diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def
index 72b0e5d8eb41..b1d394edd04a 100644
--- a/clang/include/clang/Basic/CodeGenOptions.def
+++ b/clang/include/clang/Basic/CodeGenOptions.def
@@ -107,6 +107,7 @@ CODEGENOPT(CFProtectionReturn , 1, 0) ///< if -fcf-protection is
 CODEGENOPT(CFProtectionBranch , 1, 0) ///< if -fcf-protection is
                                       ///< set to full or branch.
 CODEGENOPT(IBTSeal, 1, 0) ///< set to optimize CFProtectionBranch.
+CODEGENOPT(FunctionReturnThunks, 1, 0) ///< -mfunction-return={keep|thunk-extern}
 
 CODEGENOPT(XRayInstrumentFunctions , 1, 0) ///< Set when -fxray-instrument is
                                            ///< enabled.
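For reference, a small illustration of the attribute documented in the AttrDocs.td hunk above, as it would appear in user code (derived from the documentation text; not part of the patch):

    // With a global `-mfunction-return=thunk-extern`, returns become jumps to
    // __x86_return_thunk; the attribute overrides the global setting per function.
    __attribute__((function_return("keep")))
    void hot_path(void) {} // keeps ordinary `ret` instructions

    __attribute__((function_return("thunk-extern")))
    void isolated(void) {} // thunked even without the command-line flag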
diff --git a/clang/include/clang/Basic/CodeGenOptions.h b/clang/include/clang/Basic/CodeGenOptions.h
index 23d76c308d84..cd204e5d7c15 100644
--- a/clang/include/clang/Basic/CodeGenOptions.h
+++ b/clang/include/clang/Basic/CodeGenOptions.h
@@ -389,6 +389,9 @@ public:
   /// On AArch64 this can only be "sp_el0".
   std::string StackProtectorGuardReg;
 
+  /// Specify a symbol to be the guard value.
+  std::string StackProtectorGuardSymbol;
+
   /// Path to ignorelist file specifying which objects
   /// (files, functions) listed for instrumentation by sanitizer
   /// coverage pass should actually not be instrumented.
diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td
index 96219f83b0a5..68685baf7633 100644
--- a/clang/include/clang/Basic/DiagnosticDriverKinds.td
+++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td
@@ -60,8 +60,6 @@ def err_drv_no_cuda_libdevice : Error<
   "cannot find libdevice for %0; provide path to different CUDA installation "
   "via '--cuda-path', or pass '-nocudalib' to build without linking with "
   "libdevice">;
-def err_drv_no_rdc_new_driver : Error<
-  "Using '--offload-new-driver' requires '-fgpu-rdc'">;
 def err_drv_no_rocm_device_lib : Error<
   "cannot find ROCm device library%select{| for %1|for ABI version %1}0; provide its path via "
diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td
index 10da02ecbf7e..53e246a39ed8 100644
--- a/clang/include/clang/Basic/DiagnosticGroups.td
+++ b/clang/include/clang/Basic/DiagnosticGroups.td
@@ -31,6 +31,7 @@ def GNUAnonymousStruct : DiagGroup<"gnu-anonymous-struct">;
 def GNUAutoType : DiagGroup<"gnu-auto-type">;
 def ArrayBounds : DiagGroup<"array-bounds">;
 def ArrayBoundsPointerArithmetic : DiagGroup<"array-bounds-pointer-arithmetic">;
+def ArrayParameter : DiagGroup<"array-parameter">;
 def AutoDisableVptrSanitizer : DiagGroup<"auto-disable-vptr-sanitizer">;
 def Availability : DiagGroup<"availability">;
 def Section : DiagGroup<"section">;
@@ -187,6 +188,7 @@ def UnguardedAvailability : DiagGroup<"unguarded-availability",
 def : DiagGroup<"partial-availability", [UnguardedAvailability]>;
 def DeprecatedDynamicExceptionSpec
     : DiagGroup<"deprecated-dynamic-exception-spec">;
+def DeprecatedBuiltins : DiagGroup<"deprecated-builtins">;
 def DeprecatedImplementations :DiagGroup<"deprecated-implementations">;
 def DeprecatedIncrementBool : DiagGroup<"deprecated-increment-bool">;
 def DeprecatedRegister : DiagGroup<"deprecated-register">;
@@ -209,6 +211,7 @@ def Deprecated : DiagGroup<"deprecated", [DeprecatedAnonEnumEnumConversion,
                                           DeprecatedEnumCompareConditional,
                                           DeprecatedEnumEnumConversion,
                                           DeprecatedEnumFloatConversion,
+                                          DeprecatedBuiltins,
                                           DeprecatedIncrementBool,
                                           DeprecatedPragma,
                                           DeprecatedRegister,
@@ -978,6 +981,7 @@ def Extra : DiagGroup<"extra", [
   ]>;
 
 def Most : DiagGroup<"most", [
+    ArrayParameter,
     BoolOperation,
     CharSubscript,
     Comment,
diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td
index ac86076140c5..dd0909704492 100644
--- a/clang/include/clang/Basic/DiagnosticLexKinds.td
+++ b/clang/include/clang/Basic/DiagnosticLexKinds.td
@@ -113,6 +113,8 @@ def warn_four_char_character_literal : Warning<
 // Unicode and UCNs
 def err_invalid_utf8 : Error<
   "source file is not valid UTF-8">;
+def warn_invalid_utf8_in_comment : Extension<
+  "invalid UTF-8 in comment">, InGroup<DiagGroup<"invalid-utf8">>;
 def err_character_not_allowed : Error<
   "unexpected character <U+%0>">;
 def err_character_not_allowed_identifier : Error<
@@ -128,8 +130,15 @@ def warn_utf8_symbol_zero_width : Warning<
   "some environments">, InGroup<DiagGroup<"unicode-zero-width">>;
 
 def ext_delimited_escape_sequence : Extension<
-  "%select{delimited|named}0 escape sequences are a Clang extension">,
+  "%select{delimited|named}0 escape sequences are a "
+  "%select{Clang|C++2b}1 extension">,
   InGroup<DiagGroup<"delimited-escape-sequence-extension">>;
+
+def warn_cxx2b_delimited_escape_sequence : Warning<
+  "%select{delimited|named}0 escape sequences are "
+  "incompatible with C++ standards before C++2b">,
+  InGroup<CXXPre2bCompat>, DefaultIgnore;
+
 def err_delimited_escape_empty : Error<
   "delimited escape sequence cannot be empty">;
 def err_delimited_escape_missing_brace: Error<
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index bc7aec3803e8..550029f58b54 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -3115,8 +3115,6 @@ def note_ownership_returns_index_mismatch : Note<
   "declared with index %0 here">;
 def err_format_strftime_third_parameter : Error<
   "strftime format attribute requires 3rd parameter to be 0">;
-def err_format_attribute_requires_variadic : Error<
-  "format attribute requires variadic function">;
 def err_format_attribute_not : Error<"format argument not a string type">;
 def err_format_attribute_result_not : Error<"function does not return %0">;
 def err_format_attribute_implicit_this_format_string : Error<
@@ -3343,10 +3341,11 @@ def warn_assume_aligned_too_great
     "alignment assumed">,
     InGroup<DiagGroup<"builtin-assume-aligned-alignment">>;
 def warn_not_xl_compatible
-    : Warning<"requesting an alignment of 16 bytes or greater for struct"
-              " members is not binary compatible with IBM XL C/C++ for AIX"
-              " 16.1.0 and older">,
+    : Warning<"alignment of 16 bytes for a struct member is not binary "
+              "compatible with IBM XL C/C++ for AIX 16.1.0 or older">,
       InGroup<AIXCompat>;
+def note_misaligned_member_used_here : Note<
+  "passing byval argument %0 with potentially incompatible alignment here">;
 def warn_redeclaration_without_attribute_prev_attribute_ignored : Warning<
   "%q0 redeclared without %1 attribute: previous %1 ignored">,
   InGroup<MicrosoftInconsistentDllImport>;
@@ -4127,6 +4126,9 @@ def err_attribute_not_supported_on_arch
 def warn_gcc_ignores_type_attr : Warning<
   "GCC does not allow the %0 attribute to be written on a type">,
   InGroup<GccCompat>;
+def warn_gcc_requires_variadic_function : Warning<
+  "GCC requires a function with the %0 attribute to be variadic">,
+  InGroup<GccCompat>;
 
 // Clang-Specific Attributes
 def warn_attribute_iboutlet : Warning<
@@ -4829,8 +4831,12 @@ def warn_cxx14_compat_template_nontype_parm_auto_type : Warning<
   DefaultIgnore, InGroup<CXXPre17Compat>;
 def err_template_param_default_arg_redefinition : Error<
   "template parameter redefines default argument">;
+def err_template_param_default_arg_inconsistent_redefinition : Error<
+  "template parameter default argument is inconsistent with previous definition">;
 def note_template_param_prev_default_arg : Note<
   "previous default template argument defined here">;
+def note_template_param_prev_default_arg_in_other_module : Note<
+  "previous default template argument defined in module %0">;
 def err_template_param_default_arg_missing : Error<
   "template parameter missing a default argument">;
 def ext_template_parameter_default_in_function_template : ExtWarn<
@@ -5557,6 +5563,9 @@ def warn_deprecated_def : Warning<
 def warn_unavailable_def : Warning<
   "implementing unavailable method">, InGroup<DeprecatedImplementations>,
   DefaultIgnore;
+def warn_deprecated_builtin : Warning<
+  "builtin %0 is deprecated; use %1 instead">,
+  InGroup<DeprecatedBuiltins>;
 def err_unavailable : Error<"%0 is unavailable">;
 def err_property_method_unavailable
     : Error<"property access is using %0 method which is unavailable">;
@@ -6605,13 +6614,16 @@ def warn_addition_in_bitshift : Warning<
   "'%1' will be evaluated first">, InGroup<ShiftOpParentheses>;
 
 def warn_self_assignment_builtin : Warning<
-  "explicitly assigning value of variable of type %0 to itself">,
+  "explicitly assigning value of variable of type %0 to itself%select{|; did "
+  "you mean to assign to member %2?}1">,
   InGroup<SelfAssignment>, DefaultIgnore;
 def warn_self_assignment_overloaded : Warning<
-  "explicitly assigning value of variable of type %0 to itself">,
+  "explicitly assigning value of variable of type %0 to itself%select{|; did "
+  "you mean to assign to member %2?}1">,
   InGroup<SelfAssignmentOverloaded>, DefaultIgnore;
 def warn_self_move : Warning<
-  "explicitly moving variable of type %0 to itself">,
+  "explicitly moving variable of type %0 to itself%select{|; did you mean to "
+  "move to member %2?}1">,
   InGroup<SelfMove>, DefaultIgnore;
 
 def err_builtin_move_forward_unsupported : Error<
@@ -9395,6 +9407,12 @@ def warn_array_index_exceeds_max_addressable_bounds : Warning<
 def note_array_declared_here : Note<
   "array %0 declared here">;
 
+def warn_inconsistent_array_form : Warning<
+  "argument %0 of type %1 with mismatched bound">,
+  InGroup<ArrayParameter>, DefaultIgnore;
+def note_previous_declaration_as : Note<
+  "previously declared as %0 here">;
+
 def warn_printf_insufficient_data_args : Warning<
   "more '%%' conversions than data arguments">, InGroup<FormatInsufficientArgs>;
 def warn_printf_data_arg_not_used : Warning<
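The new ArrayParameter group and warn_inconsistent_array_form diagnostic above correspond to GCC's -Warray-parameter. An illustrative example of code expected to trigger it (invented for this note):

    void copy(int dst[10], const int src[10]);
    // warning: argument 'src' of type 'const int[8]' with mismatched bound
    void copy(int dst[], const int src[8]) {
      for (int i = 0; i != 8; ++i)
        dst[i] = src[i];
    }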
diff --git a/clang/include/clang/Basic/MakeSupport.h b/clang/include/clang/Basic/MakeSupport.h
new file mode 100644
index 000000000000..c663014ba7bc
--- /dev/null
+++ b/clang/include/clang/Basic/MakeSupport.h
@@ -0,0 +1,23 @@
+//===- MakeSupport.h - Make Utilities ---------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_BASIC_MAKESUPPORT_H
+#define LLVM_CLANG_BASIC_MAKESUPPORT_H
+
+#include "clang/Basic/LLVM.h"
+#include "llvm/ADT/StringRef.h"
+
+namespace clang {
+
+/// Quote target names for inclusion in GNU Make dependency files.
+/// Only the characters '$', '#', ' ', '\t' are quoted.
+void quoteMakeTarget(StringRef Target, SmallVectorImpl<char> &Res);
+
+} // namespace clang
+
+#endif // LLVM_CLANG_BASIC_MAKESUPPORT_H
diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h
index 0ab3e9b67dfe..b4f3a69259fa 100644
--- a/clang/include/clang/Basic/TargetInfo.h
+++ b/clang/include/clang/Basic/TargetInfo.h
@@ -222,9 +222,7 @@ protected:
   mutable VersionTuple PlatformMinVersion;
 
   unsigned HasAlignMac68kSupport : 1;
-  unsigned RealTypeUsesObjCFPRetMask
-      : llvm::BitmaskEnumDetail::bitWidth(
-            (int)FloatModeKind::LLVM_BITMASK_LARGEST_ENUMERATOR);
+  unsigned RealTypeUsesObjCFPRetMask : llvm::BitWidth<FloatModeKind>;
   unsigned ComplexLongDoubleUsesFP2Ret : 1;
 
   unsigned HasBuiltinMSVaList : 1;
@@ -893,7 +891,7 @@ public:
   /// Check whether the given real type should use the "fpret" flavor of
   /// Objective-C message passing on this target.
   bool useObjCFPRetForRealType(FloatModeKind T) const {
-    return RealTypeUsesObjCFPRetMask & llvm::BitmaskEnumDetail::Underlying(T);
+    return (int)((FloatModeKind)RealTypeUsesObjCFPRetMask & T);
   }
 
   /// Check whether _Complex long double should use the "fp2ret" flavor
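A usage sketch for the quoteMakeTarget helper declared in the new MakeSupport.h above (the target string is invented; per the declaration's comment, only '$', '#', ' ' and '\t' are escaped):

    #include "clang/Basic/MakeSupport.h"
    #include "llvm/ADT/SmallString.h"
    #include "llvm/Support/raw_ostream.h"

    // Sketch: quoting a dependency-file target that contains characters
    // significant to GNU make before writing it out.
    void emitTarget(llvm::raw_ostream &OS) {
      llvm::SmallString<128> Quoted;
      clang::quoteMakeTarget("out dir/foo#1.o", Quoted);
      OS << Quoted << ":\n";
    }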
diff --git a/clang/include/clang/Basic/riscv_vector.td b/clang/include/clang/Basic/riscv_vector.td
index b11b780ec1f7..d96020ee40d0 100644
--- a/clang/include/clang/Basic/riscv_vector.td
+++ b/clang/include/clang/Basic/riscv_vector.td
@@ -582,18 +582,8 @@ class IsFloat<string type> {
 }
 
 let HasUnMaskedOverloaded = false,
-    MaskedPolicy = NonePolicy,
-    ManualCodegen = [{
-      IntrinsicTypes = {ResultType, Ops[1]->getType()};
-      Ops[0] = Builder.CreateBitCast(Ops[0], ResultType->getPointerTo());
-    }],
-    MaskedManualCodegen= [{
-      // Move mask to right before vl.
-      std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end() - 1);
-      IntrinsicTypes = {ResultType, Ops[3]->getType()};
-      Ops[1] = Builder.CreateBitCast(Ops[1], ResultType->getPointerTo());
-    }] in {
-  class RVVVLEMaskBuiltin : RVVBuiltin<"m", "mPCUe", "c"> {
+    MaskedPolicy = NonePolicy in {
+  class RVVVLEMaskBuiltin : RVVOutBuiltin<"m", "mPCUe", "c"> {
     let Name = "vlm_v";
     let IRName = "vlm";
     let HasMasked = false;
   }
@@ -601,26 +591,15 @@ let HasUnMaskedOverloaded = false,
 
 let HasUnMaskedOverloaded = false,
-    ManualCodegen = [{
-      IntrinsicTypes = {ResultType, Ops[1]->getType()};
-      Ops[0] = Builder.CreateBitCast(Ops[0], ResultType->getPointerTo());
-      Ops.insert(Ops.begin(), llvm::UndefValue::get(ResultType));
-    }],
-    MaskedManualCodegen= [{
-      // Move mask to right before vl.
-      std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end() - 1);
-      Ops.push_back(ConstantInt::get(Ops.back()->getType(), TAIL_UNDISTURBED));
-      IntrinsicTypes = {ResultType, Ops[3]->getType()};
-      Ops[1] = Builder.CreateBitCast(Ops[1], ResultType->getPointerTo());
-    }] in {
+    UnMaskedPolicy = HasPassthruOperand in {
   multiclass RVVVLEBuiltin<list<string> types> {
     let Name = NAME # "_v",
         IRName = "vle",
         MaskedIRName ="vle_mask" in {
       foreach type = types in {
-        def : RVVBuiltin<"v", "vPCe", type>;
+        def : RVVOutBuiltin<"v", "vPCe", type>;
         if !not(IsFloat<type>.val) then {
-          def : RVVBuiltin<"Uv", "UvPCUe", type>;
+          def : RVVOutBuiltin<"Uv", "UvPCUe", type>;
         }
       }
     }
@@ -685,61 +664,39 @@ multiclass RVVVLSEBuiltin<list<string> types> {
       IRName = "vlse",
       MaskedIRName ="vlse_mask",
       HasUnMaskedOverloaded = false,
-      ManualCodegen = [{
-        IntrinsicTypes = {ResultType, Ops[2]->getType()};
-        Ops[0] = Builder.CreateBitCast(Ops[0], ResultType->getPointerTo());
-        Ops.insert(Ops.begin(), llvm::UndefValue::get(ResultType));
-      }],
-      MaskedManualCodegen= [{
-        // Move mask to right before vl.
-        std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end() - 1);
-        Ops.push_back(ConstantInt::get(Ops.back()->getType(), TAIL_UNDISTURBED));
-        IntrinsicTypes = {ResultType, Ops[4]->getType()};
-        Ops[1] = Builder.CreateBitCast(Ops[1], ResultType->getPointerTo());
-      }] in {
+      UnMaskedPolicy = HasPassthruOperand in {
     foreach type = types in {
-      def : RVVBuiltin<"v", "vPCet", type>;
+      def : RVVOutBuiltin<"v", "vPCet", type>;
       if !not(IsFloat<type>.val) then {
-        def : RVVBuiltin<"Uv", "UvPCUet", type>;
+        def : RVVOutBuiltin<"Uv", "UvPCUet", type>;
       }
     }
   }
 }
 
 multiclass RVVIndexedLoad<string op> {
-  let ManualCodegen = [{
-        IntrinsicTypes = {ResultType, Ops[1]->getType(), Ops[2]->getType()};
-        Ops[0] = Builder.CreateBitCast(Ops[0], ResultType->getPointerTo());
-        Ops.insert(Ops.begin(), llvm::UndefValue::get(ResultType));
-      }],
-      MaskedManualCodegen = [{
-        // Move mask to right before vl.
-        std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end() - 1);
-        Ops.push_back(ConstantInt::get(Ops.back()->getType(), TAIL_UNDISTURBED));
-        IntrinsicTypes = {ResultType, Ops[2]->getType(), Ops[4]->getType()};
-        Ops[1] = Builder.CreateBitCast(Ops[1], ResultType->getPointerTo());
-      }] in {
-    foreach type = TypeList in {
-      foreach eew_list = EEWList[0-2] in {
-        defvar eew = eew_list[0];
-        defvar eew_type = eew_list[1];
-        let Name = op # eew # "_v", IRName = op, MaskedIRName = op # "_mask" in {
-          def: RVVBuiltin<"v", "vPCe" # eew_type # "Uv", type>;
-          if !not(IsFloat<type>.val) then {
-            def: RVVBuiltin<"Uv", "UvPCUe" # eew_type # "Uv", type>;
-          }
-        }
+  let UnMaskedPolicy = HasPassthruOperand in {
+    foreach type = TypeList in {
+      foreach eew_list = EEWList[0-2] in {
+        defvar eew = eew_list[0];
+        defvar eew_type = eew_list[1];
+        let Name = op # eew # "_v", IRName = op, MaskedIRName = op # "_mask" in {
+          def: RVVOutOp1Builtin<"v", "vPCe" # eew_type # "Uv", type>;
+          if !not(IsFloat<type>.val) then {
+            def: RVVOutOp1Builtin<"Uv", "UvPCUe" # eew_type # "Uv", type>;
+          }
         }
-      defvar eew64 = "64";
-      defvar eew64_type = "(Log2EEW:6)";
-      let Name = op # eew64 # "_v", IRName = op, MaskedIRName = op # "_mask",
-          RequiredFeatures = ["RV64"] in {
-        def: RVVBuiltin<"v", "vPCe" # eew64_type # "Uv", type>;
-        if !not(IsFloat<type>.val) then {
-          def: RVVBuiltin<"Uv", "UvPCUe" # eew64_type # "Uv", type>;
-        }
-      }
       }
+      defvar eew64 = "64";
+      defvar eew64_type = "(Log2EEW:6)";
+      let Name = op # eew64 # "_v", IRName = op, MaskedIRName = op # "_mask",
+          RequiredFeatures = ["RV64"] in {
+        def: RVVOutOp1Builtin<"v", "vPCe" # eew64_type # "Uv", type>;
+        if !not(IsFloat<type>.val) then {
+          def: RVVOutOp1Builtin<"Uv", "UvPCUe" # eew64_type # "Uv", type>;
+        }
+      }
+    }
   }
 }
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index f4fe08aa1a5b..532d7780c529 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -1998,6 +1998,13 @@ def fcf_protection : Flag<["-"], "fcf-protection">, Group<f_Group>, Flags<[CoreO
   HelpText<"Enable cf-protection in 'full' mode">;
 def mibt_seal : Flag<["-"], "mibt-seal">, Group<m_Group>, Flags<[CoreOption, CC1Option]>,
   HelpText<"Optimize fcf-protection=branch/full (requires LTO).">;
+def mfunction_return_EQ : Joined<["-"], "mfunction-return=">,
+  Group<m_Group>, Flags<[CoreOption, CC1Option]>,
+  HelpText<"Replace returns with jumps to ``__x86_return_thunk`` (x86 only, error otherwise)">,
+  Values<"keep,thunk-extern">,
+  NormalizedValues<["Keep", "Extern"]>,
+  NormalizedValuesScope<"llvm::FunctionReturnThunksKind">,
+  MarshallingInfoEnum<CodeGenOpts<"FunctionReturnThunks">, "Keep">;
 
 defm xray_instrument : BoolFOption<"xray-instrument",
   LangOpts<"XRayInstrument">, DefaultFalse,
@@ -3338,11 +3345,12 @@ def mhwmult_EQ : Joined<["-"], "mhwmult=">, Group<m_Group>;
 def mglobal_merge : Flag<["-"], "mglobal-merge">, Group<m_Group>, Flags<[CC1Option]>,
   HelpText<"Enable merging of globals">;
 def mhard_float : Flag<["-"], "mhard-float">, Group<m_Group>;
-def miphoneos_version_min_EQ : Joined<["-"], "miphoneos-version-min=">, Group<m_Group>;
 def mios_version_min_EQ : Joined<["-"], "mios-version-min=">,
-  Alias<miphoneos_version_min_EQ>, HelpText<"Set iOS deployment target">;
+  Group<m_Group>, HelpText<"Set iOS deployment target">;
+def : Joined<["-"], "miphoneos-version-min=">,
+  Group<m_Group>, Alias<mios_version_min_EQ>;
 def mios_simulator_version_min_EQ : Joined<["-"], "mios-simulator-version-min=">;
-def miphonesimulator_version_min_EQ : Joined<["-"], "miphonesimulator-version-min=">, Alias<mios_simulator_version_min_EQ>;
+def : Joined<["-"], "miphonesimulator-version-min=">, Alias<mios_simulator_version_min_EQ>;
 def mkernel : Flag<["-"], "mkernel">, Group<m_Group>;
 def mlinker_version_EQ : Joined<["-"], "mlinker-version=">,
   Flags<[NoXarchOption]>;
@@ -3354,10 +3362,10 @@ def mmlir : Separate<["-"], "mmlir">, Flags<[CoreOption,FC1Option,FlangOption]>,
 def ffuchsia_api_level_EQ : Joined<["-"], "ffuchsia-api-level=">,
   Group<m_Group>, Flags<[CC1Option]>, HelpText<"Set Fuchsia API level">,
   MarshallingInfoInt<LangOpts<"FuchsiaAPILevel">>;
-def mmacosx_version_min_EQ : Joined<["-"], "mmacosx-version-min=">,
-  Group<m_Group>, HelpText<"Set Mac OS X deployment target">;
 def mmacos_version_min_EQ : Joined<["-"], "mmacos-version-min=">,
-  Group<m_Group>, Alias<mmacosx_version_min_EQ>;
+  Group<m_Group>, HelpText<"Set macOS deployment target">;
+def : Joined<["-"], "mmacosx-version-min=">,
+  Group<m_Group>, Alias<mmacos_version_min_EQ>;
 def mms_bitfields : Flag<["-"], "mms-bitfields">, Group<m_Group>, Flags<[CC1Option]>,
   HelpText<"Set the default structure layout to be compatible with the Microsoft compiler standard">,
   MarshallingInfoFlag<LangOpts<"MSBitfields">>;
@@ -3771,6 +3779,9 @@ def mstack_protector_guard_EQ : Joined<["-"], "mstack-protector-guard=">, Group<
 def mstack_protector_guard_offset_EQ : Joined<["-"], "mstack-protector-guard-offset=">, Group<m_Group>, Flags<[CC1Option]>,
   HelpText<"Use the given offset for addressing the stack-protector guard">,
   MarshallingInfoInt<CodeGenOpts<"StackProtectorGuardOffset">, "INT_MAX", "int">;
+def mstack_protector_guard_symbol_EQ : Joined<["-"], "mstack-protector-guard-symbol=">, Group<m_Group>, Flags<[CC1Option]>,
+  HelpText<"Use the given symbol for addressing the stack-protector guard">,
+  MarshallingInfoString<CodeGenOpts<"StackProtectorGuardSymbol">>;
 def mstack_protector_guard_reg_EQ : Joined<["-"], "mstack-protector-guard-reg=">, Group<m_Group>, Flags<[CC1Option]>,
   HelpText<"Use the given reg for addressing the stack-protector guard">,
   MarshallingInfoString<CodeGenOpts<"StackProtectorGuardReg">>;
@@ -3939,7 +3950,7 @@ def module_file_info : Flag<["-"], "module-file-info">, Flags<[NoXarchOption,CC1
   HelpText<"Provide information about a particular module file">;
 def mthumb : Flag<["-"], "mthumb">, Group<m_Group>;
 def mtune_EQ : Joined<["-"], "mtune=">, Group<m_Group>,
-  HelpText<"Only supported on X86 and RISC-V. Otherwise accepted for compatibility with GCC.">;
+  HelpText<"Only supported on X86, RISC-V and SystemZ. Otherwise accepted for compatibility with GCC.">;
 def multi__module : Flag<["-"], "multi_module">;
 def multiply__defined__unused : Separate<["-"], "multiply_defined_unused">;
 def multiply__defined : Separate<["-"], "multiply_defined">;
@@ -4570,6 +4581,8 @@ def mptwrite : Flag<["-"], "mptwrite">, Group<m_x86_Features_Group>;
 def mno_ptwrite : Flag<["-"], "mno-ptwrite">, Group<m_x86_Features_Group>;
 def mrdpid : Flag<["-"], "mrdpid">, Group<m_x86_Features_Group>;
 def mno_rdpid : Flag<["-"], "mno-rdpid">, Group<m_x86_Features_Group>;
+def mrdpru : Flag<["-"], "mrdpru">, Group<m_x86_Features_Group>;
+def mno_rdpru : Flag<["-"], "mno-rdpru">, Group<m_x86_Features_Group>;
 def mrdrnd : Flag<["-"], "mrdrnd">, Group<m_x86_Features_Group>;
 def mno_rdrnd : Flag<["-"], "mno-rdrnd">, Group<m_x86_Features_Group>;
 def mrtm : Flag<["-"], "mrtm">, Group<m_x86_Features_Group>;
diff --git a/clang/include/clang/Frontend/FrontendActions.h b/clang/include/clang/Frontend/FrontendActions.h
index ae829d741152..fe399850bd44 100644
--- a/clang/include/clang/Frontend/FrontendActions.h
+++ b/clang/include/clang/Frontend/FrontendActions.h
@@ -190,6 +190,10 @@ public:
 /// Dump information about the given module file, to be used for
 /// basic debugging and discovery.
 class DumpModuleInfoAction : public ASTFrontendAction {
+public:
+  // Allow other tools (ex lldb) to direct output for their use.
+  llvm::raw_ostream *OutputStream = nullptr;
+
 protected:
   std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance &CI,
                                                  StringRef InFile) override;
diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index d130aba3ee3a..8fc24c731035 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -385,6 +385,7 @@ private:
 
     bool atTopLevel() { return S <= 0; }
     bool afterImportSeq() { return S == AfterImportSeq; }
+    bool afterTopLevelSeq() { return S == AfterTopLevelTokenSeq; }
 
   private:
     State S;
@@ -397,6 +398,67 @@ private:
   /// Our current position within a C++20 import-seq.
   ImportSeq ImportSeqState = ImportSeq::AfterTopLevelTokenSeq;
 
+  /// Track whether we are in a Global Module Fragment
+  class TrackGMF {
+  public:
+    enum GMFState : int {
+      GMFActive = 1,
+      MaybeGMF = 0,
+      BeforeGMFIntroducer = -1,
+      GMFAbsentOrEnded = -2,
+    };
+
+    TrackGMF(GMFState S) : S(S) {}
+
+    /// Saw a semicolon.
+    void handleSemi() {
+      // If it is immediately after the first instance of the module keyword,
+      // then that introduces the GMF.
+      if (S == MaybeGMF)
+        S = GMFActive;
+    }
+
+    /// Saw an 'export' identifier.
+    void handleExport() {
+      // The presence of an 'export' keyword always ends or excludes a GMF.
+      S = GMFAbsentOrEnded;
+    }
+
+    /// Saw an 'import' identifier.
+    void handleImport(bool AfterTopLevelTokenSeq) {
+      // If we see this before any 'module' kw, then we have no GMF.
+      if (AfterTopLevelTokenSeq && S == BeforeGMFIntroducer)
+        S = GMFAbsentOrEnded;
+    }
+
+    /// Saw a 'module' identifier.
+    void handleModule(bool AfterTopLevelTokenSeq) {
+      // This was the first module identifier and not preceded by any token
+      // that would exclude a GMF. It could begin a GMF, but only if directly
+      // followed by a semicolon.
+      if (AfterTopLevelTokenSeq && S == BeforeGMFIntroducer)
+        S = MaybeGMF;
+      else
+        S = GMFAbsentOrEnded;
+    }
+
+    /// Saw any other token.
+    void handleMisc() {
+      // We saw something other than ; after the 'module' kw, so not a GMF.
+      if (S == MaybeGMF)
+        S = GMFAbsentOrEnded;
+    }
+
+    bool inGMF() { return S == GMFActive; }
+
+  private:
+    /// Track the transitions into and out of a Global Module Fragment,
+    /// if one is present.
+    GMFState S;
+  };
+
+  TrackGMF TrackGMFState = TrackGMF::BeforeGMFIntroducer;
+
   /// Whether the module import expects an identifier next. Otherwise,
   /// it expects a '.' or ';'.
   bool ModuleImportExpectsIdentifier = false;
@@ -2414,6 +2476,7 @@ private:
     None,
     ModuleBegin,
     ModuleImport,
+    HeaderUnitImport,
     SkippedModuleImport,
     Failure,
   } Kind;
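The TrackGMF state machine above follows token sequences such as the following C++20 translation unit (an illustrative source file, not part of the patch); the comments name the transitions defined above:

    module;              // 'module' first: BeforeGMFIntroducer -> MaybeGMF;
                         // the following ';' then moves MaybeGMF -> GMFActive
    #include <cstddef>   // preprocessed inside the global module fragment
    module demo;         // the named module declaration ends the GMF
    export int answer(); // 'export' leaves the state at GMFAbsentOrEnded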
diff --git a/clang/include/clang/Sema/HLSLExternalSemaSource.h b/clang/include/clang/Sema/HLSLExternalSemaSource.h
new file mode 100644
index 000000000000..439fc3d10f33
--- /dev/null
+++ b/clang/include/clang/Sema/HLSLExternalSemaSource.h
@@ -0,0 +1,41 @@
+//===--- HLSLExternalSemaSource.h - HLSL Sema Source ------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the HLSLExternalSemaSource interface.
+//
+//===----------------------------------------------------------------------===//
+#ifndef CLANG_SEMA_HLSLEXTERNALSEMASOURCE_H
+#define CLANG_SEMA_HLSLEXTERNALSEMASOURCE_H
+
+#include "clang/Sema/ExternalSemaSource.h"
+
+namespace clang {
+class NamespaceDecl;
+class Sema;
+
+class HLSLExternalSemaSource : public ExternalSemaSource {
+  Sema *SemaPtr = nullptr;
+  NamespaceDecl *HLSLNamespace;
+
+  void defineHLSLVectorAlias();
+
+public:
+  ~HLSLExternalSemaSource() override;
+
+  /// Initialize the semantic source with the Sema instance
+  /// being used to perform semantic analysis on the abstract syntax
+  /// tree.
+  void InitializeSema(Sema &S) override;
+
+  /// Inform the semantic consumer that Sema is no longer available.
+  void ForgetSema() override { SemaPtr = nullptr; }
+};
+
+} // namespace clang
+
+#endif // CLANG_SEMA_HLSLEXTERNALSEMASOURCE_H
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index ac241cf0515d..e51b9daef7d3 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -2272,6 +2272,9 @@ private:
 
   bool isAcceptableSlow(const NamedDecl *D, AcceptableKind Kind);
 
+  // Determine whether the module M belongs to the current TU.
+  bool isModuleUnitOfCurrentTU(const Module *M) const;
+
 public:
   /// Get the module unit whose scope we are currently within.
   Module *getCurrentModule() const {
@@ -5167,6 +5170,11 @@ public:
   void DiagnoseSelfMove(const Expr *LHSExpr, const Expr *RHSExpr,
                         SourceLocation OpLoc);
 
+  /// Returns a field in a CXXRecordDecl that has the same name as the decl \p
+  /// SelfAssigned when inside a CXXMethodDecl.
+  const FieldDecl *
+  getSelfAssignmentClassMemberCandidate(const ValueDecl *SelfAssigned);
+
   /// Warn if we're implicitly casting from a _Nullable pointer type to a
   /// _Nonnull one.
   void diagnoseNullableToNonnullConversion(QualType DstType, QualType SrcType,
@@ -12997,21 +13005,29 @@ public:
   SourceLocation getLocationOfStringLiteralByte(const StringLiteral *SL,
                                                 unsigned ByteNo) const;
 
-private:
-  void CheckArrayAccess(const Expr *BaseExpr, const Expr *IndexExpr,
-                        const ArraySubscriptExpr *ASE=nullptr,
-                        bool AllowOnePastEnd=true, bool IndexNegated=false);
-  void CheckArrayAccess(const Expr *E);
+  enum FormatArgumentPassingKind {
+    FAPK_Fixed,    // values to format are fixed (no C-style variadic arguments)
+    FAPK_Variadic, // values to format are passed as variadic arguments
+    FAPK_VAList,   // values to format are passed in a va_list
+  };
+
   // Used to grab the relevant information from a FormatAttr and a
   // FunctionDeclaration.
   struct FormatStringInfo {
     unsigned FormatIdx;
     unsigned FirstDataArg;
-    bool HasVAListArg;
+    FormatArgumentPassingKind ArgPassingKind;
   };
 
   static bool getFormatStringInfo(const FormatAttr *Format, bool IsCXXMember,
-                                  FormatStringInfo *FSI);
+                                  bool IsVariadic, FormatStringInfo *FSI);
+
+private:
+  void CheckArrayAccess(const Expr *BaseExpr, const Expr *IndexExpr,
+                        const ArraySubscriptExpr *ASE = nullptr,
+                        bool AllowOnePastEnd = true, bool IndexNegated = false);
+  void CheckArrayAccess(const Expr *E);
+
   bool CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall,
                          const FunctionProtoType *Proto);
   bool CheckObjCMethodCall(ObjCMethodDecl *Method, SourceLocation loc,
@@ -13023,6 +13039,8 @@ private:
                               ArrayRef<const Expr *> Args,
                               const FunctionProtoType *Proto,
                               SourceLocation Loc);
+  void checkAIXMemberAlignment(SourceLocation Loc, const Expr *Arg);
+
   void CheckArgAlignment(SourceLocation Loc, NamedDecl *FDecl,
                          StringRef ParamName, QualType ArgTy,
                          QualType ParamTy);
@@ -13166,16 +13184,15 @@ public:
 
 private:
   bool CheckFormatArguments(const FormatAttr *Format,
-                            ArrayRef<const Expr *> Args,
-                            bool IsCXXMember,
-                            VariadicCallType CallType,
-                            SourceLocation Loc, SourceRange Range,
+                            ArrayRef<const Expr *> Args, bool IsCXXMember,
+                            VariadicCallType CallType, SourceLocation Loc,
+                            SourceRange Range,
                             llvm::SmallBitVector &CheckedVarArgs);
   bool CheckFormatArguments(ArrayRef<const Expr *> Args,
-                            bool HasVAListArg, unsigned format_idx,
+                            FormatArgumentPassingKind FAPK, unsigned format_idx,
                             unsigned firstDataArg, FormatStringType Type,
-                            VariadicCallType CallType,
-                            SourceLocation Loc, SourceRange range,
+                            VariadicCallType CallType, SourceLocation Loc,
+                            SourceRange range,
                             llvm::SmallBitVector &CheckedVarArgs);
   void CheckAbsoluteValueFunction(const CallExpr *Call,
diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h
index a01b32669ce3..50a27a211ef0 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h
@@ -414,7 +414,8 @@ public:
   bool isArgumentConstructedDirectly(unsigned Index) const {
     // This assumes that the object was not yet removed from the state.
     return ExprEngine::getObjectUnderConstruction(
-        getState(), {getOriginExpr(), Index}, getLocationContext()).hasValue();
+               getState(), {getOriginExpr(), Index}, getLocationContext())
+        .has_value();
   }
 
   /// Some calls have parameter numbering mismatched from argument numbering.
@@ -1018,7 +1019,7 @@ public:
   SVal getObjectUnderConstruction() const {
     return ExprEngine::getObjectUnderConstruction(getState(), getOriginExpr(),
                                                   getLocationContext())
-        .getValue();
+        .value();
   }
 
   /// Number of non-placement arguments to the call. It is equal to 2 for
diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ConstraintManager.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ConstraintManager.h
index 4b6cbd516628..22b405919bc1 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ConstraintManager.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ConstraintManager.h
@@ -53,21 +53,17 @@ public:
   }
 
   /// Return true if the constraint is perfectly constrained to 'true'.
-  bool isConstrainedTrue() const { return Val && Val.getValue(); }
+  bool isConstrainedTrue() const { return Val && Val.value(); }
 
   /// Return true if the constraint is perfectly constrained to 'false'.
-  bool isConstrainedFalse() const { return Val && !Val.getValue(); }
+  bool isConstrainedFalse() const { return Val && !Val.value(); }
 
   /// Return true if the constrained is perfectly constrained.
-  bool isConstrained() const {
-    return Val.hasValue();
-  }
+  bool isConstrained() const { return Val.has_value(); }
 
   /// Return true if the constrained is underconstrained and we do not know
   /// if the constraint is true of value.
-  bool isUnderconstrained() const {
-    return !Val.hasValue();
-  }
+  bool isUnderconstrained() const { return !Val.has_value(); }
 };
 
 class ConstraintManager {
diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SMTConstraintManager.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SMTConstraintManager.h
index 61cab28918db..cf515c5a809a 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SMTConstraintManager.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SMTConstraintManager.h
@@ -344,7 +344,7 @@ protected:
     if (!res)
       Cached[hash] = ConditionTruthVal();
     else
-      Cached[hash] = ConditionTruthVal(res.getValue());
+      Cached[hash] = ConditionTruthVal(res.value());
 
     return Cached[hash];
   }
diff --git a/clang/include/clang/Support/RISCVVIntrinsicUtils.h b/clang/include/clang/Support/RISCVVIntrinsicUtils.h
index 3b6f205f9f22..a5e7e6d35cc8 100644
--- a/clang/include/clang/Support/RISCVVIntrinsicUtils.h
+++ b/clang/include/clang/Support/RISCVVIntrinsicUtils.h
@@ -209,8 +209,8 @@ public:
   }
 
   bool isValid() const { return Valid; }
-  bool isScalar() const { return Scale && Scale.getValue() == 0; }
-  bool isVector() const { return Scale && Scale.getValue() != 0; }
+  bool isScalar() const { return Scale && Scale.value() == 0; }
+  bool isVector() const { return Scale && Scale.value() != 0; }
   bool isVector(unsigned Width) const {
     return isVector() && ElementBitwidth == Width;
   }
@@ -225,6 +225,8 @@ public:
     return isFloat() && ElementBitwidth == Width;
   }
 
+  bool isPointer() const { return IsPointer; }
+
 private:
   // Verify RVV vector type and set Valid.
   bool verifyType() const;
diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
index e0d16df92e1a..b3882c227eaf 100644
--- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
+++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
@@ -94,9 +94,9 @@ public:
     assert(!isDirectory() && "not a file");
     assert(Contents && "contents not initialized");
     if (auto *Directives = Contents->DepDirectives.load()) {
-      if (Directives->hasValue())
+      if (Directives->has_value())
         return ArrayRef<dependency_directives_scan::Directive>(
-            Directives->getValue());
+            Directives->value());
     }
     return None;
   }
diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h
index 3bb44e44187b..a85d333ba6b1 100644
--- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h
+++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h
@@ -47,12 +47,12 @@ struct FullDependencies {
 
   /// Get the full command line.
   ///
-  /// \param LookupPCMPath This function is called to fill in "-fmodule-file="
-  ///                      arguments and the "-o" argument. It needs to return
-  ///                      a path for where the PCM for the given module is to
-  ///                      be located.
-  std::vector<std::string>
-  getCommandLine(std::function<StringRef(ModuleID)> LookupPCMPath) const;
+  /// \param LookupModuleOutput This function is called to fill in
+  ///                           "-fmodule-file=", "-o" and other output
+  ///                           arguments for dependencies.
+  std::vector<std::string> getCommandLine(
+      llvm::function_ref<std::string(const ModuleID &, ModuleOutputKind)>
+          LookupOutput) const;
 
   /// Get the full command line, excluding -fmodule-file=" arguments.
   std::vector<std::string> getCommandLineWithoutModulePaths() const;
diff --git a/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h b/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h
index e0a4d6a554eb..05c9f56b4cf6 100644
--- a/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h
+++ b/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h
@@ -65,6 +65,19 @@ struct ModuleIDHasher {
   }
 };
 
+/// An output from a module compilation, such as the path of the module file.
+enum class ModuleOutputKind {
+  /// The module file (.pcm). Required.
+  ModuleFile,
+  /// The path of the dependency file (.d), if any.
+  DependencyFile,
+  /// The null-separated list of names to use as the targets in the dependency
+  /// file, if any. Defaults to the value of \c ModuleFile, as in the driver.
+  DependencyTargets,
+  /// The path of the serialized diagnostic file (.dia), if any.
+  DiagnosticSerializationFile,
+};
+
 struct ModuleDeps {
   /// The identifier of the module.
   ModuleID ID;
@@ -104,17 +117,25 @@ struct ModuleDeps {
   // the primary TU.
   bool ImportedByMainFile = false;
 
+  /// Whether the TU had a dependency file. The path in \c BuildInvocation is
+  /// cleared to avoid leaking the specific path from the TU into the module.
+  bool HadDependencyFile = false;
+
+  /// Whether the TU had serialized diagnostics. The path in \c BuildInvocation
+  /// is cleared to avoid leaking the specific path from the TU into the module.
+  bool HadSerializedDiagnostics = false;
+
   /// Compiler invocation that can be used to build this module (without paths).
CompilerInvocation BuildInvocation; /// Gets the canonical command line suitable for passing to clang. /// - /// \param LookupPCMPath This function is called to fill in "-fmodule-file=" - /// arguments and the "-o" argument. It needs to return - /// a path for where the PCM for the given module is to - /// be located. + /// \param LookupModuleOutput This function is called to fill in + /// "-fmodule-file=", "-o" and other output + /// arguments. std::vector<std::string> getCanonicalCommandLine( - std::function<StringRef(ModuleID)> LookupPCMPath) const; + llvm::function_ref<std::string(const ModuleID &, ModuleOutputKind)> + LookupModuleOutput) const; /// Gets the canonical command line suitable for passing to clang, excluding /// "-fmodule-file=" and "-o" arguments. diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index 682b71a3d686..cfd7bf604542 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -3166,7 +3166,7 @@ void ASTContext::adjustDeducedFunctionResultType(FunctionDecl *FD, /// declaration of a function with an exception specification is permitted /// and preserved. Other type sugar (for instance, typedefs) is not. QualType ASTContext::getFunctionTypeWithExceptionSpec( - QualType Orig, const FunctionProtoType::ExceptionSpecInfo &ESI) { + QualType Orig, const FunctionProtoType::ExceptionSpecInfo &ESI) const { // Might have some parens. if (const auto *PT = dyn_cast<ParenType>(Orig)) return getParenType( @@ -3194,7 +3194,7 @@ QualType ASTContext::getFunctionTypeWithExceptionSpec( } bool ASTContext::hasSameFunctionTypeIgnoringExceptionSpec(QualType T, - QualType U) { + QualType U) const { return hasSameType(T, U) || (getLangOpts().CPlusPlus17 && hasSameType(getFunctionTypeWithExceptionSpec(T, EST_None), @@ -4703,7 +4703,7 @@ QualType ASTContext::getUnresolvedUsingType( QualType ASTContext::getAttributedType(attr::Kind attrKind, QualType modifiedType, - QualType equivalentType) { + QualType equivalentType) const { llvm::FoldingSetNodeID id; AttributedType::Profile(id, attrKind, modifiedType, equivalentType); @@ -5707,6 +5707,9 @@ QualType ASTContext::getAutoTypeInternal( !TypeConstraintConcept && !IsDependent) return getAutoDeductType(); + if (TypeConstraintConcept) + TypeConstraintConcept = TypeConstraintConcept->getCanonicalDecl(); + // Look in the folding set for an existing type. void *InsertPos = nullptr; llvm::FoldingSetNodeID ID; @@ -6215,8 +6218,59 @@ bool ASTContext::hasSameTemplateName(const TemplateName &X, getCanonicalTemplateName(Y).getAsVoidPointer(); } +bool ASTContext::isSameConstraintExpr(const Expr *XCE, const Expr *YCE) const { + if (!XCE != !YCE) + return false; + + if (!XCE) + return true; + + llvm::FoldingSetNodeID XCEID, YCEID; + XCE->Profile(XCEID, *this, /*Canonical=*/true); + YCE->Profile(YCEID, *this, /*Canonical=*/true); + return XCEID == YCEID; +} + +bool ASTContext::isSameTypeConstraint(const TypeConstraint *XTC, + const TypeConstraint *YTC) const { + if (!XTC != !YTC) + return false; + + if (!XTC) + return true; + + auto *NCX = XTC->getNamedConcept(); + auto *NCY = YTC->getNamedConcept(); + if (!NCX || !NCY || !isSameEntity(NCX, NCY)) + return false; + if (XTC->hasExplicitTemplateArgs() != YTC->hasExplicitTemplateArgs()) + return false; + if (XTC->hasExplicitTemplateArgs()) + if (XTC->getTemplateArgsAsWritten()->NumTemplateArgs != + YTC->getTemplateArgsAsWritten()->NumTemplateArgs) + return false; + + // Compare slowly by profiling. 
+ // + // We cannot compare the profiling results for the template + // args directly here. Consider the following example in two + // different modules: + // + // template <__integer_like _Tp, C<_Tp> Sentinel> + // constexpr _Tp operator()(_Tp &&__t, Sentinel &&last) const { + // return __t; + // } + // + // When we compare the profiling results for `C<_Tp>` from the two + // modules, we end up comparing the type of `_Tp` in each module, and + // `_Tp` naturally refers to a different type in each module. So the + // profiled template args would never compare equal, and we compare + // the immediately declared constraints instead. + return isSameConstraintExpr(XTC->getImmediatelyDeclaredConstraint(), + YTC->getImmediatelyDeclaredConstraint()); +} + bool ASTContext::isSameTemplateParameter(const NamedDecl *X, - const NamedDecl *Y) { + const NamedDecl *Y) const { if (X->getKind() != Y->getKind()) return false; @@ -6226,32 +6280,8 @@ bool ASTContext::isSameTemplateParameter(const NamedDecl *X, return false; if (TX->hasTypeConstraint() != TY->hasTypeConstraint()) return false; - const TypeConstraint *TXTC = TX->getTypeConstraint(); - const TypeConstraint *TYTC = TY->getTypeConstraint(); - if (!TXTC != !TYTC) - return false; - if (TXTC && TYTC) { - auto *NCX = TXTC->getNamedConcept(); - auto *NCY = TYTC->getNamedConcept(); - if (!NCX || !NCY || !isSameEntity(NCX, NCY)) - return false; - if (TXTC->hasExplicitTemplateArgs() != TYTC->hasExplicitTemplateArgs()) - return false; - if (TXTC->hasExplicitTemplateArgs()) { - auto *TXTCArgs = TXTC->getTemplateArgsAsWritten(); - auto *TYTCArgs = TYTC->getTemplateArgsAsWritten(); - if (TXTCArgs->NumTemplateArgs != TYTCArgs->NumTemplateArgs) - return false; - llvm::FoldingSetNodeID XID, YID; - for (auto &ArgLoc : TXTCArgs->arguments()) - ArgLoc.getArgument().Profile(XID, X->getASTContext()); - for (auto &ArgLoc : TYTCArgs->arguments()) - ArgLoc.getArgument().Profile(YID, Y->getASTContext()); - if (XID != YID) - return false; - } - } - return true; + return isSameTypeConstraint(TX->getTypeConstraint(), + TY->getTypeConstraint()); } if (auto *TX = dyn_cast<NonTypeTemplateParmDecl>(X)) { @@ -6267,8 +6297,8 @@ bool ASTContext::isSameTemplateParameter(const NamedDecl *X, TY->getTemplateParameters()); } -bool ASTContext::isSameTemplateParameterList(const TemplateParameterList *X, - const TemplateParameterList *Y) { +bool ASTContext::isSameTemplateParameterList( + const TemplateParameterList *X, const TemplateParameterList *Y) const { if (X->size() != Y->size()) return false; @@ -6276,19 +6306,46 @@ bool ASTContext::isSameTemplateParameterList(const TemplateParameterList *X, if (!isSameTemplateParameter(X->getParam(I), Y->getParam(I))) return false; - const Expr *XRC = X->getRequiresClause(); - const Expr *YRC = Y->getRequiresClause(); - if (!XRC != !YRC) + return isSameConstraintExpr(X->getRequiresClause(), Y->getRequiresClause()); +} + +bool ASTContext::isSameDefaultTemplateArgument(const NamedDecl *X, + const NamedDecl *Y) const { + // If the type parameter isn't the same already, we don't need to check the + // default argument further.
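The cross-module caveat above is easiest to see in C++20 source. In the sketch below (concept bodies are invented for illustration), the type-constraint `C<_Tp>` on Sentinel is sugar for the immediately declared constraint `C<Sentinel, _Tp>`, and it is that whole expression, profiled canonically, that can be compared safely across modules:

```cpp
// Compiles with -std=c++20. Names mirror the comment above; the concept
// definitions are made up for this sketch.
template <class T>
concept __integer_like = requires(T t) { t + 1; };

template <class S, class T>
concept C = requires(S s, T t) { s == t; };

// `C<_Tp> Sentinel` declares Sentinel with the immediately declared
// constraint C<Sentinel, _Tp>; that expression is what gets profiled.
template <__integer_like _Tp, C<_Tp> Sentinel>
constexpr _Tp take_first(_Tp &&__t, Sentinel &&) {
  return __t;
}

static_assert(take_first(1, 2) == 1);
```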
+ if (!isSameTemplateParameter(X, Y)) return false; - if (XRC) { - llvm::FoldingSetNodeID XRCID, YRCID; - XRC->Profile(XRCID, *this, /*Canonical=*/true); - YRC->Profile(YRCID, *this, /*Canonical=*/true); - if (XRCID != YRCID) + + if (auto *TTPX = dyn_cast<TemplateTypeParmDecl>(X)) { + auto *TTPY = cast<TemplateTypeParmDecl>(Y); + if (!TTPX->hasDefaultArgument() || !TTPY->hasDefaultArgument()) return false; + + return hasSameType(TTPX->getDefaultArgument(), TTPY->getDefaultArgument()); } - return true; + if (auto *NTTPX = dyn_cast<NonTypeTemplateParmDecl>(X)) { + auto *NTTPY = cast<NonTypeTemplateParmDecl>(Y); + if (!NTTPX->hasDefaultArgument() || !NTTPY->hasDefaultArgument()) + return false; + + Expr *DefaultArgumentX = NTTPX->getDefaultArgument()->IgnoreImpCasts(); + Expr *DefaultArgumentY = NTTPY->getDefaultArgument()->IgnoreImpCasts(); + llvm::FoldingSetNodeID XID, YID; + DefaultArgumentX->Profile(XID, *this, /*Canonical=*/true); + DefaultArgumentY->Profile(YID, *this, /*Canonical=*/true); + return XID == YID; + } + + auto *TTPX = cast<TemplateTemplateParmDecl>(X); + auto *TTPY = cast<TemplateTemplateParmDecl>(Y); + + if (!TTPX->hasDefaultArgument() || !TTPY->hasDefaultArgument()) + return false; + + const TemplateArgument &TAX = TTPX->getDefaultArgument().getArgument(); + const TemplateArgument &TAY = TTPY->getDefaultArgument().getArgument(); + return hasSameTemplateName(TAX.getAsTemplate(), TAY.getAsTemplate()); } static NamespaceDecl *getNamespace(const NestedNameSpecifier *X) { @@ -6371,7 +6428,7 @@ static bool hasSameOverloadableAttrs(const FunctionDecl *A, return true; } -bool ASTContext::isSameEntity(const NamedDecl *X, const NamedDecl *Y) { +bool ASTContext::isSameEntity(const NamedDecl *X, const NamedDecl *Y) const { if (X == Y) return true; @@ -6447,17 +6504,9 @@ bool ASTContext::isSameEntity(const NamedDecl *X, const NamedDecl *Y) { return false; } - const Expr *XRC = FuncX->getTrailingRequiresClause(); - const Expr *YRC = FuncY->getTrailingRequiresClause(); - if (!XRC != !YRC) + if (!isSameConstraintExpr(FuncX->getTrailingRequiresClause(), + FuncY->getTrailingRequiresClause())) return false; - if (XRC) { - llvm::FoldingSetNodeID XRCID, YRCID; - XRC->Profile(XRCID, *this, /*Canonical=*/true); - YRC->Profile(YRCID, *this, /*Canonical=*/true); - if (XRCID != YRCID) - return false; - } auto GetTypeAsWritten = [](const FunctionDecl *FD) { // Map to the first declaration that we've already merged into this one. @@ -6478,8 +6527,6 @@ bool ASTContext::isSameEntity(const NamedDecl *X, const NamedDecl *Y) { if (getLangOpts().CPlusPlus17 && XFPT && YFPT && (isUnresolvedExceptionSpec(XFPT->getExceptionSpecType()) || isUnresolvedExceptionSpec(YFPT->getExceptionSpecType())) && - // FIXME: We could make isSameEntity const after we make - // hasSameFunctionTypeIgnoringExceptionSpec const. hasSameFunctionTypeIgnoringExceptionSpec(XT, YT)) return true; return false; @@ -6521,6 +6568,20 @@ bool ASTContext::isSameEntity(const NamedDecl *X, const NamedDecl *Y) { // and patterns match. if (const auto *TemplateX = dyn_cast<TemplateDecl>(X)) { const auto *TemplateY = cast<TemplateDecl>(Y); + + // ConceptDecl wouldn't be the same if their constraint expression differs. 
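The new isSameDefaultTemplateArgument above reduces to one rule applied per parameter kind: both declarations must actually carry a default, and the defaults must compare equal (by type for type parameters, by profiled expression for non-type parameters, by template name for template template parameters). A toy model of the type-parameter case, with stand-in types rather than clang's AST:

```cpp
// Simplified model of the rule above: a missing default on either side
// means "not the same"; otherwise the defaults must match.
#include <cassert>
#include <optional>
#include <string>

struct TypeParam {
  std::optional<std::string> DefaultType; // canonical type name stand-in
};

bool isSameDefaultTemplateArgument(const TypeParam &X, const TypeParam &Y) {
  if (!X.DefaultType || !Y.DefaultType)
    return false;
  return *X.DefaultType == *Y.DefaultType;
}

int main() {
  assert(isSameDefaultTemplateArgument({"int"}, {"int"}));
  assert(!isSameDefaultTemplateArgument({"int"}, {std::nullopt}));
}
```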
+ if (const auto *ConceptX = dyn_cast<ConceptDecl>(X)) { + const auto *ConceptY = cast<ConceptDecl>(Y); + const Expr *XCE = ConceptX->getConstraintExpr(); + const Expr *YCE = ConceptY->getConstraintExpr(); + assert(XCE && YCE && "ConceptDecl without constraint expression?"); + llvm::FoldingSetNodeID XID, YID; + XCE->Profile(XID, *this, /*Canonical=*/true); + YCE->Profile(YID, *this, /*Canonical=*/true); + if (XID != YID) + return false; + } + return isSameEntity(TemplateX->getTemplatedDecl(), TemplateY->getTemplatedDecl()) && isSameTemplateParameterList(TemplateX->getTemplateParameters(), diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp index e9730112eaa3..73c3f02e67a8 100644 --- a/clang/lib/AST/ASTImporter.cpp +++ b/clang/lib/AST/ASTImporter.cpp @@ -5667,11 +5667,6 @@ ExpectedDecl ASTNodeImporter::VisitClassTemplateDecl(ClassTemplateDecl *D) { D2->setPreviousDecl(Recent); } - if (FromTemplated->isCompleteDefinition() && - !ToTemplated->isCompleteDefinition()) { - // FIXME: Import definition! - } - return D2; } @@ -5950,11 +5945,6 @@ ExpectedDecl ASTNodeImporter::VisitVarTemplateDecl(VarTemplateDecl *D) { ToVarTD->setPreviousDecl(Recent); } - if (DTemplated->isThisDeclarationADefinition() && - !ToTemplated->isThisDeclarationADefinition()) { - // FIXME: Import definition! - } - return ToVarTD; } diff --git a/clang/lib/AST/AttrImpl.cpp b/clang/lib/AST/AttrImpl.cpp index c1e7435b22da..deb28bee5ed8 100644 --- a/clang/lib/AST/AttrImpl.cpp +++ b/clang/lib/AST/AttrImpl.cpp @@ -169,7 +169,7 @@ llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(const ValueDecl *VD) { llvm::Optional<OMPDeclareTargetDeclAttr *> ActiveAttr = getActiveAttr(VD); if (ActiveAttr) - return ActiveAttr.getValue()->getMapType(); + return ActiveAttr.value()->getMapType(); return llvm::None; } @@ -177,7 +177,7 @@ llvm::Optional<OMPDeclareTargetDeclAttr::DevTypeTy> OMPDeclareTargetDeclAttr::getDeviceType(const ValueDecl *VD) { llvm::Optional<OMPDeclareTargetDeclAttr *> ActiveAttr = getActiveAttr(VD); if (ActiveAttr) - return ActiveAttr.getValue()->getDevType(); + return ActiveAttr.value()->getDevType(); return llvm::None; } @@ -185,7 +185,7 @@ llvm::Optional<SourceLocation> OMPDeclareTargetDeclAttr::getLocation(const ValueDecl *VD) { llvm::Optional<OMPDeclareTargetDeclAttr *> ActiveAttr = getActiveAttr(VD); if (ActiveAttr) - return ActiveAttr.getValue()->getRange().getBegin(); + return ActiveAttr.value()->getRange().getBegin(); return llvm::None; } diff --git a/clang/lib/AST/DeclPrinter.cpp b/clang/lib/AST/DeclPrinter.cpp index c6a392c9c01b..3f04d9b4073e 100644 --- a/clang/lib/AST/DeclPrinter.cpp +++ b/clang/lib/AST/DeclPrinter.cpp @@ -1007,6 +1007,12 @@ void DeclPrinter::VisitCXXRecordDecl(CXXRecordDecl *D) { } } + if (auto *Def = D->getDefinition()) { + if (D->hasAttr<FinalAttr>()) { + Out << " final"; + } + } + if (D->isCompleteDefinition()) { // Print the base classes if (D->getNumBases()) { diff --git a/clang/lib/AST/FormatString.cpp b/clang/lib/AST/FormatString.cpp index 4977aaa51319..c0879704de4d 100644 --- a/clang/lib/AST/FormatString.cpp +++ b/clang/lib/AST/FormatString.cpp @@ -321,6 +321,12 @@ bool clang::analyze_format_string::ParseUTF8InvalidSpecifier( clang::analyze_format_string::ArgType::MatchKind ArgType::matchesType(ASTContext &C, QualType argTy) const { + // When using the format attribute in C++, you can receive a function or an + // array that will necessarily decay to a pointer when passed to the final + // format consumer. 
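The decay fix matters for GNU-style format attributes in C++, where an array or function argument only becomes a pointer at the actual call. A small example that should now check cleanly under the format warning (compile with GCC or Clang; the attribute is a GNU extension):

```cpp
// Without the decay applied during checking, Name's type char[4] would fail
// to match the char* that %s expects, even though the call itself decays it.
#include <cstdarg>
#include <cstdio>

__attribute__((format(printf, 1, 2))) void log_fmt(const char *Fmt, ...) {
  va_list Ap;
  va_start(Ap, Fmt);
  std::vprintf(Fmt, Ap);
  va_end(Ap);
}

int main() {
  char Name[4] = "abc"; // char[4] decays to char* when passed
  log_fmt("%s\n", Name);
}
```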
Apply decay before type comparison. + if (argTy->canDecayToPointerType()) + argTy = C.getDecayedType(argTy); + if (Ptr) { // It has to be a pointer. const PointerType *PT = argTy->getAs<PointerType>(); diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.cpp b/clang/lib/AST/Interp/ByteCodeExprGen.cpp index da538aa332ff..9b729e347a24 100644 --- a/clang/lib/AST/Interp/ByteCodeExprGen.cpp +++ b/clang/lib/AST/Interp/ByteCodeExprGen.cpp @@ -496,7 +496,7 @@ ByteCodeExprGen<Emitter>::getGlobalIdx(const VarDecl *VD) { template <class Emitter> const RecordType *ByteCodeExprGen<Emitter>::getRecordTy(QualType Ty) { - if (auto *PT = dyn_cast<PointerType>(Ty)) + if (const PointerType *PT = dyn_cast<PointerType>(Ty)) return PT->getPointeeType()->getAs<RecordType>(); else return Ty->getAs<RecordType>(); diff --git a/clang/lib/Analysis/BodyFarm.cpp b/clang/lib/Analysis/BodyFarm.cpp index 23d37b881069..38f100ae0a4f 100644 --- a/clang/lib/Analysis/BodyFarm.cpp +++ b/clang/lib/Analysis/BodyFarm.cpp @@ -699,7 +699,7 @@ static Stmt *create_OSAtomicCompareAndSwap(ASTContext &C, const FunctionDecl *D) Stmt *BodyFarm::getBody(const FunctionDecl *D) { Optional<Stmt *> &Val = Bodies[D]; if (Val) - return Val.getValue(); + return Val.value(); Val = nullptr; @@ -874,7 +874,7 @@ Stmt *BodyFarm::getBody(const ObjCMethodDecl *D) { Optional<Stmt *> &Val = Bodies[D]; if (Val) - return Val.getValue(); + return Val.value(); Val = nullptr; // For now, we only synthesize getters. diff --git a/clang/lib/Analysis/FlowSensitive/ControlFlowContext.cpp b/clang/lib/Analysis/FlowSensitive/ControlFlowContext.cpp index fe9907a7c99b..58708b5b5efb 100644 --- a/clang/lib/Analysis/FlowSensitive/ControlFlowContext.cpp +++ b/clang/lib/Analysis/FlowSensitive/ControlFlowContext.cpp @@ -36,7 +36,7 @@ buildStmtToBasicBlockMap(const CFG &Cfg) { if (!Stmt) continue; - StmtToBlock[Stmt.getValue().getStmt()] = Block; + StmtToBlock[Stmt.value().getStmt()] = Block; } if (const Stmt *TerminatorStmt = Block->getTerminatorStmt()) StmtToBlock[TerminatorStmt] = Block; diff --git a/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp b/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp index e08fc71c51dc..cd87e87a6aca 100644 --- a/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp +++ b/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp @@ -24,8 +24,8 @@ namespace dataflow { StorageLocation & DataflowAnalysisContext::getStableStorageLocation(QualType Type) { - assert(!Type.isNull()); - if (Type->isStructureOrClassType() || Type->isUnionType()) { + if (!Type.isNull() && + (Type->isStructureOrClassType() || Type->isUnionType())) { // FIXME: Explore options to avoid eager initialization of fields as some of // them might not be needed for a particular analysis. llvm::DenseMap<const ValueDecl *, StorageLocation *> FieldLocs; @@ -57,8 +57,8 @@ DataflowAnalysisContext::getStableStorageLocation(const Expr &E) { PointerValue & DataflowAnalysisContext::getOrCreateNullPointerValue(QualType PointeeType) { - assert(!PointeeType.isNull()); - auto CanonicalPointeeType = PointeeType.getCanonicalType(); + auto CanonicalPointeeType = + PointeeType.isNull() ? 
PointeeType : PointeeType.getCanonicalType(); auto Res = NullPointerVals.try_emplace(CanonicalPointeeType, nullptr); if (Res.second) { auto &PointeeLoc = getStableStorageLocation(CanonicalPointeeType); diff --git a/clang/lib/Analysis/FlowSensitive/DebugSupport.cpp b/clang/lib/Analysis/FlowSensitive/DebugSupport.cpp new file mode 100644 index 000000000000..305d9d346089 --- /dev/null +++ b/clang/lib/Analysis/FlowSensitive/DebugSupport.cpp @@ -0,0 +1,197 @@ +//===- DebugSupport.cpp -----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines functions which generate more readable forms of data +// structures used in the dataflow analyses, for debugging purposes. +// +//===----------------------------------------------------------------------===// + +#include <utility> + +#include "clang/Analysis/FlowSensitive/DebugSupport.h" +#include "clang/Analysis/FlowSensitive/Solver.h" +#include "clang/Analysis/FlowSensitive/Value.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormatAdapters.h" +#include "llvm/Support/FormatCommon.h" +#include "llvm/Support/FormatVariadic.h" + +namespace clang { +namespace dataflow { + +using llvm::AlignStyle; +using llvm::fmt_pad; +using llvm::formatv; + +namespace { + +class DebugStringGenerator { +public: + explicit DebugStringGenerator( + llvm::DenseMap<const AtomicBoolValue *, std::string> AtomNamesArg) + : Counter(0), AtomNames(std::move(AtomNamesArg)) { +#ifndef NDEBUG + llvm::StringSet<> Names; + for (auto &N : AtomNames) { + assert(Names.insert(N.second).second && + "The same name must not be assigned to different atoms"); + } +#endif + } + + /// Returns a string representation of a boolean value `B`. + std::string debugString(const BoolValue &B, size_t Depth = 0) { + std::string S; + switch (B.getKind()) { + case Value::Kind::AtomicBool: { + S = getAtomName(&cast<AtomicBoolValue>(B)); + break; + } + case Value::Kind::Conjunction: { + auto &C = cast<ConjunctionValue>(B); + auto L = debugString(C.getLeftSubValue(), Depth + 1); + auto R = debugString(C.getRightSubValue(), Depth + 1); + S = formatv("(and\n{0}\n{1})", L, R); + break; + } + case Value::Kind::Disjunction: { + auto &D = cast<DisjunctionValue>(B); + auto L = debugString(D.getLeftSubValue(), Depth + 1); + auto R = debugString(D.getRightSubValue(), Depth + 1); + S = formatv("(or\n{0}\n{1})", L, R); + break; + } + case Value::Kind::Negation: { + auto &N = cast<NegationValue>(B); + S = formatv("(not\n{0})", debugString(N.getSubVal(), Depth + 1)); + break; + } + default: + llvm_unreachable("Unhandled value kind"); + } + auto Indent = Depth * 4; + return formatv("{0}", fmt_pad(S, Indent, 0)); + } + + /// Returns a string representation of a set of boolean `Constraints` and the + /// `Result` of satisfiability checking on the `Constraints`. + std::string debugString(const std::vector<BoolValue *> &Constraints, + const Solver::Result &Result) { + auto Template = R"( +Constraints +------------ +{0:$[ + +]} +------------ +{1}.
+{2} +)"; + + std::vector<std::string> ConstraintsStrings; + ConstraintsStrings.reserve(Constraints.size()); + for (auto &Constraint : Constraints) { + ConstraintsStrings.push_back(debugString(*Constraint)); + } + + auto StatusString = debugString(Result.getStatus()); + auto Solution = Result.getSolution(); + auto SolutionString = + Solution.hasValue() ? "\n" + debugString(Solution.value()) : ""; + + return formatv( + Template, + llvm::make_range(ConstraintsStrings.begin(), ConstraintsStrings.end()), + StatusString, SolutionString); + } + +private: + /// Returns a string representation of a truth assignment to atom booleans. + std::string debugString( + const llvm::DenseMap<AtomicBoolValue *, Solver::Result::Assignment> + &AtomAssignments) { + size_t MaxNameLength = 0; + for (auto &AtomName : AtomNames) { + MaxNameLength = std::max(MaxNameLength, AtomName.second.size()); + } + + std::vector<std::string> Lines; + for (auto &AtomAssignment : AtomAssignments) { + auto Line = formatv("{0} = {1}", + fmt_align(getAtomName(AtomAssignment.first), + AlignStyle::Left, MaxNameLength), + debugString(AtomAssignment.second)); + Lines.push_back(Line); + } + llvm::sort(Lines.begin(), Lines.end()); + + return formatv("{0:$[\n]}", llvm::make_range(Lines.begin(), Lines.end())); + } + + /// Returns a string representation of a boolean assignment to true or false. + std::string debugString(Solver::Result::Assignment Assignment) { + switch (Assignment) { + case Solver::Result::Assignment::AssignedFalse: + return "False"; + case Solver::Result::Assignment::AssignedTrue: + return "True"; + } + llvm_unreachable("Booleans can only be assigned true/false"); + } + + /// Returns a string representation of the result status of a SAT check. + std::string debugString(Solver::Result::Status Status) { + switch (Status) { + case Solver::Result::Status::Satisfiable: + return "Satisfiable"; + case Solver::Result::Status::Unsatisfiable: + return "Unsatisfiable"; + case Solver::Result::Status::TimedOut: + return "TimedOut"; + } + llvm_unreachable("Unhandled SAT check result status"); + } + + /// Returns the name assigned to `Atom`, either user-specified or created by + /// default rules (B0, B1, ...). + std::string getAtomName(const AtomicBoolValue *Atom) { + auto Entry = AtomNames.try_emplace(Atom, formatv("B{0}", Counter)); + if (Entry.second) { + Counter++; + } + return Entry.first->second; + } + + // Keep track of number of atoms without a user-specified name, used to assign + // non-repeating default names to such atoms. + size_t Counter; + + // Keep track of names assigned to atoms. 
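The generator above renders formulas as indented s-expressions, padding each nesting level by Depth * 4 spaces. A self-contained miniature of the same scheme (a simplified model, not the clang classes) that shows the output shape:

```cpp
// Mini rendition of the debugString scheme: atoms print by name, connectives
// print as s-expressions, one nesting level per four spaces of indent.
#include <iostream>
#include <string>

struct Bool {
  enum Kind { Atom, And, Not } K;
  std::string Name;                     // for Atom
  const Bool *L = nullptr, *R = nullptr; // for And/Not
};

std::string debugString(const Bool &B, size_t Depth = 0) {
  std::string Pad(Depth * 4, ' ');
  switch (B.K) {
  case Bool::Atom:
    return Pad + B.Name;
  case Bool::And:
    return Pad + "(and\n" + debugString(*B.L, Depth + 1) + "\n" +
           debugString(*B.R, Depth + 1) + ")";
  case Bool::Not:
    return Pad + "(not\n" + debugString(*B.L, Depth + 1) + ")";
  }
  return {};
}

int main() {
  Bool B0{Bool::Atom, "B0"}, B1{Bool::Atom, "B1"};
  Bool N{Bool::Not, "", &B1};
  Bool Conj{Bool::And, "", &B0, &N};
  std::cout << debugString(Conj) << "\n";
  // Prints:
  // (and
  //     B0
  //     (not
  //         B1))
}
```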
+ llvm::DenseMap<const AtomicBoolValue *, std::string> AtomNames; +}; + +} // namespace + +std::string +debugString(const BoolValue &B, + llvm::DenseMap<const AtomicBoolValue *, std::string> AtomNames) { + return DebugStringGenerator(std::move(AtomNames)).debugString(B); +} + +std::string +debugString(const std::vector<BoolValue *> &Constraints, + const Solver::Result &Result, + llvm::DenseMap<const AtomicBoolValue *, std::string> AtomNames) { + return DebugStringGenerator(std::move(AtomNames)) + .debugString(Constraints, Result); +} + +} // namespace dataflow +} // namespace clang diff --git a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp index 6443fc1b6422..6ce9dd55914d 100644 --- a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp +++ b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp @@ -51,7 +51,7 @@ public: assert(BlockIT != CFCtx.getStmtToBlock().end()); const auto &State = BlockToState[BlockIT->getSecond()->getBlockID()]; assert(State); - return &State.getValue().Env; + return &State.value().Env; } private: @@ -212,7 +212,7 @@ static TypeErasedDataflowAnalysisState computeBlockInputState( if (!MaybePredState) continue; - TypeErasedDataflowAnalysisState PredState = MaybePredState.getValue(); + TypeErasedDataflowAnalysisState PredState = MaybePredState.value(); if (ApplyBuiltinTransfer) { if (const Stmt *PredTerminatorStmt = Pred->getTerminatorStmt()) { const StmtToEnvMapImpl StmtToEnv(CFCtx, BlockStates); @@ -370,7 +370,7 @@ runTypeErasedDataflowAnalysis( transferBlock(CFCtx, BlockStates, *Block, InitEnv, Analysis); if (OldBlockState && - Analysis.isEqualTypeErased(OldBlockState.getValue().Lattice, + Analysis.isEqualTypeErased(OldBlockState.value().Lattice, NewBlockState.Lattice) && OldBlockState->Env.equivalentTo(NewBlockState.Env, Analysis)) { // The state of `Block` didn't change after transfer so there's no need to diff --git a/clang/lib/Analysis/FlowSensitive/WatchedLiteralsSolver.cpp b/clang/lib/Analysis/FlowSensitive/WatchedLiteralsSolver.cpp index 0e6e70d6d5d4..6a3948bd1fea 100644 --- a/clang/lib/Analysis/FlowSensitive/WatchedLiteralsSolver.cpp +++ b/clang/lib/Analysis/FlowSensitive/WatchedLiteralsSolver.cpp @@ -120,7 +120,13 @@ struct BooleanFormula { /// clauses in the formula start from the element at index 1. std::vector<ClauseID> NextWatched; - explicit BooleanFormula(Variable LargestVar) : LargestVar(LargestVar) { + /// Stores the variable identifier and value location for atomic booleans in + /// the formula. + llvm::DenseMap<Variable, AtomicBoolValue *> Atomics; + + explicit BooleanFormula(Variable LargestVar, + llvm::DenseMap<Variable, AtomicBoolValue *> Atomics) + : LargestVar(LargestVar), Atomics(std::move(Atomics)) { Clauses.push_back(0); ClauseStarts.push_back(0); NextWatched.push_back(0); @@ -180,28 +186,47 @@ BooleanFormula buildBooleanFormula(const llvm::DenseSet<BoolValue *> &Vals) { // Map each sub-value in `Vals` to a unique variable. llvm::DenseMap<BoolValue *, Variable> SubValsToVar; + // Store variable identifiers and value location of atomic booleans. 
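The formula construction above numbers every distinct sub-value with a worklist and records which SAT variables denote atoms, which is what later lets the solver phrase a model in the user's terms. A simplified stand-alone picture of that numbering pass, with stand-in types rather than the clang ones:

```cpp
// Walk the formula breadth-first, give each distinct sub-value the next
// variable, and remember the atom -> variable bindings.
#include <map>
#include <queue>
#include <string>
#include <vector>

struct Node {
  bool IsAtom;
  std::string Name;               // for atoms
  std::vector<const Node *> Subs; // for and/or/not
};

using Variable = unsigned;

std::map<Variable, const Node *> numberAtoms(const Node &Root) {
  std::map<const Node *, Variable> VarOf;
  std::map<Variable, const Node *> Atomics;
  Variable NextVar = 1;
  std::queue<const Node *> Work;
  Work.push(&Root);
  while (!Work.empty()) {
    const Node *N = Work.front();
    Work.pop();
    if (!VarOf.emplace(N, NextVar).second)
      continue; // already numbered
    if (N->IsAtom)
      Atomics[NextVar] = N; // record the atom -> variable binding
    ++NextVar;
    for (const Node *S : N->Subs)
      Work.push(S);
  }
  return Atomics;
}

int main() {
  Node A0{true, "B0", {}}, A1{true, "B1", {}};
  Node Conj{false, "", {&A0, &A1}};
  // Conj gets variable 1; the two atoms get variables 2 and 3.
  return numberAtoms(Conj).size() == 2 ? 0 : 1;
}
```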
+ llvm::DenseMap<Variable, AtomicBoolValue *> Atomics; Variable NextVar = 1; { std::queue<BoolValue *> UnprocessedSubVals; for (BoolValue *Val : Vals) UnprocessedSubVals.push(Val); while (!UnprocessedSubVals.empty()) { + Variable Var = NextVar; BoolValue *Val = UnprocessedSubVals.front(); UnprocessedSubVals.pop(); - if (!SubValsToVar.try_emplace(Val, NextVar).second) + if (!SubValsToVar.try_emplace(Val, Var).second) continue; ++NextVar; // Visit the sub-values of `Val`. - if (auto *C = dyn_cast<ConjunctionValue>(Val)) { + switch (Val->getKind()) { + case Value::Kind::Conjunction: { + auto *C = cast<ConjunctionValue>(Val); UnprocessedSubVals.push(&C->getLeftSubValue()); UnprocessedSubVals.push(&C->getRightSubValue()); - } else if (auto *D = dyn_cast<DisjunctionValue>(Val)) { + break; + } + case Value::Kind::Disjunction: { + auto *D = cast<DisjunctionValue>(Val); UnprocessedSubVals.push(&D->getLeftSubValue()); UnprocessedSubVals.push(&D->getRightSubValue()); - } else if (auto *N = dyn_cast<NegationValue>(Val)) { + break; + } + case Value::Kind::Negation: { + auto *N = cast<NegationValue>(Val); UnprocessedSubVals.push(&N->getSubVal()); + break; + } + case Value::Kind::AtomicBool: { + Atomics[Var] = cast<AtomicBoolValue>(Val); + break; + } + default: + llvm_unreachable("buildBooleanFormula: unhandled value kind"); } } } @@ -212,7 +237,7 @@ BooleanFormula buildBooleanFormula(const llvm::DenseSet<BoolValue *> &Vals) { return ValIt->second; }; - BooleanFormula Formula(NextVar - 1); + BooleanFormula Formula(NextVar - 1, std::move(Atomics)); std::vector<bool> ProcessedSubVals(NextVar, false); // Add a conjunct for each variable that represents a top-level conjunction @@ -383,7 +408,7 @@ public: // If the root level is reached, then all possible assignments lead to // a conflict. if (Level == 0) - return WatchedLiteralsSolver::Result::Unsatisfiable; + return Solver::Result::Unsatisfiable(); // Otherwise, take the other branch at the most recent level where a // decision was made. @@ -440,12 +465,29 @@ public: ++I; } } - return WatchedLiteralsSolver::Result::Satisfiable; + return Solver::Result::Satisfiable(buildSolution()); } private: - // Reverses forced moves until the most recent level where a decision was made - // on the assignment of a variable. + /// Returns a satisfying truth assignment to the atomic values in the boolean + /// formula. + llvm::DenseMap<AtomicBoolValue *, Solver::Result::Assignment> + buildSolution() { + llvm::DenseMap<AtomicBoolValue *, Solver::Result::Assignment> Solution; + for (auto &Atomic : Formula.Atomics) { + // A variable may have a definite true/false assignment, or it may be + // unassigned indicating its truth value does not affect the result of + // the formula. Unassigned variables are assigned to true as a default. + Solution[Atomic.second] = + VarAssignments[Atomic.first] == Assignment::AssignedFalse + ? Solver::Result::Assignment::AssignedFalse + : Solver::Result::Assignment::AssignedTrue; + } + return Solution; + } + + /// Reverses forced moves until the most recent level where a decision was + /// made on the assignment of a variable. void reverseForcedMoves() { for (; LevelStates[Level] == State::Forced; --Level) { const Variable Var = LevelVars[Level]; @@ -459,7 +501,7 @@ private: } } - // Updates watched literals that are affected by a variable assignment. + /// Updates watched literals that are affected by a variable assignment. 
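The one subtlety in buildSolution, per its comment above: a variable the search never assigned cannot affect satisfiability, so its atom is reported as true by convention, and only explicitly falsified atoms come back as AssignedFalse. In miniature, with stand-in types:

```cpp
// Unassigned atoms default to true; only AssignedFalse survives as false.
#include <cassert>
#include <map>
#include <string>

enum class Assignment { Unassigned, AssignedTrue, AssignedFalse };

std::map<std::string, bool>
buildSolution(const std::map<std::string, Assignment> &VarAssignments) {
  std::map<std::string, bool> Solution;
  for (const auto &[Atom, A] : VarAssignments)
    Solution[Atom] = (A != Assignment::AssignedFalse);
  return Solution;
}

int main() {
  auto S = buildSolution({{"B0", Assignment::AssignedTrue},
                          {"B1", Assignment::AssignedFalse},
                          {"B2", Assignment::Unassigned}});
  assert(S["B0"] && !S["B1"] && S["B2"]);
}
```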
void updateWatchedLiterals() { const Variable Var = LevelVars[Level]; @@ -592,7 +634,7 @@ private: }; Solver::Result WatchedLiteralsSolver::solve(llvm::DenseSet<BoolValue *> Vals) { - return Vals.empty() ? WatchedLiteralsSolver::Result::Satisfiable + return Vals.empty() ? Solver::Result::Satisfiable({{}}) : WatchedLiteralsSolverImpl(Vals).solve(); } diff --git a/clang/lib/Analysis/PathDiagnostic.cpp b/clang/lib/Analysis/PathDiagnostic.cpp index 8a7305000746..bb5f116d6940 100644 --- a/clang/lib/Analysis/PathDiagnostic.cpp +++ b/clang/lib/Analysis/PathDiagnostic.cpp @@ -320,7 +320,7 @@ static Optional<bool> comparePath(const PathPieces &X, const PathPieces &Y) { for ( ; X_I != X_end && Y_I != Y_end; ++X_I, ++Y_I) { Optional<bool> b = comparePiece(**X_I, **Y_I); if (b) - return b.getValue(); + return b.value(); } return None; @@ -397,7 +397,7 @@ static bool compare(const PathDiagnostic &X, const PathDiagnostic &Y) { } Optional<bool> b = comparePath(X.path, Y.path); assert(b); - return b.getValue(); + return b.value(); } void PathDiagnosticConsumer::FlushDiagnostics( diff --git a/clang/lib/Analysis/UninitializedValues.cpp b/clang/lib/Analysis/UninitializedValues.cpp index 800943a99d87..7f44685355e0 100644 --- a/clang/lib/Analysis/UninitializedValues.cpp +++ b/clang/lib/Analysis/UninitializedValues.cpp @@ -149,7 +149,7 @@ public: const VarDecl *vd) { const Optional<unsigned> &idx = declToIndex.getValueIndex(vd); assert(idx); - return getValueVector(block)[idx.getValue()]; + return getValueVector(block)[idx.value()]; } }; @@ -210,7 +210,7 @@ void CFGBlockValues::resetScratch() { ValueVector::reference CFGBlockValues::operator[](const VarDecl *vd) { const Optional<unsigned> &idx = declToIndex.getValueIndex(vd); assert(idx); - return scratch[idx.getValue()]; + return scratch[idx.value()]; } //------------------------------------------------------------------------====// diff --git a/clang/lib/Basic/LangStandards.cpp b/clang/lib/Basic/LangStandards.cpp index 5bacc3b16496..a21898dd3c62 100644 --- a/clang/lib/Basic/LangStandards.cpp +++ b/clang/lib/Basic/LangStandards.cpp @@ -61,8 +61,8 @@ LangStandard::Kind clang::getDefaultLanguageStandard(clang::Language Lang, if (CLANG_DEFAULT_STD_C != LangStandard::lang_unspecified) return CLANG_DEFAULT_STD_C; - // The PS4 and PS5 use C99 as the default C standard. - if (T.isPS()) + // The PS4 uses C99 as the default C standard. + if (T.isPS4()) return LangStandard::lang_gnu99; return LangStandard::lang_gnu17; case Language::ObjC: diff --git a/clang/lib/Basic/MakeSupport.cpp b/clang/lib/Basic/MakeSupport.cpp new file mode 100644 index 000000000000..37838f7bbc7b --- /dev/null +++ b/clang/lib/Basic/MakeSupport.cpp @@ -0,0 +1,35 @@ +//===-- MakeSuport.cpp --------------------------------------------------*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Basic/MakeSupport.h" + +void clang::quoteMakeTarget(StringRef Target, SmallVectorImpl<char> &Res) { + for (unsigned i = 0, e = Target.size(); i != e; ++i) { + switch (Target[i]) { + case ' ': + case '\t': + // Escape the preceding backslashes + for (int j = i - 1; j >= 0 && Target[j] == '\\'; --j) + Res.push_back('\\'); + + // Escape the space/tab + Res.push_back('\\'); + break; + case '$': + Res.push_back('$'); + break; + case '#': + Res.push_back('\\'); + break; + default: + break; + } + + Res.push_back(Target[i]); + } +}
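The quoting rules in quoteMakeTarget are worth seeing end to end: spaces and tabs are backslash-escaped together with any backslashes immediately preceding them, '$' is doubled for Make's variable syntax, and '#' is escaped so it cannot start a comment. A stand-alone reimplementation of the same loop over plain std::string, with a usage example:

```cpp
// Same escaping rules as above, minus the LLVM SmallVector plumbing.
#include <iostream>
#include <string>

std::string quoteMakeTarget(const std::string &Target) {
  std::string Res;
  for (size_t I = 0, E = Target.size(); I != E; ++I) {
    switch (Target[I]) {
    case ' ':
    case '\t':
      for (size_t J = I; J > 0 && Target[J - 1] == '\\'; --J)
        Res.push_back('\\'); // escape the preceding backslashes
      Res.push_back('\\');   // escape the space/tab itself
      break;
    case '$':
      Res.push_back('$'); // '$' becomes '$$'
      break;
    case '#':
      Res.push_back('\\'); // '#' becomes '\#'
      break;
    default:
      break;
    }
    Res.push_back(Target[I]);
  }
  return Res;
}

int main() {
  std::cout << quoteMakeTarget("out dir/my$obj #1.o") << "\n";
  // Prints: out\ dir/my$$obj\ \#1.o
}
```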
\ No newline at end of file diff --git a/clang/lib/Basic/TargetInfo.cpp b/clang/lib/Basic/TargetInfo.cpp index e22ed34e7da4..6685145ea6d2 100644 --- a/clang/lib/Basic/TargetInfo.cpp +++ b/clang/lib/Basic/TargetInfo.cpp @@ -503,7 +503,7 @@ bool TargetInfo::initFeatureMap( TargetInfo::CallingConvKind TargetInfo::getCallingConvKind(bool ClangABICompat4) const { if (getCXXABI() != TargetCXXABI::Microsoft && - (ClangABICompat4 || getTriple().getOS() == llvm::Triple::PS4)) + (ClangABICompat4 || getTriple().isPS4())) return CCK_ClangABI4OrPS4; return CCK_Default; } diff --git a/clang/lib/Basic/Targets/RISCV.cpp b/clang/lib/Basic/Targets/RISCV.cpp index 32dd2bad2c5c..cb2cdb50e18e 100644 --- a/clang/lib/Basic/Targets/RISCV.cpp +++ b/clang/lib/Basic/Targets/RISCV.cpp @@ -252,7 +252,7 @@ bool RISCVTargetInfo::hasFeature(StringRef Feature) const { .Case("64bit", Is64Bit) .Default(None); if (Result) - return Result.getValue(); + return Result.value(); if (ISAInfo->isSupportedExtensionFeature(Feature)) return ISAInfo->hasExtension(Feature); diff --git a/clang/lib/Basic/Targets/SystemZ.h b/clang/lib/Basic/Targets/SystemZ.h index d12045c756c1..e4f242e624cb 100644 --- a/clang/lib/Basic/Targets/SystemZ.h +++ b/clang/lib/Basic/Targets/SystemZ.h @@ -123,6 +123,14 @@ public: void fillValidCPUList(SmallVectorImpl<StringRef> &Values) const override; + bool isValidTuneCPUName(StringRef Name) const override { + return isValidCPUName(Name); + } + + void fillValidTuneCPUList(SmallVectorImpl<StringRef> &Values) const override { + fillValidCPUList(Values); + } + bool setCPU(const std::string &Name) override { CPU = Name; ISARevision = getISARevision(CPU); diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp index 06988830eaed..69afdf8a3584 100644 --- a/clang/lib/Basic/Targets/X86.cpp +++ b/clang/lib/Basic/Targets/X86.cpp @@ -297,6 +297,8 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features, HasCLDEMOTE = true; } else if (Feature == "+rdpid") { HasRDPID = true; + } else if (Feature == "+rdpru") { + HasRDPRU = true; } else if (Feature == "+kl") { HasKL = true; } else if (Feature == "+widekl") { @@ -743,6 +745,8 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts, Builder.defineMacro("__WIDEKL__"); if (HasRDPID) Builder.defineMacro("__RDPID__"); + if (HasRDPRU) + Builder.defineMacro("__RDPRU__"); if (HasCLDEMOTE) Builder.defineMacro("__CLDEMOTE__"); if (HasWAITPKG) @@ -926,6 +930,7 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const { .Case("prfchw", true) .Case("ptwrite", true) .Case("rdpid", true) + .Case("rdpru", true) .Case("rdrnd", true) .Case("rdseed", true) .Case("rtm", true) @@ -1021,6 +1026,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const { .Case("prfchw", HasPRFCHW) .Case("ptwrite", HasPTWRITE) .Case("rdpid", HasRDPID) + .Case("rdpru", HasRDPRU) .Case("rdrnd", HasRDRND) .Case("rdseed", HasRDSEED) .Case("retpoline-external-thunk", HasRetpolineExternalThunk) diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h index 78e444f4e4eb..ea98dcf42de6 100644 --- a/clang/lib/Basic/Targets/X86.h +++ b/clang/lib/Basic/Targets/X86.h @@ -125,6 +125,7 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo { bool HasMOVBE = false; bool HasPREFETCHWT1 = false; bool HasRDPID = false; + bool HasRDPRU = false; bool HasRetpolineExternalThunk = false; bool HasLAHFSAHF = false; bool HasWBNOINVD = false; @@ -420,8 +421,8 @@ public: // Use fpret for all types. 
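In the X86.h hunk just below, RealTypeUsesObjCFPRetMask stores the OR of FloatModeKind enumerators, and the diff switches the storing cast from int to unsigned, the natural type for a bitmask. The pattern in miniature; this is a stand-in enum, and clang's FloatModeKind actually gets its operator| from LLVM's BitmaskEnum machinery:

```cpp
// Bitmask over an enum class: OR the enumerators, store the raw bits in an
// unsigned mask, as the (unsigned)(...) cast in the diff does.
enum class FloatModeKind : unsigned {
  Float = 1 << 0,
  Double = 1 << 1,
  LongDouble = 1 << 2,
};

constexpr FloatModeKind operator|(FloatModeKind A, FloatModeKind B) {
  return static_cast<FloatModeKind>(static_cast<unsigned>(A) |
                                    static_cast<unsigned>(B));
}

constexpr unsigned RealTypeUsesObjCFPRetMask =
    static_cast<unsigned>(FloatModeKind::Float | FloatModeKind::Double |
                          FloatModeKind::LongDouble);

static_assert(RealTypeUsesObjCFPRetMask == 0b111, "three low bits set");
```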
RealTypeUsesObjCFPRetMask = - (int)(FloatModeKind::Float | FloatModeKind::Double | - FloatModeKind::LongDouble); + (unsigned)(FloatModeKind::Float | FloatModeKind::Double | + FloatModeKind::LongDouble); // x86-32 has atomics up to 8 bytes MaxAtomicPromoteWidth = 64; @@ -700,7 +701,7 @@ public: "64-i64:64-f80:128-n8:16:32:64-S128"); // Use fpret only for long double. - RealTypeUsesObjCFPRetMask = (int)FloatModeKind::LongDouble; + RealTypeUsesObjCFPRetMask = (unsigned)FloatModeKind::LongDouble; // Use fp2ret for _Complex long double. ComplexLongDoubleUsesFP2Ret = true; diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index eb40e446057f..7c4e35634e5d 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -788,6 +788,18 @@ void EmitAssemblyHelper::RunOptimizationPipeline( SI.registerCallbacks(PIC, &FAM); PassBuilder PB(TM.get(), PTO, PGOOpt, &PIC); + // Enable verify-debuginfo-preserve-each for new PM. + DebugifyEachInstrumentation Debugify; + DebugInfoPerPass DebugInfoBeforePass; + if (CodeGenOpts.EnableDIPreservationVerify) { + Debugify.setDebugifyMode(DebugifyMode::OriginalDebugInfo); + Debugify.setDebugInfoBeforePass(DebugInfoBeforePass); + + if (!CodeGenOpts.DIBugsReportFilePath.empty()) + Debugify.setOrigDIVerifyBugsReportFilePath( + CodeGenOpts.DIBugsReportFilePath); + Debugify.registerCallbacks(PIC); + } // Attempt to load pass plugins and register their callbacks with PB. for (auto &PluginFN : CodeGenOpts.PassPlugins) { auto PassPlugin = PassPlugin::Load(PluginFN); diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp index 6f2679cb15e4..a8bb0dd65d1a 100644 --- a/clang/lib/CodeGen/CGCUDANV.cpp +++ b/clang/lib/CodeGen/CGCUDANV.cpp @@ -212,8 +212,7 @@ static std::unique_ptr<MangleContext> InitDeviceMC(CodeGenModule &CGM) { CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM) : CGCUDARuntime(CGM), Context(CGM.getLLVMContext()), TheModule(CGM.getModule()), - RelocatableDeviceCode(CGM.getLangOpts().GPURelocatableDeviceCode || - CGM.getLangOpts().OffloadingNewDriver), + RelocatableDeviceCode(CGM.getLangOpts().GPURelocatableDeviceCode), DeviceMC(InitDeviceMC(CGM)) { CodeGen::CodeGenTypes &Types = CGM.getTypes(); ASTContext &Ctx = CGM.getContext(); @@ -1116,7 +1115,8 @@ void CGNVCUDARuntime::createOffloadingEntries() { llvm::OpenMPIRBuilder OMPBuilder(CGM.getModule()); OMPBuilder.initialize(); - StringRef Section = "cuda_offloading_entries"; + StringRef Section = CGM.getLangOpts().HIP ? "hip_offloading_entries" + : "cuda_offloading_entries"; for (KernelInfo &I : EmittedKernels) OMPBuilder.emitOffloadingEntry(KernelHandles[I.Kernel], getDeviceSideName(cast<NamedDecl>(I.D)), 0, @@ -1171,10 +1171,11 @@ llvm::Function *CGNVCUDARuntime::finalizeModule() { } return nullptr; } - if (!(CGM.getLangOpts().OffloadingNewDriver && RelocatableDeviceCode)) + if (CGM.getLangOpts().OffloadingNewDriver && RelocatableDeviceCode) + createOffloadingEntries(); + else return makeModuleCtorFunction(); - createOffloadingEntries(); return nullptr; } diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 4e26c35c6342..104a30dd6b25 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -1931,6 +1931,9 @@ void CodeGenModule::getDefaultFunctionAttributes(StringRef Name, FuncAttrs.addAttribute(llvm::Attribute::Convergent); } + // TODO: NoUnwind attribute should be added for other GPU modes OpenCL, HIP, + // SYCL, OpenMP offload. 
AFAIK, none of them support exceptions in device + // code. if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) { // Exceptions aren't supported in CUDA device code. FuncAttrs.addAttribute(llvm::Attribute::NoUnwind); diff --git a/clang/lib/CodeGen/CGClass.cpp b/clang/lib/CodeGen/CGClass.cpp index 153f299a1c4b..cde31711a7db 100644 --- a/clang/lib/CodeGen/CGClass.cpp +++ b/clang/lib/CodeGen/CGClass.cpp @@ -1783,14 +1783,14 @@ namespace { StartIndex = FieldIndex; } else if (StartIndex) { EHStack.pushCleanup<SanitizeDtorFieldRange>( - NormalAndEHCleanup, DD, StartIndex.getValue(), FieldIndex); + NormalAndEHCleanup, DD, StartIndex.value(), FieldIndex); StartIndex = None; } } void End() { if (StartIndex) EHStack.pushCleanup<SanitizeDtorFieldRange>(NormalAndEHCleanup, DD, - StartIndex.getValue(), -1); + StartIndex.value(), -1); } }; } // end anonymous namespace diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 305040b01c08..091eb9da5af4 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -6717,11 +6717,9 @@ llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective( default: break; } - } else if (DefaultNT == -1) { - return nullptr; } - return Bld.getInt32(DefaultNT); + return llvm::ConstantInt::get(CGF.Int32Ty, DefaultNT); } static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, @@ -10189,9 +10187,8 @@ llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc( return UDMMap.lookup(D); } -void CGOpenMPRuntime::emitTargetNumIterationsCall( +llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall( CodeGenFunction &CGF, const OMPExecutableDirective &D, - llvm::Value *DeviceID, llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter) { @@ -10201,20 +10198,12 @@ void CGOpenMPRuntime::emitTargetNumIterationsCall( if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) TD = getNestedDistributeDirective(CGM.getContext(), D); if (!TD) - return; + return llvm::ConstantInt::get(CGF.Int64Ty, 0); + const auto *LD = cast<OMPLoopDirective>(TD); - auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF, - PrePostActionTy &) { - if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) { - llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); - llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations}; - CGF.EmitRuntimeCall( - OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper), - Args); - } - }; - emitInlinedDirective(CGF, OMPD_unknown, CodeGen); + if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) + return NumIterations; + return llvm::ConstantInt::get(CGF.Int64Ty, 0); } void CGOpenMPRuntime::emitTargetCall( @@ -10308,26 +10297,34 @@ void CGOpenMPRuntime::emitTargetCall( // Source location for the ident struct llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); - // Emit tripcount for the target loop-based directive. - emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter); + // Get tripcount for the target loop-based directive. + llvm::Value *NumIterations = + emitTargetNumIterationsCall(CGF, D, SizeEmitter); + + // Arguments for the target kernel. 
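The rewrite below funnels every target construct through a single runtime entry point instead of the __tgt_target_mapper/__tgt_target_teams_mapper family. The KernelArgs vector built next lines up roughly with a record like the following; the struct and its field names are this sketch's reading of the call sequence, not the runtime's documented ABI:

```cpp
// Rough shape of the argument package handed to the consolidated target
// kernel launch: a version tag, the mapped-data pointer arrays, and the
// loop trip count.
#include <cstdint>

struct TargetKernelArgs {
  int32_t Version;   // currently 1
  int32_t NumArgs;   // number of mapped pointers
  void **BasePointers;
  void **Pointers;
  int64_t *Sizes;
  int64_t *MapTypes;
  void **MapNames;
  void **Mappers;
  int64_t TripCount; // 0 when there is no enclosed distribute loop
};
```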
+ SmallVector<llvm::Value *> KernelArgs{ + CGF.Builder.getInt32(/* Version */ 1), + PointerNum, + InputInfo.BasePointersArray.getPointer(), + InputInfo.PointersArray.getPointer(), + InputInfo.SizesArray.getPointer(), + MapTypesArray, + MapNamesArray, + InputInfo.MappersArray.getPointer(), + NumIterations}; + + // Arguments passed to the 'nowait' variant. + SmallVector<llvm::Value *> NoWaitKernelArgs{ + CGF.Builder.getInt32(0), + llvm::ConstantPointerNull::get(CGM.VoidPtrTy), + CGF.Builder.getInt32(0), + llvm::ConstantPointerNull::get(CGM.VoidPtrTy), + }; + + bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>(); - bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); // The target region is an outlined function launched by the runtime - // via calls __tgt_target() or __tgt_target_teams(). - // - // __tgt_target() launches a target region with one team and one thread, - // executing a serial region. This master thread may in turn launch - // more threads within its team upon encountering a parallel region, - // however, no additional teams can be launched on the device. - // - // __tgt_target_teams() launches a target region with one or more teams, - // each with one or more threads. This call is required for target - // constructs such as: - // 'target teams' - // 'target' / 'teams' - // 'target teams distribute parallel for' - // 'target parallel' - // and so on. + // via calls to __tgt_target_kernel(). // // Note that on the host and CPU targets, the runtime implementation of // these calls simply call the outlined function without forking threads. @@ -10338,70 +10335,15 @@ void CGOpenMPRuntime::emitTargetCall( // In contrast, on the NVPTX target, the implementation of // __tgt_target_teams() launches a GPU kernel with the requested number // of teams and threads so no additional calls to the runtime are required. - if (NumTeams) { - // If we have NumTeams defined this means that we have an enclosed teams - // region. Therefore we also expect to have NumThreads defined. These two - // values should be defined in the presence of a teams directive, - // regardless of having any clauses associated. If the user is using teams - // but no clauses, these two values will be the default that should be - // passed to the runtime library - a 32-bit integer with the value zero. - assert(NumThreads && "Thread limit expression should be available along " - "with number of teams."); - SmallVector<llvm::Value *> OffloadingArgs = { - RTLoc, - DeviceID, - OutlinedFnID, - PointerNum, - InputInfo.BasePointersArray.getPointer(), - InputInfo.PointersArray.getPointer(), - InputInfo.SizesArray.getPointer(), - MapTypesArray, - MapNamesArray, - InputInfo.MappersArray.getPointer(), - NumTeams, - NumThreads}; - if (HasNowait) { - // Add int32_t depNum = 0, void *depList = nullptr, int32_t - // noAliasDepNum = 0, void *noAliasDepList = nullptr. - OffloadingArgs.push_back(CGF.Builder.getInt32(0)); - OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy)); - OffloadingArgs.push_back(CGF.Builder.getInt32(0)); - OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy)); - } - Return = CGF.EmitRuntimeCall( - OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), HasNowait - ? 
OMPRTL___tgt_target_teams_nowait_mapper - : OMPRTL___tgt_target_teams_mapper), - OffloadingArgs); - } else { - SmallVector<llvm::Value *> OffloadingArgs = { - RTLoc, - DeviceID, - OutlinedFnID, - PointerNum, - InputInfo.BasePointersArray.getPointer(), - InputInfo.PointersArray.getPointer(), - InputInfo.SizesArray.getPointer(), - MapTypesArray, - MapNamesArray, - InputInfo.MappersArray.getPointer()}; - if (HasNowait) { - // Add int32_t depNum = 0, void *depList = nullptr, int32_t - // noAliasDepNum = 0, void *noAliasDepList = nullptr. - OffloadingArgs.push_back(CGF.Builder.getInt32(0)); - OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy)); - OffloadingArgs.push_back(CGF.Builder.getInt32(0)); - OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy)); - } - Return = CGF.EmitRuntimeCall( - OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper - : OMPRTL___tgt_target_mapper), - OffloadingArgs); - } - // Check the error code and execute the host version if required. + CGF.Builder.restoreIP( + HasNoWait ? OMPBuilder.emitTargetKernel( + CGF.Builder, Return, RTLoc, DeviceID, NumTeams, + NumThreads, OutlinedFnID, KernelArgs, NoWaitKernelArgs) + : OMPBuilder.emitTargetKernel(CGF.Builder, Return, RTLoc, + DeviceID, NumTeams, NumThreads, + OutlinedFnID, KernelArgs)); + llvm::BasicBlock *OffloadFailedBlock = CGF.createBasicBlock("omp_offload.failed"); llvm::BasicBlock *OffloadContBlock = diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h index 7fc6a7e278e5..b95aef68335e 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -884,13 +884,11 @@ private: llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const OMPTaskDataTy &Data); - /// Emit code that pushes the trip count of loops associated with constructs - /// 'target teams distribute' and 'teams distribute parallel for'. - /// \param SizeEmitter Emits the int64 value for the number of iterations of - /// the associated loop. - void emitTargetNumIterationsCall( + /// Return the trip count of loops associated with constructs / 'target teams + /// distribute' and 'teams distribute parallel for'. \param SizeEmitter Emits + /// the int64 value for the number of iterations of the associated loop. + llvm::Value *emitTargetNumIterationsCall( CodeGenFunction &CGF, const OMPExecutableDirective &D, - llvm::Value *DeviceID, llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter); diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 301f5278df69..db0b2ffd3a4f 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -2591,11 +2591,12 @@ static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S, } } -static bool isSupportedByOpenMPIRBuilder(const OMPExecutableDirective &S) { +static bool isSupportedByOpenMPIRBuilder(const OMPSimdDirective &S) { // Check for unsupported clauses - if (!S.clauses().empty()) { - // Currently no clause is supported - return false; + for (OMPClause *C : S.clauses()) { + // Currently only simdlen clause is supported + if (!isa<OMPSimdlenClause>(C)) + return false; } // Check if we have a statement with the ordered directive. @@ -2630,7 +2631,6 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { // Use the OpenMPIRBuilder if enabled. 
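With the isSupportedByOpenMPIRBuilder change above, a simd construct whose only clause is simdlen can now take the OpenMPIRBuilder path just below, where the constant is evaluated and handed to applySimd as SIMD metadata. An input that exercises it:

```cpp
// The simdlen(8) constant is folded and attached to the loop's metadata.
void saxpy(int n, float a, const float *x, float *y) {
#pragma omp simd simdlen(8)
  for (int i = 0; i < n; ++i)
    y[i] = a * x[i] + y[i];
}
```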
if (UseOMPIRBuilder) { // Emit the associated statement and get its loop representation. - llvm::DebugLoc DL = SourceLocToDebugLoc(S.getBeginLoc()); const Stmt *Inner = S.getRawStmt(); llvm::CanonicalLoopInfo *CLI = EmitOMPCollapsedCanonicalLoopNest(Inner, 1); @@ -2638,7 +2638,15 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); // Add SIMD specific metadata - OMPBuilder.applySimd(DL, CLI); + llvm::ConstantInt *Simdlen = nullptr; + if (const auto *C = S.getSingleClause<OMPSimdlenClause>()) { + RValue Len = + this->EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(), + /*ignoreResult=*/true); + auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal()); + Simdlen = Val; + } + OMPBuilder.applySimd(CLI, Simdlen); return; } }; @@ -5998,18 +6006,26 @@ static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X, RMWOp = llvm::AtomicRMWInst::Xor; break; case BO_LT: - RMWOp = X.getType()->hasSignedIntegerRepresentation() - ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min - : llvm::AtomicRMWInst::Max) - : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin - : llvm::AtomicRMWInst::UMax); + if (IsInteger) + RMWOp = X.getType()->hasSignedIntegerRepresentation() + ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min + : llvm::AtomicRMWInst::Max) + : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin + : llvm::AtomicRMWInst::UMax); + else + RMWOp = IsXLHSInRHSPart ? llvm::AtomicRMWInst::FMin + : llvm::AtomicRMWInst::FMax; break; case BO_GT: - RMWOp = X.getType()->hasSignedIntegerRepresentation() - ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max - : llvm::AtomicRMWInst::Min) - : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax - : llvm::AtomicRMWInst::UMin); + if (IsInteger) + RMWOp = X.getType()->hasSignedIntegerRepresentation() + ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max + : llvm::AtomicRMWInst::Min) + : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax + : llvm::AtomicRMWInst::UMin); + else + RMWOp = IsXLHSInRHSPart ? llvm::AtomicRMWInst::FMax + : llvm::AtomicRMWInst::FMin; break; case BO_Assign: RMWOp = llvm::AtomicRMWInst::Xchg; diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index 05942f462dd1..17c1c91c7e8f 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -898,6 +898,20 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, if (D && D->hasAttr<NoProfileFunctionAttr>()) Fn->addFnAttr(llvm::Attribute::NoProfile); + if (D) { + // Function attributes take precedence over command line flags. + if (auto *A = D->getAttr<FunctionReturnThunksAttr>()) { + switch (A->getThunkType()) { + case FunctionReturnThunksAttr::Kind::Keep: + break; + case FunctionReturnThunksAttr::Kind::Extern: + Fn->addFnAttr(llvm::Attribute::FnRetThunkExtern); + break; + } + } else if (CGM.getCodeGenOpts().FunctionReturnThunks) + Fn->addFnAttr(llvm::Attribute::FnRetThunkExtern); + } + if (FD && (getLangOpts().OpenCL || (getLangOpts().HIP && getLangOpts().CUDAIsDevice))) { // Add metadata for a kernel function. 
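The FunctionReturnThunksAttr handling above gives the source-level attribute precedence over the -mfunction-return= command-line default. Example usage with the GNU attribute spelling; per the diff's semantics, "keep" suppresses the thunk and the Extern kind requests the extern return thunk (the symbol, e.g. __x86_return_thunk, is supplied by the environment such as the Linux kernel):

```cpp
// Per-function overrides of the -mfunction-return= default.
__attribute__((function_return("keep"))) void fast_path(void) {
  // No return thunk here, even under -mfunction-return=thunk-extern.
}

__attribute__((function_return("thunk-extern"))) void hardened_path(void) {
  // Return lowered to a jump to the extern return thunk.
}
```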
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 56ed59d1e3f1..c372bab1eccb 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -445,6 +445,7 @@ void CodeGenModule::checkAliases() { void CodeGenModule::clear() { DeferredDeclsToEmit.clear(); + EmittedDeferredDecls.clear(); if (OpenMPRuntime) OpenMPRuntime->clear(); } @@ -510,6 +511,9 @@ static void setVisibilityFromDLLStorageClass(const clang::LangOptions &LO, void CodeGenModule::Release() { EmitDeferred(); + DeferredDecls.insert(EmittedDeferredDecls.begin(), + EmittedDeferredDecls.end()); + EmittedDeferredDecls.clear(); EmitVTablesOpportunistically(); applyGlobalValReplacements(); applyReplacements(); @@ -900,6 +904,9 @@ void CodeGenModule::Release() { if (!getCodeGenOpts().StackProtectorGuardReg.empty()) getModule().setStackProtectorGuardReg( getCodeGenOpts().StackProtectorGuardReg); + if (!getCodeGenOpts().StackProtectorGuardSymbol.empty()) + getModule().setStackProtectorGuardSymbol( + getCodeGenOpts().StackProtectorGuardSymbol); if (getCodeGenOpts().StackProtectorGuardOffset != INT_MAX) getModule().setStackProtectorGuardOffset( getCodeGenOpts().StackProtectorGuardOffset); @@ -4286,6 +4293,9 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName, llvm::Type *Ty, getCUDARuntime().handleVarRegistration(D, *GV); } + if (D) + SanitizerMD->reportGlobal(GV, *D); + LangAS ExpectedAS = D ? D->getType().getAddressSpace() : (LangOpts.OpenCL ? LangAS::opencl_global : LangAS::Default); diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h index da43b9616c88..10b49da27dab 100644 --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -344,6 +344,20 @@ private: std::vector<GlobalDecl> DeferredDeclsToEmit; void addDeferredDeclToEmit(GlobalDecl GD) { DeferredDeclsToEmit.emplace_back(GD); + addEmittedDeferredDecl(GD); + } + + /// Decls that were DeferredDecls and have now been emitted. + llvm::DenseMap<llvm::StringRef, GlobalDecl> EmittedDeferredDecls; + + void addEmittedDeferredDecl(GlobalDecl GD) { + if (!llvm::isa<FunctionDecl>(GD.getDecl())) + return; + llvm::GlobalVariable::LinkageTypes L = getFunctionLinkage(GD); + if (llvm::GlobalValue::isLinkOnceLinkage(L) || + llvm::GlobalValue::isWeakLinkage(L)) { + EmittedDeferredDecls[getMangledName(GD)] = GD; + } } /// List of alias we have emitted. Used to make sure that what they point to @@ -1516,6 +1530,11 @@ public: NewBuilder->WeakRefReferences = std::move(WeakRefReferences); NewBuilder->TBAA = std::move(TBAA); + + assert(NewBuilder->EmittedDeferredDecls.empty() && + "Still have (unmerged) EmittedDeferredDecls deferred decls"); + + NewBuilder->EmittedDeferredDecls = std::move(EmittedDeferredDecls); } private: diff --git a/clang/lib/CodeGen/CodeGenTBAA.cpp b/clang/lib/CodeGen/CodeGenTBAA.cpp index 95763d8e18b7..0cb63fbbe9e5 100644 --- a/clang/lib/CodeGen/CodeGenTBAA.cpp +++ b/clang/lib/CodeGen/CodeGenTBAA.cpp @@ -335,7 +335,42 @@ llvm::MDNode *CodeGenTBAA::getBaseTypeInfoHelper(const Type *Ty) { if (auto *TTy = dyn_cast<RecordType>(Ty)) { const RecordDecl *RD = TTy->getDecl()->getDefinition(); const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD); - SmallVector<llvm::MDBuilder::TBAAStructField, 4> Fields; + using TBAAStructField = llvm::MDBuilder::TBAAStructField; + SmallVector<TBAAStructField, 4> Fields; + if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) { + // Handle C++ base classes. 
Non-virtual bases can be treated as a kind of + // field. Virtual bases are more complex and omitted, but avoid an + // incomplete view for NewStructPathTBAA. + if (CodeGenOpts.NewStructPathTBAA && CXXRD->getNumVBases() != 0) + return BaseTypeMetadataCache[Ty] = nullptr; + for (const CXXBaseSpecifier &B : CXXRD->bases()) { + if (B.isVirtual()) + continue; + QualType BaseQTy = B.getType(); + const CXXRecordDecl *BaseRD = BaseQTy->getAsCXXRecordDecl(); + if (BaseRD->isEmpty()) + continue; + llvm::MDNode *TypeNode = isValidBaseType(BaseQTy) + ? getBaseTypeInfo(BaseQTy) + : getTypeInfo(BaseQTy); + if (!TypeNode) + return BaseTypeMetadataCache[Ty] = nullptr; + uint64_t Offset = Layout.getBaseClassOffset(BaseRD).getQuantity(); + uint64_t Size = + Context.getASTRecordLayout(BaseRD).getDataSize().getQuantity(); + Fields.push_back( + llvm::MDBuilder::TBAAStructField(Offset, Size, TypeNode)); + } + // The order in which base class subobjects are allocated is unspecified, + // so may differ from declaration order. In particular, the Itanium ABI will + // allocate a primary base first. + // Since we exclude empty subobjects, the objects are not overlapping and + // their offsets are unique. + llvm::sort(Fields, + [](const TBAAStructField &A, const TBAAStructField &B) { + return A.Offset < B.Offset; + }); + } for (FieldDecl *Field : RD->fields()) { if (Field->isZeroSize(Context) || Field->isUnnamedBitfield()) continue; diff --git a/clang/lib/CodeGen/SanitizerMetadata.cpp b/clang/lib/CodeGen/SanitizerMetadata.cpp index 5f4eb9be981f..7848cf012633 100644 --- a/clang/lib/CodeGen/SanitizerMetadata.cpp +++ b/clang/lib/CodeGen/SanitizerMetadata.cpp @@ -60,17 +60,17 @@ void SanitizerMetadata::reportGlobal(llvm::GlobalVariable *GV, Meta.NoHWAddress |= CGM.isInNoSanitizeList( FsanitizeArgument.Mask & SanitizerKind::HWAddress, GV, Loc, Ty); - Meta.NoMemtag |= NoSanitizeAttrSet.hasOneOf(SanitizerKind::MemTag); - Meta.NoMemtag |= CGM.isInNoSanitizeList( + Meta.Memtag |= + static_cast<bool>(FsanitizeArgument.Mask & SanitizerKind::MemtagGlobals); + Meta.Memtag &= !NoSanitizeAttrSet.hasOneOf(SanitizerKind::MemTag); + Meta.Memtag &= !CGM.isInNoSanitizeList( FsanitizeArgument.Mask & SanitizerKind::MemTag, GV, Loc, Ty); - if (FsanitizeArgument.has(SanitizerKind::Address)) { - // TODO(hctim): Make this conditional when we migrate off llvm.asan.globals.
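An illustrative hierarchy for the base-class handling above: A and B are non-empty, non-virtual bases of C, so each contributes a leading TBAA struct-path field at its layout offset, and the fields are then sorted because allocation order (primary base first under the Itanium ABI) need not match declaration order:

```cpp
// What the new code describes to TBAA for a simple hierarchy.
struct A { int a; };
struct B { int b; };
struct C : A, B { int c; };

// Typical Itanium layout, hence the sorted TBAA struct-path fields:
//   offset 0: base A, size 4
//   offset 4: base B, size 4
//   offset 8: member c
static_assert(sizeof(C) == 12, "assumes 4-byte int and no extra padding");
```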
- IsDynInit &= !CGM.isInNoSanitizeList(SanitizerKind::Address | - SanitizerKind::KernelAddress, - GV, Loc, Ty, "init"); - Meta.IsDynInit = IsDynInit; - } + Meta.IsDynInit = IsDynInit && !Meta.NoAddress && + FsanitizeArgument.has(SanitizerKind::Address) && + !CGM.isInNoSanitizeList(SanitizerKind::Address | + SanitizerKind::KernelAddress, + GV, Loc, Ty, "init"); GV->setSanitizerMetadata(Meta); } diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 0da32dae2ef6..3a8400a55741 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -2930,7 +2930,7 @@ class OffloadingActionBuilder final { return false; Relocatable = Args.hasFlag(options::OPT_fgpu_rdc, - options::OPT_fno_gpu_rdc, /*Default=*/false); + options::OPT_fno_gpu_rdc, /*Default=*/false); const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>(); assert(HostTC && "No toolchain for host compilation."); @@ -3326,7 +3326,7 @@ class OffloadingActionBuilder final { AssociatedOffloadKind); if (CompileDeviceOnly && CurPhase == FinalPhase && BundleOutput && - BundleOutput.getValue()) { + BundleOutput.value()) { for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) { OffloadAction::DeviceDependences DDep; DDep.add(*CudaDeviceActions[I], *ToolChains.front(), GpuArchList[I], @@ -4355,7 +4355,17 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, return KnownArchs.lookup(TC); llvm::DenseSet<StringRef> Archs; - for (auto &Arg : Args) { + for (auto *Arg : Args) { + // Extract any '--[no-]offload-arch' arguments intended for this toolchain. + std::unique_ptr<llvm::opt::Arg> ExtractedArg = nullptr; + if (Arg->getOption().matches(options::OPT_Xopenmp_target_EQ) && + ToolChain::getOpenMPTriple(Arg->getValue(0)) == TC->getTriple()) { + Arg->claim(); + unsigned Index = Args.getBaseArgs().MakeIndex(Arg->getValue(1)); + ExtractedArg = getOpts().ParseOneArg(Args, Index); + Arg = ExtractedArg.get(); + } + if (Arg->getOption().matches(options::OPT_offload_arch_EQ)) { for (StringRef Arch : llvm::split(Arg->getValue(), ",")) Archs.insert(getCanonicalArchString(C, Args, Arch, TC->getTriple())); @@ -4425,8 +4435,7 @@ Action *Driver::BuildOffloadingActions(Compilation &C, // Get the product of all bound architectures and toolchains. SmallVector<std::pair<const ToolChain *, StringRef>> TCAndArchs; for (const ToolChain *TC : ToolChains) - for (StringRef Arch : getOffloadArchs( - C, C.getArgsForToolChain(TC, "generic", Kind), Kind, TC)) + for (StringRef Arch : getOffloadArchs(C, Args, Kind, TC)) TCAndArchs.push_back(std::make_pair(TC, Arch)); for (unsigned I = 0, E = TCAndArchs.size(); I != E; ++I) @@ -4477,11 +4486,23 @@ Action *Driver::BuildOffloadingActions(Compilation &C, if (offloadDeviceOnly()) return C.MakeAction<OffloadAction>(DDeps, types::TY_Nothing); - Action *OffloadPackager = - C.MakeAction<OffloadPackagerJobAction>(OffloadActions, types::TY_Image); OffloadAction::DeviceDependences DDep; - DDep.add(*OffloadPackager, *C.getSingleOffloadToolChain<Action::OFK_Host>(), - nullptr, Action::OFK_None); + if (C.isOffloadingHostKind(Action::OFK_Cuda) && + !Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false)) { + // If we are not in RDC-mode we just emit the final CUDA fatbinary for each + // translation unit without requiring any linking. 
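The getOffloadArchs change above now also unwraps '-Xopenmp-target=<triple> --offload-arch=...' arguments and still splits each value on commas, deduplicating the architectures into a set. A self-contained sketch of that splitting step (std::set and std::string stand in for llvm::DenseSet and StringRef; the arch names are hypothetical):

    #include <iostream>
    #include <set>
    #include <sstream>
    #include <string>

    int main() {
      std::string Value = "sm_70,sm_80,sm_70"; // e.g. from --offload-arch=
      std::set<std::string> Archs;
      std::stringstream SS(Value);
      for (std::string Arch; std::getline(SS, Arch, ',');)
        Archs.insert(Arch); // duplicates collapse, as with the DenseSet above
      for (const std::string &A : Archs)
        std::cout << A << '\n';
      return 0;
    }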
+ Action *FatbinAction = + C.MakeAction<LinkJobAction>(OffloadActions, types::TY_CUDA_FATBIN); + DDep.add(*FatbinAction, *C.getSingleOffloadToolChain<Action::OFK_Cuda>(), + nullptr, Action::OFK_Cuda); + } else { + // Package all the offloading actions into a single output that can be + // embedded in the host and linked. + Action *PackagerAction = + C.MakeAction<OffloadPackagerJobAction>(OffloadActions, types::TY_Image); + DDep.add(*PackagerAction, *C.getSingleOffloadToolChain<Action::OFK_Host>(), + nullptr, Action::OFK_None); + } OffloadAction::HostDependence HDep( *HostAction, *C.getSingleOffloadToolChain<Action::OFK_Host>(), /*BoundArch=*/nullptr, isa<CompileJobAction>(HostAction) ? DDep : DDeps); diff --git a/clang/lib/Driver/ToolChains/AVR.cpp b/clang/lib/Driver/ToolChains/AVR.cpp index 1e866553d826..89d408823270 100644 --- a/clang/lib/Driver/ToolChains/AVR.cpp +++ b/clang/lib/Driver/ToolChains/AVR.cpp @@ -437,7 +437,6 @@ void AVR::Linker::ConstructJob(Compilation &C, const JobAction &JA, : getToolChain().GetProgramPath(getShortName()); ArgStringList CmdArgs; - AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA); CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); @@ -476,8 +475,8 @@ void AVR::Linker::ConstructJob(Compilation &C, const JobAction &JA, } if (SectionAddressData) { - std::string DataSectionArg = std::string("-Tdata=0x") + - llvm::utohexstr(SectionAddressData.getValue()); + std::string DataSectionArg = + std::string("-Tdata=0x") + llvm::utohexstr(SectionAddressData.value()); CmdArgs.push_back(Args.MakeArgString(DataSectionArg)); } else { // We do not have an entry for this CPU in the address mapping table yet. @@ -503,6 +502,7 @@ void AVR::Linker::ConstructJob(Compilation &C, const JobAction &JA, // Add the link library specific to the MCU. CmdArgs.push_back(Args.MakeArgString(std::string("-l") + CPU)); + AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA); CmdArgs.push_back("--end-group"); // Add user specified linker script. @@ -514,6 +514,8 @@ void AVR::Linker::ConstructJob(Compilation &C, const JobAction &JA, // than the bare minimum supports. if (Linker.find("avr-ld") != std::string::npos) CmdArgs.push_back(Args.MakeArgString(std::string("-m") + *FamilyName)); + } else { + AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA); } C.addCommand(std::make_unique<Command>( diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index c9bbdb2ac72e..97435f1a73de 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -27,6 +27,7 @@ #include "clang/Basic/CharInfo.h" #include "clang/Basic/CodeGenOptions.h" #include "clang/Basic/LangOptions.h" +#include "clang/Basic/MakeSupport.h" #include "clang/Basic/ObjCRuntime.h" #include "clang/Basic/Version.h" #include "clang/Config/config.h" @@ -51,6 +52,7 @@ #include "llvm/Support/Process.h" #include "llvm/Support/TargetParser.h" #include "llvm/Support/YAMLParser.h" +#include <cctype> using namespace clang::driver; using namespace clang::driver::tools; @@ -97,34 +99,6 @@ static void EscapeSpacesAndBackslashes(const char *Arg, } } -// Quote target names for inclusion in GNU Make dependency files. -// Only the characters '$', '#', ' ', '\t' are quoted. 
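The QuoteTarget helper deleted below is not gone: it moved to clang/Basic/MakeSupport.h as quoteMakeTarget (hence the new include above). For reference, a standalone sketch reproducing its observable behavior on std::string (the function name here is invented for the example):

    #include <iostream>
    #include <string>

    // GNU Make target quoting as performed by the relocated helper: '$' is
    // doubled, '#', ' ' and '\t' get a backslash, and any backslashes
    // immediately preceding a space/tab are themselves escaped.
    static std::string quoteMakeTargetSketch(const std::string &Target) {
      std::string Res;
      for (std::size_t I = 0; I != Target.size(); ++I) {
        switch (Target[I]) {
        case ' ':
        case '\t':
          for (std::size_t J = I; J > 0 && Target[J - 1] == '\\'; --J)
            Res.push_back('\\'); // escape the preceding backslashes
          Res.push_back('\\');   // escape the space/tab itself
          break;
        case '$':
          Res.push_back('$');
          break;
        case '#':
          Res.push_back('\\');
          break;
        default:
          break;
        }
        Res.push_back(Target[I]);
      }
      return Res;
    }

    int main() {
      std::cout << quoteMakeTargetSketch("foo bar.o") << '\n'; // foo\ bar.o
      std::cout << quoteMakeTargetSketch("$(X).o") << '\n';    // $$(X).o
      return 0;
    }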
-static void QuoteTarget(StringRef Target, SmallVectorImpl<char> &Res) { - for (unsigned i = 0, e = Target.size(); i != e; ++i) { - switch (Target[i]) { - case ' ': - case '\t': - // Escape the preceding backslashes - for (int j = i - 1; j >= 0 && Target[j] == '\\'; --j) - Res.push_back('\\'); - - // Escape the space/tab - Res.push_back('\\'); - break; - case '$': - Res.push_back('$'); - break; - case '#': - Res.push_back('\\'); - break; - default: - break; - } - - Res.push_back(Target[i]); - } -} - /// Apply \a Work on the current tool chain \a RegularToolChain and any other /// offloading tool chain that is associated with the current action \a JA. static void @@ -567,7 +541,7 @@ static bool useFramePointerForTargetByDefault(const ArgList &Args, break; } - if (Triple.isOSNetBSD()) { + if (Triple.isOSFuchsia() || Triple.isOSNetBSD()) { return !areOptimizationsEnabled(Args); } @@ -1144,7 +1118,7 @@ static void RenderDebugInfoCompressionArgs(const ArgList &Args, if (Value == "none") { CmdArgs.push_back("--compress-debug-sections=none"); } else if (Value == "zlib") { - if (llvm::zlib::isAvailable()) { + if (llvm::compression::zlib::isAvailable()) { CmdArgs.push_back( Args.MakeArgString("--compress-debug-sections=" + Twine(Value))); } else { @@ -1249,7 +1223,7 @@ void Clang::AddPreprocessingOptions(Compilation &C, const JobAction &JA, } else { CmdArgs.push_back("-MT"); SmallString<128> Quoted; - QuoteTarget(A->getValue(), Quoted); + quoteMakeTarget(A->getValue(), Quoted); CmdArgs.push_back(Args.MakeArgString(Quoted)); } } @@ -1274,7 +1248,7 @@ void Clang::AddPreprocessingOptions(Compilation &C, const JobAction &JA, CmdArgs.push_back("-MT"); SmallString<128> Quoted; - QuoteTarget(DepTarget, Quoted); + quoteMakeTarget(DepTarget, Quoted); CmdArgs.push_back(Args.MakeArgString(Quoted)); } @@ -2228,8 +2202,23 @@ void Clang::AddSparcTargetArgs(const ArgList &Args, void Clang::AddSystemZTargetArgs(const ArgList &Args, ArgStringList &CmdArgs) const { - bool HasBackchain = Args.hasFlag(options::OPT_mbackchain, - options::OPT_mno_backchain, false); + if (const Arg *A = Args.getLastArg(clang::driver::options::OPT_mtune_EQ)) { + StringRef Name = A->getValue(); + + std::string TuneCPU; + if (Name == "native") + TuneCPU = std::string(llvm::sys::getHostCPUName()); + else + TuneCPU = std::string(Name); + + if (!TuneCPU.empty()) { + CmdArgs.push_back("-tune-cpu"); + CmdArgs.push_back(Args.MakeArgString(TuneCPU)); + } + } + + bool HasBackchain = + Args.hasFlag(options::OPT_mbackchain, options::OPT_mno_backchain, false); bool HasPackedStack = Args.hasFlag(options::OPT_mpacked_stack, options::OPT_mno_packed_stack, false); systemz::FloatABI FloatABI = @@ -2341,7 +2330,7 @@ void Clang::AddHexagonTargetArgs(const ArgList &Args, if (auto G = toolchains::HexagonToolChain::getSmallDataThreshold(Args)) { CmdArgs.push_back("-mllvm"); CmdArgs.push_back(Args.MakeArgString("-hexagon-small-data-threshold=" + - Twine(G.getValue()))); + Twine(G.value()))); } if (!Args.hasArg(options::OPT_fno_short_enums)) @@ -3231,6 +3220,16 @@ static void RenderAnalyzerOptions(const ArgList &Args, ArgStringList &CmdArgs, Args.AddAllArgValues(CmdArgs, options::OPT_Xanalyzer); } +static bool isValidSymbolName(StringRef S) { + if (S.empty()) + return false; + + if (std::isdigit(S[0])) + return false; + + return llvm::all_of(S, [](char C) { return std::isalnum(C) || C == '_'; }); +} + static void RenderSSPOptions(const Driver &D, const ToolChain &TC, const ArgList &Args, ArgStringList &CmdArgs, bool KernelOrKext) { @@ -3362,6 +3361,16 @@ static 
void RenderSSPOptions(const Driver &D, const ToolChain &TC, } A->render(Args, CmdArgs); } + + if (Arg *A = Args.getLastArg(options::OPT_mstack_protector_guard_symbol_EQ)) { + StringRef Value = A->getValue(); + if (!isValidSymbolName(Value)) { + D.Diag(diag::err_drv_argument_only_allowed_with) + << A->getOption().getName() << "legal symbol name"; + return; + } + A->render(Args, CmdArgs); + } } static void RenderSCPOptions(const ToolChain &TC, const ArgList &Args, @@ -3750,38 +3759,49 @@ static void RenderModulesOptions(Compilation &C, const Driver &D, Args.AddLastArg(CmdArgs, options::OPT_fmodules_prune_interval); Args.AddLastArg(CmdArgs, options::OPT_fmodules_prune_after); - Args.AddLastArg(CmdArgs, options::OPT_fbuild_session_timestamp); + if (HaveClangModules) { + Args.AddLastArg(CmdArgs, options::OPT_fbuild_session_timestamp); - if (Arg *A = Args.getLastArg(options::OPT_fbuild_session_file)) { - if (Args.hasArg(options::OPT_fbuild_session_timestamp)) - D.Diag(diag::err_drv_argument_not_allowed_with) - << A->getAsString(Args) << "-fbuild-session-timestamp"; + if (Arg *A = Args.getLastArg(options::OPT_fbuild_session_file)) { + if (Args.hasArg(options::OPT_fbuild_session_timestamp)) + D.Diag(diag::err_drv_argument_not_allowed_with) + << A->getAsString(Args) << "-fbuild-session-timestamp"; - llvm::sys::fs::file_status Status; - if (llvm::sys::fs::status(A->getValue(), Status)) - D.Diag(diag::err_drv_no_such_file) << A->getValue(); - CmdArgs.push_back(Args.MakeArgString( - "-fbuild-session-timestamp=" + - Twine((uint64_t)std::chrono::duration_cast<std::chrono::seconds>( - Status.getLastModificationTime().time_since_epoch()) - .count()))); - } + llvm::sys::fs::file_status Status; + if (llvm::sys::fs::status(A->getValue(), Status)) + D.Diag(diag::err_drv_no_such_file) << A->getValue(); + CmdArgs.push_back(Args.MakeArgString( + "-fbuild-session-timestamp=" + + Twine((uint64_t)std::chrono::duration_cast<std::chrono::seconds>( + Status.getLastModificationTime().time_since_epoch()) + .count()))); + } - if (Args.getLastArg(options::OPT_fmodules_validate_once_per_build_session)) { - if (!Args.getLastArg(options::OPT_fbuild_session_timestamp, - options::OPT_fbuild_session_file)) - D.Diag(diag::err_drv_modules_validate_once_requires_timestamp); + if (Args.getLastArg( + options::OPT_fmodules_validate_once_per_build_session)) { + if (!Args.getLastArg(options::OPT_fbuild_session_timestamp, + options::OPT_fbuild_session_file)) + D.Diag(diag::err_drv_modules_validate_once_requires_timestamp); - Args.AddLastArg(CmdArgs, - options::OPT_fmodules_validate_once_per_build_session); - } + Args.AddLastArg(CmdArgs, + options::OPT_fmodules_validate_once_per_build_session); + } - if (Args.hasFlag(options::OPT_fmodules_validate_system_headers, - options::OPT_fno_modules_validate_system_headers, - ImplicitModules)) - CmdArgs.push_back("-fmodules-validate-system-headers"); + if (Args.hasFlag(options::OPT_fmodules_validate_system_headers, + options::OPT_fno_modules_validate_system_headers, + ImplicitModules)) + CmdArgs.push_back("-fmodules-validate-system-headers"); - Args.AddLastArg(CmdArgs, options::OPT_fmodules_disable_diagnostic_validation); + Args.AddLastArg(CmdArgs, + options::OPT_fmodules_disable_diagnostic_validation); + } else { + Args.ClaimAllArgs(options::OPT_fbuild_session_timestamp); + Args.ClaimAllArgs(options::OPT_fbuild_session_file); + Args.ClaimAllArgs(options::OPT_fmodules_validate_once_per_build_session); + Args.ClaimAllArgs(options::OPT_fmodules_validate_system_headers); + 
Args.ClaimAllArgs(options::OPT_fno_modules_validate_system_headers); + Args.ClaimAllArgs(options::OPT_fmodules_disable_diagnostic_validation); + } } static void RenderCharacterOptions(const ArgList &Args, const llvm::Triple &T, @@ -4422,12 +4442,14 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, Args.hasFlag(options::OPT_offload_new_driver, options::OPT_no_offload_new_driver, false)); + bool IsRDCMode = + Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false); bool IsUsingLTO = D.isUsingLTO(IsDeviceOffloadAction); auto LTOMode = D.getLTOMode(IsDeviceOffloadAction); // A header module compilation doesn't have a main input file, so invent a // fake one as a placeholder. - const char *ModuleName = [&]{ + const char *ModuleName = [&] { auto *ModuleNameArg = Args.getLastArg(options::OPT_fmodule_name_EQ); return ModuleNameArg ? ModuleNameArg->getValue() : ""; }(); @@ -6285,10 +6307,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, } if (IsCuda || IsHIP) { - if (!Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false) && - Args.hasArg(options::OPT_offload_new_driver)) - D.Diag(diag::err_drv_no_rdc_new_driver); - if (Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false)) + if (IsRDCMode) CmdArgs.push_back("-fgpu-rdc"); if (Args.hasFlag(options::OPT_fgpu_defer_diag, options::OPT_fno_gpu_defer_diag, false)) @@ -6313,6 +6332,10 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, if (IsUsingLTO) Args.AddLastArg(CmdArgs, options::OPT_mibt_seal); + if (Arg *A = Args.getLastArg(options::OPT_mfunction_return_EQ)) + CmdArgs.push_back( + Args.MakeArgString(Twine("-mfunction-return=") + A->getValue())); + // Forward -f options with positive and negative forms; we translate these by // hand. Do not propagate PGO options to the GPU-side compilations as the // profile info is for the host-side compilation only. @@ -6956,13 +6979,22 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, } } - // Host-side cuda compilation receives all device-side outputs in a single - // fatbin as Inputs[1]. Include the binary with -fcuda-include-gpubinary. + // Host-side offloading compilation receives all device-side outputs. Include + // them in the host compilation depending on the target. If the host inputs + // are not empty we use the new-driver scheme, otherwise use the old scheme. if ((IsCuda || IsHIP) && CudaDeviceInput) { + CmdArgs.push_back("-fcuda-include-gpubinary"); + CmdArgs.push_back(CudaDeviceInput->getFilename()); + } else if (!HostOffloadingInputs.empty()) { + if (IsCuda && !IsRDCMode) { + assert(HostOffloadingInputs.size() == 1 && "Only one input expected"); CmdArgs.push_back("-fcuda-include-gpubinary"); - CmdArgs.push_back(CudaDeviceInput->getFilename()); - if (Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false)) - CmdArgs.push_back("-fgpu-rdc"); + CmdArgs.push_back(HostOffloadingInputs.front().getFilename()); + } else { + for (const InputInfo Input : HostOffloadingInputs) + CmdArgs.push_back(Args.MakeArgString("-fembed-offload-object=" + + TC.getInputFilename(Input))); + } } if (IsCuda) { @@ -7011,12 +7043,6 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, } } - // Host-side offloading recieves the device object files and embeds it in a - // named section including the associated target triple and architecture. 
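The cc1 hunk above (which subsumes the unconditional embedding loop being deleted here) chooses between two embedding schemes: non-RDC CUDA keeps the classic single -fcuda-include-gpubinary handshake, while every other offload path embeds each device object via -fembed-offload-object. Condensed into a runnable sketch, with plain strings instead of driver types and hypothetical file names:

    #include <cassert>
    #include <iostream>
    #include <string>
    #include <vector>

    int main() {
      bool IsCuda = true, IsRDCMode = false;
      std::vector<std::string> HostOffloadingInputs = {"a.fatbin"};
      std::vector<std::string> CmdArgs;
      if (IsCuda && !IsRDCMode) {
        // Exactly one device input is expected in this mode.
        assert(HostOffloadingInputs.size() == 1);
        CmdArgs.push_back("-fcuda-include-gpubinary");
        CmdArgs.push_back(HostOffloadingInputs.front());
      } else {
        for (const std::string &Input : HostOffloadingInputs)
          CmdArgs.push_back("-fembed-offload-object=" + Input);
      }
      for (const std::string &A : CmdArgs)
        std::cout << A << '\n';
      return 0;
    }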
- for (const InputInfo Input : HostOffloadingInputs) - CmdArgs.push_back(Args.MakeArgString("-fembed-offload-object=" + - TC.getInputFilename(Input))); - if (Triple.isAMDGPU()) { handleAMDGPUCodeObjectVersionOptions(D, Args, CmdArgs); @@ -8314,7 +8340,8 @@ void OffloadPackager::ConstructJob(Compilation &C, const JobAction &JA, ArgStringList Features; SmallVector<StringRef> FeatureArgs; - getTargetFeatures(TC->getDriver(), TC->getTriple(), Args, Features, false); + getTargetFeatures(TC->getDriver(), TC->getTriple(), TCArgs, Features, + false); llvm::copy_if(Features, std::back_inserter(FeatureArgs), [](StringRef Arg) { return !Arg.startswith("-target"); }); @@ -8382,7 +8409,7 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA, for (StringRef LibName : BCLibs) CmdArgs.push_back(Args.MakeArgString( - "-target-library=" + Action::GetOffloadKindName(Action::OFK_OpenMP) + + "--bitcode-library=" + Action::GetOffloadKindName(Action::OFK_OpenMP) + "-" + TC->getTripleString() + "-" + Arch + "=" + LibName)); } @@ -8402,63 +8429,64 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA, } else if (A->getOption().matches(options::OPT_O0)) OOpt = "0"; if (!OOpt.empty()) - CmdArgs.push_back(Args.MakeArgString(Twine("-opt-level=O") + OOpt)); + CmdArgs.push_back(Args.MakeArgString(Twine("--opt-level=O") + OOpt)); } } - CmdArgs.push_back("-host-triple"); - CmdArgs.push_back(Args.MakeArgString(TheTriple.getTriple())); + CmdArgs.push_back( + Args.MakeArgString("--host-triple=" + TheTriple.getTriple())); if (Args.hasArg(options::OPT_v)) - CmdArgs.push_back("-v"); + CmdArgs.push_back("--verbose"); - // Add debug information if present. if (const Arg *A = Args.getLastArg(options::OPT_g_Group)) { - const Option &Opt = A->getOption(); - if (Opt.matches(options::OPT_gN_Group)) { - if (Opt.matches(options::OPT_gline_directives_only) || - Opt.matches(options::OPT_gline_tables_only)) - CmdArgs.push_back("-gline-directives-only"); - } else - CmdArgs.push_back("-g"); + if (!A->getOption().matches(options::OPT_g0)) + CmdArgs.push_back("--device-debug"); } for (const auto &A : Args.getAllArgValues(options::OPT_Xcuda_ptxas)) - CmdArgs.push_back(Args.MakeArgString("-ptxas-args=" + A)); + CmdArgs.push_back(Args.MakeArgString("--ptxas-args=" + A)); // Forward remarks passes to the LLVM backend in the wrapper. if (const Arg *A = Args.getLastArg(options::OPT_Rpass_EQ)) CmdArgs.push_back( - Args.MakeArgString(Twine("-pass-remarks=") + A->getValue())); + Args.MakeArgString(Twine("--pass-remarks=") + A->getValue())); if (const Arg *A = Args.getLastArg(options::OPT_Rpass_missed_EQ)) CmdArgs.push_back( - Args.MakeArgString(Twine("-pass-remarks-missed=") + A->getValue())); + Args.MakeArgString(Twine("--pass-remarks-missed=") + A->getValue())); if (const Arg *A = Args.getLastArg(options::OPT_Rpass_analysis_EQ)) CmdArgs.push_back( - Args.MakeArgString(Twine("-pass-remarks-analysis=") + A->getValue())); + Args.MakeArgString(Twine("--pass-remarks-analysis=") + A->getValue())); if (Args.getLastArg(options::OPT_save_temps_EQ)) - CmdArgs.push_back("-save-temps"); + CmdArgs.push_back("--save-temps"); // Construct the link job so we can wrap around it. Linker->ConstructJob(C, JA, Output, Inputs, Args, LinkingOutput); const auto &LinkCommand = C.getJobs().getJobs().back(); // Forward -Xoffload-linker<-triple> arguments to the device link job. 
- for (auto *Arg : Args.filtered(options::OPT_Xoffload_linker)) { - StringRef Val = Arg->getValue(0); + for (Arg *A : Args.filtered(options::OPT_Xoffload_linker)) { + StringRef Val = A->getValue(0); if (Val.empty()) CmdArgs.push_back( - Args.MakeArgString(Twine("-device-linker=") + Arg->getValue(1))); + Args.MakeArgString(Twine("--device-linker=") + A->getValue(1))); else CmdArgs.push_back(Args.MakeArgString( - "-device-linker=" + + "--device-linker=" + ToolChain::getOpenMPTriple(Val.drop_front()).getTriple() + "=" + - Arg->getValue(1))); + A->getValue(1))); } Args.ClaimAllArgs(options::OPT_Xoffload_linker); + // Forward `-mllvm` arguments to the LLVM invocations if present. + for (Arg *A : Args.filtered(options::OPT_mllvm)) { + CmdArgs.push_back("-mllvm"); + CmdArgs.push_back(A->getValue()); + A->claim(); + } + // Add the linker arguments to be forwarded by the wrapper. - CmdArgs.push_back("-linker-path"); - CmdArgs.push_back(LinkCommand->getExecutable()); + CmdArgs.push_back(Args.MakeArgString(Twine("--linker-path=") + + LinkCommand->getExecutable())); CmdArgs.push_back("--"); for (const char *LinkArg : LinkCommand->getArguments()) CmdArgs.push_back(LinkArg); diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 2d53b829b01c..1d2c085d683e 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -727,7 +727,8 @@ bool tools::addOpenMPRuntime(ArgStringList &CmdArgs, const ToolChain &TC, if (IsOffloadingHost) CmdArgs.push_back("-lomptarget"); - if (IsOffloadingHost && TC.getDriver().isUsingLTO(/* IsOffload */ true)) + if (IsOffloadingHost && TC.getDriver().isUsingLTO(/* IsOffload */ true) && + !Args.hasArg(options::OPT_nogpulib)) CmdArgs.push_back("-lomptarget.devicertl"); addArchSpecificRPath(TC, Args, CmdArgs); diff --git a/clang/lib/Driver/ToolChains/Darwin.cpp b/clang/lib/Driver/ToolChains/Darwin.cpp index 0a8a9c6eb6ff..c9e773701ac3 100644 --- a/clang/lib/Driver/ToolChains/Darwin.cpp +++ b/clang/lib/Driver/ToolChains/Darwin.cpp @@ -1551,10 +1551,10 @@ struct DarwinPlatform { options::ID Opt; switch (Platform) { case DarwinPlatformKind::MacOS: - Opt = options::OPT_mmacosx_version_min_EQ; + Opt = options::OPT_mmacos_version_min_EQ; break; case DarwinPlatformKind::IPhoneOS: - Opt = options::OPT_miphoneos_version_min_EQ; + Opt = options::OPT_mios_version_min_EQ; break; case DarwinPlatformKind::TvOS: Opt = options::OPT_mtvos_version_min_EQ; @@ -1727,8 +1727,8 @@ private: Optional<DarwinPlatform> getDeploymentTargetFromOSVersionArg(DerivedArgList &Args, const Driver &TheDriver) { - Arg *OSXVersion = Args.getLastArg(options::OPT_mmacosx_version_min_EQ); - Arg *iOSVersion = Args.getLastArg(options::OPT_miphoneos_version_min_EQ, + Arg *macOSVersion = Args.getLastArg(options::OPT_mmacos_version_min_EQ); + Arg *iOSVersion = Args.getLastArg(options::OPT_mios_version_min_EQ, options::OPT_mios_simulator_version_min_EQ); Arg *TvOSVersion = Args.getLastArg(options::OPT_mtvos_version_min_EQ, @@ -1736,15 +1736,15 @@ getDeploymentTargetFromOSVersionArg(DerivedArgList &Args, Arg *WatchOSVersion = Args.getLastArg(options::OPT_mwatchos_version_min_EQ, options::OPT_mwatchos_simulator_version_min_EQ); - if (OSXVersion) { + if (macOSVersion) { if (iOSVersion || TvOSVersion || WatchOSVersion) { TheDriver.Diag(diag::err_drv_argument_not_allowed_with) - << OSXVersion->getAsString(Args) + << macOSVersion->getAsString(Args) << (iOSVersion ? iOSVersion : TvOSVersion ? 
TvOSVersion : WatchOSVersion) ->getAsString(Args); } - return DarwinPlatform::createOSVersionArg(Darwin::MacOS, OSXVersion); + return DarwinPlatform::createOSVersionArg(Darwin::MacOS, macOSVersion); } else if (iOSVersion) { if (TvOSVersion || WatchOSVersion) { TheDriver.Diag(diag::err_drv_argument_not_allowed_with) diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp index f52bb8af5ec9..34396b0b59c2 100644 --- a/clang/lib/Driver/ToolChains/Gnu.cpp +++ b/clang/lib/Driver/ToolChains/Gnu.cpp @@ -2087,7 +2087,7 @@ void Generic_GCC::GCCInstallationDetector::print(raw_ostream &OS) const { bool Generic_GCC::GCCInstallationDetector::getBiarchSibling(Multilib &M) const { if (BiarchSibling) { - M = BiarchSibling.getValue(); + M = BiarchSibling.value(); return true; } return false; diff --git a/clang/lib/Driver/ToolChains/Hexagon.cpp b/clang/lib/Driver/ToolChains/Hexagon.cpp index 9142dba81d54..ed07e710fc49 100644 --- a/clang/lib/Driver/ToolChains/Hexagon.cpp +++ b/clang/lib/Driver/ToolChains/Hexagon.cpp @@ -340,8 +340,8 @@ constructHexagonLinkArgs(Compilation &C, const JobAction &JA, CmdArgs.push_back("-pie"); if (auto G = toolchains::HexagonToolChain::getSmallDataThreshold(Args)) { - CmdArgs.push_back(Args.MakeArgString("-G" + Twine(G.getValue()))); - UseG0 = G.getValue() == 0; + CmdArgs.push_back(Args.MakeArgString("-G" + Twine(G.value()))); + UseG0 = G.value() == 0; } CmdArgs.push_back("-o"); diff --git a/clang/lib/Driver/ToolChains/MinGW.cpp b/clang/lib/Driver/ToolChains/MinGW.cpp index c4b4f8e9b89b..ae7c4c56bf9e 100644 --- a/clang/lib/Driver/ToolChains/MinGW.cpp +++ b/clang/lib/Driver/ToolChains/MinGW.cpp @@ -339,8 +339,9 @@ void tools::MinGW::Linker::ConstructJob(Compilation &C, const JobAction &JA, // Simplified from Generic_GCC::GCCInstallationDetector::ScanLibDirForGCCTriple. 
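In the MinGW hunk below, findGccVersion gains a GCCVersion out-parameter so the toolchain can remember the best version it found while scanning (used further down to build the Gentoo-style g++-v<version> include paths). A reduced sketch of that out-parameter pattern, with an invented two-field version type instead of Generic_GCC::GCCVersion:

    #include <iostream>
    #include <string>
    #include <vector>

    struct GCCVersion {
      int Major = 0, Minor = 0;
      bool operator>(const GCCVersion &O) const {
        return Major != O.Major ? Major > O.Major : Minor > O.Minor;
      }
    };

    // Report the best candidate through the out-parameter, as the rewritten
    // helper does; it starts from "0.0.0" so any real version wins.
    static bool findBestVersion(const std::vector<GCCVersion> &Candidates,
                                GCCVersion &Best) {
      bool Found = false;
      Best = GCCVersion{};
      for (const GCCVersion &V : Candidates)
        if (V > Best) {
          Best = V;
          Found = true;
        }
      return Found;
    }

    int main() {
      GCCVersion Best;
      if (findBestVersion({{10, 3}, {12, 2}, {11, 1}}, Best))
        std::cout << "g++-v" << Best.Major << "." << Best.Minor << '\n'; // g++-v12.2
      return 0;
    }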
static bool findGccVersion(StringRef LibDir, std::string &GccLibDir, - std::string &Ver) { - auto Version = toolchains::Generic_GCC::GCCVersion::Parse("0.0.0"); + std::string &Ver, + toolchains::Generic_GCC::GCCVersion &Version) { + Version = toolchains::Generic_GCC::GCCVersion::Parse("0.0.0"); std::error_code EC; for (llvm::sys::fs::directory_iterator LI(LibDir, EC), LE; !EC && LI != LE; LI = LI.increment(EC)) { @@ -371,7 +372,7 @@ void toolchains::MinGW::findGccLibDir() { for (StringRef CandidateSysroot : SubdirNames) { llvm::SmallString<1024> LibDir(Base); llvm::sys::path::append(LibDir, CandidateLib, "gcc", CandidateSysroot); - if (findGccVersion(LibDir, GccLibDir, Ver)) { + if (findGccVersion(LibDir, GccLibDir, Ver, GccVer)) { SubdirName = std::string(CandidateSysroot); return; } @@ -438,6 +439,11 @@ toolchains::MinGW::MinGW(const Driver &D, const llvm::Triple &Triple, getFilePaths().push_back(GccLibDir); getFilePaths().push_back( (Base + SubdirName + llvm::sys::path::get_separator() + "lib").str()); + + // Gentoo + getFilePaths().push_back( + (Base + SubdirName + llvm::sys::path::get_separator() + "mingw/lib").str()); + getFilePaths().push_back(Base + "lib"); // openSUSE getFilePaths().push_back(Base + SubdirName + "/sys-root/mingw/lib"); @@ -593,6 +599,11 @@ void toolchains::MinGW::AddClangSystemIncludeArgs(const ArgList &DriverArgs, addSystemInclude(DriverArgs, CC1Args, Base + SubdirName + llvm::sys::path::get_separator() + "include"); + + // Gentoo + addSystemInclude(DriverArgs, CC1Args, + Base + SubdirName + llvm::sys::path::get_separator() + "usr/include"); + addSystemInclude(DriverArgs, CC1Args, Base + "include"); } @@ -620,7 +631,7 @@ void toolchains::MinGW::AddClangCXXStdlibIncludeArgs( } case ToolChain::CST_Libstdcxx: - llvm::SmallVector<llvm::SmallString<1024>, 4> CppIncludeBases; + llvm::SmallVector<llvm::SmallString<1024>, 7> CppIncludeBases; CppIncludeBases.emplace_back(Base); llvm::sys::path::append(CppIncludeBases[0], SubdirName, "include", "c++"); CppIncludeBases.emplace_back(Base); @@ -630,6 +641,15 @@ void toolchains::MinGW::AddClangCXXStdlibIncludeArgs( llvm::sys::path::append(CppIncludeBases[2], "include", "c++", Ver); CppIncludeBases.emplace_back(GccLibDir); llvm::sys::path::append(CppIncludeBases[3], "include", "c++"); + CppIncludeBases.emplace_back(GccLibDir); + llvm::sys::path::append(CppIncludeBases[4], "include", + "g++-v" + GccVer.Text); + CppIncludeBases.emplace_back(GccLibDir); + llvm::sys::path::append(CppIncludeBases[5], "include", + "g++-v" + GccVer.MajorStr + "." 
+ GccVer.MinorStr); + CppIncludeBases.emplace_back(GccLibDir); + llvm::sys::path::append(CppIncludeBases[6], "include", + "g++-v" + GccVer.MajorStr); for (auto &CppIncludeBase : CppIncludeBases) { addSystemInclude(DriverArgs, CC1Args, CppIncludeBase); CppIncludeBase += Slash; diff --git a/clang/lib/Driver/ToolChains/MinGW.h b/clang/lib/Driver/ToolChains/MinGW.h index c9553b4f4652..f15f99dc8a8c 100644 --- a/clang/lib/Driver/ToolChains/MinGW.h +++ b/clang/lib/Driver/ToolChains/MinGW.h @@ -103,6 +103,7 @@ private: std::string Base; std::string GccLibDir; + clang::driver::toolchains::Generic_GCC::GCCVersion GccVer; std::string Ver; std::string SubdirName; mutable std::unique_ptr<tools::gcc::Preprocessor> Preprocessor; diff --git a/clang/lib/Driver/ToolChains/RISCVToolchain.cpp b/clang/lib/Driver/ToolChains/RISCVToolchain.cpp index a048765bc6d3..3491de22d371 100644 --- a/clang/lib/Driver/ToolChains/RISCVToolchain.cpp +++ b/clang/lib/Driver/ToolChains/RISCVToolchain.cpp @@ -201,8 +201,11 @@ void RISCV::Linker::ConstructJob(Compilation &C, const JobAction &JA, if (!Args.hasArg(options::OPT_nostdlib) && !Args.hasArg(options::OPT_nodefaultlibs)) { - if (ToolChain.ShouldLinkCXXStdlib(Args)) - ToolChain.AddCXXStdlibLibArgs(Args, CmdArgs); + if (D.CCCIsCXX()) { + if (ToolChain.ShouldLinkCXXStdlib(Args)) + ToolChain.AddCXXStdlibLibArgs(Args, CmdArgs); + CmdArgs.push_back("-lm"); + } CmdArgs.push_back("--start-group"); CmdArgs.push_back("-lc"); CmdArgs.push_back("-lgloss"); diff --git a/clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp b/clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp index c4797cea333f..709b781968bf 100644 --- a/clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp +++ b/clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp @@ -31,14 +31,14 @@ namespace { /// at position \p Key. void serializeObject(Object &Paren, StringRef Key, Optional<Object> Obj) { if (Obj) - Paren[Key] = std::move(Obj.getValue()); + Paren[Key] = std::move(Obj.value()); } /// Helper function to inject a JSON array \p Array into object \p Paren at /// position \p Key. void serializeArray(Object &Paren, StringRef Key, Optional<Array> Array) { if (Array) - Paren[Key] = std::move(Array.getValue()); + Paren[Key] = std::move(Array.value()); } /// Serialize a \c VersionTuple \p V with the Symbol Graph semantic version diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp index 2cb985cdc4e5..1cd28ab073da 100644 --- a/clang/lib/Format/ContinuationIndenter.cpp +++ b/clang/lib/Format/ContinuationIndenter.cpp @@ -37,7 +37,7 @@ static bool shouldIndentWrappedSelectorName(const FormatStyle &Style, // Returns the length of everything up to the first possible line break after // the ), ], } or > matching \c Tok. static unsigned getLengthToMatchingParen(const FormatToken &Tok, - const std::vector<ParenState> &Stack) { + const SmallVector<ParenState> &Stack) { // Normally whether or not a break before T is possible is calculated and // stored in T.CanBreakBefore. Braces, array initializers and text proto // messages like `key: < ... 
>` are an exception: a break is possible @@ -404,6 +404,7 @@ bool ContinuationIndenter::mustBreak(const LineState &State) { (State.Column + State.Line->Last->TotalLength - Previous.TotalLength > getColumnLimit(State) || CurrentState.BreakBeforeParameter) && + (!Current.isTrailingComment() || Current.NewlinesBefore > 0) && (Style.AllowShortFunctionsOnASingleLine != FormatStyle::SFS_All || Style.BreakConstructorInitializers != FormatStyle::BCIS_BeforeColon || Style.ColumnLimit != 0)) { @@ -793,6 +794,7 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun, (Previous.is(tok::colon) && Previous.is(TT_ObjCMethodExpr)))) { CurrentState.LastSpace = State.Column; } else if (Previous.is(TT_CtorInitializerColon) && + (!Current.isTrailingComment() || Current.NewlinesBefore > 0) && Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon) { CurrentState.Indent = State.Column; @@ -1032,7 +1034,7 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State, // be considered bin packing unless the relevant AllowAll option is false or // this is a dict/object literal. bool PreviousIsBreakingCtorInitializerColon = - Previous.is(TT_CtorInitializerColon) && + PreviousNonComment && PreviousNonComment->is(TT_CtorInitializerColon) && Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon; if (!(Previous.isOneOf(tok::l_paren, tok::l_brace, TT_BinaryOperator) || PreviousIsBreakingCtorInitializerColon) || diff --git a/clang/lib/Format/ContinuationIndenter.h b/clang/lib/Format/ContinuationIndenter.h index 494a9727d5ed..620060e68861 100644 --- a/clang/lib/Format/ContinuationIndenter.h +++ b/clang/lib/Format/ContinuationIndenter.h @@ -434,7 +434,7 @@ struct LineState { /// A stack keeping track of properties applying to parenthesis /// levels. - std::vector<ParenState> Stack; + SmallVector<ParenState> Stack; /// Ignore the stack of \c ParenStates for state comparison. 
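Several hunks in this area swap std::vector for llvm::SmallVector on hot, usually-shallow containers such as the ParenState stack above. The point, sketched below, is that the first N elements live inline in the object itself, avoiding a heap allocation in the common case (requires LLVM's ADT headers and linking LLVMSupport; the inline capacity 8 is illustrative, the diff relies on SmallVector's default):

    #include "llvm/ADT/SmallVector.h"

    struct ParenStateSketch {
      int Indent;
    };

    int main() {
      llvm::SmallVector<ParenStateSketch, 8> Stack;
      for (int I = 0; I < 8; ++I)
        Stack.push_back({I * 2}); // stays in the inline buffer, no heap traffic
      Stack.push_back({16});      // the ninth element spills to the heap
      return Stack.size() == 9 ? 0 : 1;
    }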
/// diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp index 51526dc2a681..d13907faca43 100644 --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -2386,7 +2386,7 @@ private: tooling::Replacements generateFixes() { tooling::Replacements Fixes; - std::vector<FormatToken *> Tokens; + SmallVector<FormatToken *> Tokens; std::copy(DeletedTokens.begin(), DeletedTokens.end(), std::back_inserter(Tokens)); @@ -2580,7 +2580,7 @@ struct JavaImportDirective { StringRef Identifier; StringRef Text; unsigned Offset; - std::vector<StringRef> AssociatedCommentLines; + SmallVector<StringRef> AssociatedCommentLines; bool IsStatic; }; @@ -2983,7 +2983,7 @@ tooling::Replacements sortJavaImports(const FormatStyle &Style, StringRef Code, llvm::Regex ImportRegex(JavaImportRegexPattern); SmallVector<StringRef, 4> Matches; SmallVector<JavaImportDirective, 16> ImportsInBlock; - std::vector<StringRef> AssociatedCommentLines; + SmallVector<StringRef> AssociatedCommentLines; bool FormattingOff = false; @@ -3433,17 +3433,19 @@ LangOptions getFormattingLangOpts(const FormatStyle &Style) { } const char *StyleOptionHelpDescription = - "Coding style, currently supports:\n" - " LLVM, GNU, Google, Chromium, Microsoft, Mozilla, WebKit.\n" - "Use -style=file to load style configuration from\n" - ".clang-format file located in one of the parent\n" - "directories of the source file (or current\n" - "directory for stdin).\n" - "Use -style=file:<format_file_path> to explicitly specify\n" - "the configuration file.\n" - "Use -style=\"{key: value, ...}\" to set specific\n" - "parameters, e.g.:\n" - " -style=\"{BasedOnStyle: llvm, IndentWidth: 8}\""; + "Set coding style. <string> can be:\n" + "1. A preset: LLVM, GNU, Google, Chromium, Microsoft,\n" + " Mozilla, WebKit.\n" + "2. 'file' to load style configuration from a\n" + " .clang-format file in one of the parent directories\n" + " of the source file (for stdin, see --assume-filename).\n" + " If no .clang-format file is found, falls back to\n" + " --fallback-style.\n" + " --style=file is the default.\n" + "3. 'file:<format_file_path>' to explicitly specify\n" + " the configuration file.\n" + "4. \"{key: value, ...}\" to set specific parameters, e.g.:\n" + " --style=\"{BasedOnStyle: llvm, IndentWidth: 8}\""; static FormatStyle::LanguageKind getLanguageByFileName(StringRef FileName) { if (FileName.endswith(".java")) @@ -3498,6 +3500,7 @@ FormatStyle::LanguageKind guessLanguage(StringRef FileName, StringRef Code) { return GuessedLanguage; } +// Update StyleOptionHelpDescription above when changing this. const char *DefaultFormatStyle = "file"; const char *DefaultFallbackStyle = "LLVM"; diff --git a/clang/lib/Format/FormatToken.cpp b/clang/lib/Format/FormatToken.cpp index 2c0fee6975c2..832af463206c 100644 --- a/clang/lib/Format/FormatToken.cpp +++ b/clang/lib/Format/FormatToken.cpp @@ -264,7 +264,7 @@ void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) { // We can never place more than ColumnLimit / 3 items in a row (because of the // spaces and the comma). 
unsigned MaxItems = Style.ColumnLimit / 3; - std::vector<unsigned> MinSizeInColumn; + SmallVector<unsigned> MinSizeInColumn; MinSizeInColumn.reserve(MaxItems); for (unsigned Columns = 1; Columns <= MaxItems; ++Columns) { ColumnFormat Format; diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h index b6cc021affae..73e32979853f 100644 --- a/clang/lib/Format/FormatToken.h +++ b/clang/lib/Format/FormatToken.h @@ -497,6 +497,15 @@ public: // in a configured macro expansion. llvm::Optional<MacroExpansion> MacroCtx; + /// When macro expansion introduces nodes with children, those are marked as + /// \c MacroParent. + /// FIXME: The formatting code currently hard-codes the assumption that + /// child nodes are introduced by blocks following an opening brace. + /// This is deeply baked into the code and disentangling this will require + /// significant refactorings. \c MacroParent allows us to special-case the + /// cases in which we treat parents as block-openers for now. + bool MacroParent = false; + bool is(tok::TokenKind Kind) const { return Tok.is(Kind); } bool is(TokenType TT) const { return getType() == TT; } bool is(const IdentifierInfo *II) const { diff --git a/clang/lib/Format/MacroCallReconstructor.cpp b/clang/lib/Format/MacroCallReconstructor.cpp new file mode 100644 index 000000000000..ccff183cf0da --- /dev/null +++ b/clang/lib/Format/MacroCallReconstructor.cpp @@ -0,0 +1,573 @@ +//===--- MacroCallReconstructor.cpp - Format C++ code -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains the implementation of MacroCallReconstructor, which fits +/// a reconstructed macro call to a parsed set of UnwrappedLines. +/// +//===----------------------------------------------------------------------===// + +#include "Macros.h" + +#include "UnwrappedLineParser.h" +#include "clang/Basic/TokenKinds.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/Support/Debug.h" +#include <cassert> + +#define DEBUG_TYPE "format-reconstruct" + +namespace clang { +namespace format { + +// Call \p Call for each token in the unwrapped line given, passing +// the token, its parent and whether it is the first token in the line.
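A plain-C++ sketch of the traversal this helper (defined just below) performs: visit each token with its parent, recurse into child lines, and restart the First flag at every line (Node and the token strings are invented stand-ins for UnwrappedLine/FormatToken):

    #include <iostream>
    #include <string>
    #include <vector>

    struct Node {
      std::string Tok;
      std::vector<Node> Children; // child lines hanging off this token
    };

    template <typename Fn>
    void forEach(const std::vector<Node> &Line, const Fn &Call,
                 const Node *Parent = nullptr) {
      bool First = true;
      for (const Node &N : Line) {
        Call(N, Parent, First);
        First = false;
        forEach(N.Children, Call, &N); // children see N as their parent
      }
    }

    int main() {
      std::vector<Node> Line = {{"{", {{"x", {}}}}, {"}", {}}};
      forEach(Line, [](const Node &N, const Node *P, bool First) {
        std::cout << N.Tok << " parent=" << (P ? P->Tok : "<null>")
                  << " first=" << First << '\n';
      });
      return 0;
    }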
+template <typename T> +void forEachToken(const UnwrappedLine &Line, const T &Call, + FormatToken *Parent = nullptr) { + bool First = true; + for (const auto &N : Line.Tokens) { + Call(N.Tok, Parent, First); + First = false; + for (const auto &Child : N.Children) { + forEachToken(Child, Call, N.Tok); + } + } +} + +MacroCallReconstructor::MacroCallReconstructor( + unsigned Level, + const llvm::DenseMap<FormatToken *, std::unique_ptr<UnwrappedLine>> + &ActiveExpansions) + : Level(Level), IdToReconstructed(ActiveExpansions) { + Result.Tokens.push_back(std::make_unique<LineNode>()); + ActiveReconstructedLines.push_back(&Result); +} + +void MacroCallReconstructor::addLine(const UnwrappedLine &Line) { + assert(State != Finalized); + LLVM_DEBUG(llvm::dbgs() << "MCR: new line...\n"); + forEachToken(Line, [&](FormatToken *Token, FormatToken *Parent, bool First) { + add(Token, Parent, First); + }); + assert(InProgress || finished()); +} + +UnwrappedLine MacroCallReconstructor::takeResult() && { + finalize(); + assert(Result.Tokens.size() == 1 && Result.Tokens.front()->Children.size() == 1); + UnwrappedLine Final = + createUnwrappedLine(*Result.Tokens.front()->Children.front(), Level); + assert(!Final.Tokens.empty()); + return Final; +} + +// Reconstruct the position of the next \p Token, given its parent \p +// ExpandedParent in the incoming unwrapped line. \p First specifies whether it +// is the first token in a given unwrapped line. +void MacroCallReconstructor::add(FormatToken *Token, + FormatToken *ExpandedParent, bool First) { + LLVM_DEBUG( + llvm::dbgs() << "MCR: Token: " << Token->TokenText << ", Parent: " + << (ExpandedParent ? ExpandedParent->TokenText : "<null>") + << ", First: " << First << "\n"); + // In order to be able to find the correct parent in the reconstructed token + // stream, we need to continue the last open reconstruction until we find the + // given token if it is part of the reconstructed token stream. + // + // Note that hidden tokens can be part of the reconstructed stream in nested + // macro calls. + // For example, given + // #define C(x, y) x y + // #define B(x) {x} + // And the call: + // C(a, B(b)) + // The outer macro call will be C(a, {b}), and the hidden token '}' can be + // found in the reconstructed token stream of that expansion level. + // In the expanded token stream + // a {b} + // 'b' is a child of '{'. We need to continue the open expansion of the ',' + // in the call of 'C' in order to correctly set the ',' as the parent of '{', + // so we later set the spelled token 'b' as a child of the ','. + if (!ActiveExpansions.empty() && Token->MacroCtx && + (Token->MacroCtx->Role != MR_Hidden || + ActiveExpansions.size() != Token->MacroCtx->ExpandedFrom.size())) { + if (/*PassedMacroComma = */ reconstructActiveCallUntil(Token)) + First = true; + } + + prepareParent(ExpandedParent, First); + + if (Token->MacroCtx) { + // If this token was generated by a macro call, add the reconstructed + // equivalent of the token. + reconstruct(Token); + } else { + // Otherwise, we add it to the current line. + appendToken(Token); + } +} + +// Adjusts the stack of active reconstructed lines so we're ready to push +// tokens. The tokens to be pushed are children of ExpandedParent in the +// expanded code. 
+// +// This may entail: +// - creating a new line, if the parent is on the active line +// - popping active lines, if the parent is further up the stack +// +// Postcondition: +// ActiveReconstructedLines.back() is the line that has \p ExpandedParent or its +// reconstructed replacement token as a parent (when possible) - that is, the +// last token in \c ActiveReconstructedLines[ActiveReconstructedLines.size()-2] +// is the parent of ActiveReconstructedLines.back() in the reconstructed +// unwrapped line. +void MacroCallReconstructor::prepareParent(FormatToken *ExpandedParent, + bool NewLine) { + LLVM_DEBUG({ + llvm::dbgs() << "ParentMap:\n"; + debugParentMap(); + }); + // We want to find the parent in the new unwrapped line, where the expanded + // parent might have been replaced during reconstruction. + FormatToken *Parent = getParentInResult(ExpandedParent); + LLVM_DEBUG(llvm::dbgs() << "MCR: New parent: " + << (Parent ? Parent->TokenText : "<null>") << "\n"); + + FormatToken *OpenMacroParent = nullptr; + if (!MacroCallStructure.empty()) { + // Inside a macro expansion, it is possible to lose track of the correct + // parent - either because it is already popped, for example because it was + // in a different macro argument (e.g. M({, })), or when we work on invalid + // code. + // Thus, we use the innermost macro call's parent as the parent at which + // we stop; this allows us to stay within the macro expansion and keeps + // any problems confined to the extent of the macro call. + OpenMacroParent = + getParentInResult(MacroCallStructure.back().MacroCallLParen); + LLVM_DEBUG(llvm::dbgs() + << "MacroCallLParen: " + << MacroCallStructure.back().MacroCallLParen->TokenText + << ", OpenMacroParent: " + << (OpenMacroParent ? OpenMacroParent->TokenText : "<null>") + << "\n"); + } + if (NewLine || + (!ActiveReconstructedLines.back()->Tokens.empty() && + Parent == ActiveReconstructedLines.back()->Tokens.back()->Tok)) { + // If we are at the first token in a new line, we want to also + // create a new line in the resulting reconstructed unwrapped line. + while (ActiveReconstructedLines.back()->Tokens.empty() || + (Parent != ActiveReconstructedLines.back()->Tokens.back()->Tok && + ActiveReconstructedLines.back()->Tokens.back()->Tok != + OpenMacroParent)) { + ActiveReconstructedLines.pop_back(); + assert(!ActiveReconstructedLines.empty()); + } + assert(!ActiveReconstructedLines.empty()); + ActiveReconstructedLines.back()->Tokens.back()->Children.push_back( + std::make_unique<ReconstructedLine>()); + ActiveReconstructedLines.push_back( + &*ActiveReconstructedLines.back()->Tokens.back()->Children.back()); + } else if (parentLine().Tokens.back()->Tok != Parent) { + // If we're not the first token in a new line, pop lines until we find + // the child of \c Parent in the stack. + while (Parent != parentLine().Tokens.back()->Tok && + parentLine().Tokens.back()->Tok && + parentLine().Tokens.back()->Tok != OpenMacroParent) { + ActiveReconstructedLines.pop_back(); + assert(!ActiveReconstructedLines.empty()); + } + } + assert(!ActiveReconstructedLines.empty()); +} + +// For a given \p Parent in the incoming expanded token stream, find the +// corresponding parent in the output. 
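getParentInResult, defined below, resolves a parent through SpelledParentToReconstructedParent transitively, since a parent may itself have been remapped during reconstruction (the real function additionally flags the result as a MacroParent). The chain-following at its core, sketched with std::map and hypothetical string keys:

    #include <iostream>
    #include <map>
    #include <string>

    int main() {
      std::map<std::string, std::string> Remap = {{"a", "b"}, {"b", "c"}};
      std::string Parent = "a";
      while (Remap.count(Parent))
        Parent = Remap.at(Parent); // follow the chain to its final stand-in
      std::cout << Parent << '\n'; // prints "c"
      return 0;
    }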
+FormatToken *MacroCallReconstructor::getParentInResult(FormatToken *Parent) { + FormatToken *Mapped = SpelledParentToReconstructedParent.lookup(Parent); + if (!Mapped) + return Parent; + for (; Mapped; Mapped = SpelledParentToReconstructedParent.lookup(Parent)) { + Parent = Mapped; + } + // If we use a different token than the parent in the expanded token stream + // as parent, mark it as a special parent, so the formatting code knows it + // needs to have its children formatted. + Parent->MacroParent = true; + return Parent; +} + +// Reconstruct a \p Token that was expanded from a macro call. +void MacroCallReconstructor::reconstruct(FormatToken *Token) { + assert(Token->MacroCtx); + // A single token can be the only result of a macro call: + // Given: #define ID(x, y) ; + // And the call: ID(<some>, <tokens>) + // ';' in the expanded stream will reconstruct all of ID(<some>, <tokens>). + if (Token->MacroCtx->StartOfExpansion) { + startReconstruction(Token); + // If the order of tokens in the expanded token stream is not the + // same as the order of tokens in the reconstructed stream, we need + // to reconstruct tokens that arrive later in the stream. + if (Token->MacroCtx->Role != MR_Hidden) { + reconstructActiveCallUntil(Token); + } + } + assert(!ActiveExpansions.empty()); + if (ActiveExpansions.back().SpelledI != ActiveExpansions.back().SpelledE) { + assert(ActiveExpansions.size() == Token->MacroCtx->ExpandedFrom.size()); + if (Token->MacroCtx->Role != MR_Hidden) { + // The current token in the reconstructed token stream must be the token + // we're looking for - we either arrive here after startReconstruction, + // which initiates the stream to the first token, or after + // continueReconstructionUntil skipped until the expected token in the + // reconstructed stream at the start of add(...). + assert(ActiveExpansions.back().SpelledI->Tok == Token); + processNextReconstructed(); + } else if (!currentLine()->Tokens.empty()) { + // Map all hidden tokens to the last visible token in the output. + // If the hidden token is a parent, we'll use the last visible + // token as the parent of the hidden token's children. + SpelledParentToReconstructedParent[Token] = + currentLine()->Tokens.back()->Tok; + } else { + for (auto I = ActiveReconstructedLines.rbegin(), + E = ActiveReconstructedLines.rend(); + I != E; ++I) { + if (!(*I)->Tokens.empty()) { + SpelledParentToReconstructedParent[Token] = (*I)->Tokens.back()->Tok; + break; + } + } + } + } + if (Token->MacroCtx->EndOfExpansion) + endReconstruction(Token); +} + +// Given a \p Token that starts an expansion, reconstruct the beginning of the +// macro call. +// For example, given: #define ID(x) x +// And the call: ID(int a) +// Reconstructs: ID( +void MacroCallReconstructor::startReconstruction(FormatToken *Token) { + assert(Token->MacroCtx); + assert(!Token->MacroCtx->ExpandedFrom.empty()); + assert(ActiveExpansions.size() <= Token->MacroCtx->ExpandedFrom.size()); +#ifndef NDEBUG + // Check that the token's reconstruction stack matches our current + // reconstruction stack. + for (size_t I = 0; I < ActiveExpansions.size(); ++I) { + assert(ActiveExpansions[I].ID == + Token->MacroCtx + ->ExpandedFrom[Token->MacroCtx->ExpandedFrom.size() - 1 - I]); + } +#endif + // Start reconstruction for all calls for which this token is the first token + // generated by the call. + // Note that the token's expanded from stack is inside-to-outside, and the + // expansions for which this token is not the first are the outermost ones. 
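The StartedMacros computation that follows encodes this ordering: ExpandedFrom lists the enclosing macro calls inside-to-outside, dropping the already-active tail leaves the calls this token starts, and reversing walks them outermost-first. A sketch with plain strings (macro names invented):

    #include <cstddef>
    #include <iostream>
    #include <string>
    #include <vector>

    int main() {
      // Inside-to-outside expansion stack for a token expanded from A(B(C(x))).
      std::vector<std::string> ExpandedFrom = {"C", "B", "A"};
      const std::size_t AlreadyActive = 1; // "A" was started by an earlier token
      std::vector<std::string> Started(ExpandedFrom.begin(),
                                       ExpandedFrom.end() - AlreadyActive);
      // Reconstruct outside-to-inside: prints "B", then "C".
      for (auto It = Started.rbegin(); It != Started.rend(); ++It)
        std::cout << *It << '\n';
      return 0;
    }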
ArrayRef<FormatToken *> StartedMacros = + makeArrayRef(Token->MacroCtx->ExpandedFrom) + .drop_back(ActiveExpansions.size()); + assert(StartedMacros.size() == Token->MacroCtx->StartOfExpansion); + // We reconstruct macro calls outside-to-inside. + for (FormatToken *ID : llvm::reverse(StartedMacros)) { + // We found a macro call to be reconstructed; the next time our + // reconstruction stack is empty we know we finished a reconstruction. +#ifndef NDEBUG + State = InProgress; +#endif + // Put the reconstructed macro call's token into our reconstruction stack. + auto IU = IdToReconstructed.find(ID); + assert(IU != IdToReconstructed.end()); + ActiveExpansions.push_back( + {ID, IU->second->Tokens.begin(), IU->second->Tokens.end()}); + // Process the macro call's identifier. + processNextReconstructed(); + if (ActiveExpansions.back().SpelledI == ActiveExpansions.back().SpelledE) + continue; + if (ActiveExpansions.back().SpelledI->Tok->is(tok::l_paren)) { + // Process the optional opening parenthesis. + processNextReconstructed(); + } + } +} + +// Add all tokens in the reconstruction stream to the output until we find the +// given \p Token. +bool MacroCallReconstructor::reconstructActiveCallUntil(FormatToken *Token) { + assert(!ActiveExpansions.empty()); + bool PassedMacroComma = false; + // FIXME: If Token was already expanded earlier, due to + // a change in order, we will not find it, but need to + // skip it. + while (ActiveExpansions.back().SpelledI != ActiveExpansions.back().SpelledE && + ActiveExpansions.back().SpelledI->Tok != Token) { + PassedMacroComma = processNextReconstructed() || PassedMacroComma; + } + return PassedMacroComma; +} + +// End all reconstructions for which \p Token is the final token. +void MacroCallReconstructor::endReconstruction(FormatToken *Token) { + assert(Token->MacroCtx && + (ActiveExpansions.size() >= Token->MacroCtx->EndOfExpansion)); + for (size_t I = 0; I < Token->MacroCtx->EndOfExpansion; ++I) { +#ifndef NDEBUG + // Check that all remaining tokens but the final closing parenthesis and + // optional trailing comment were already reconstructed at an inner + // expansion level. + for (auto T = ActiveExpansions.back().SpelledI; + T != ActiveExpansions.back().SpelledE; ++T) { + FormatToken *Token = T->Tok; + bool ClosingParen = (std::next(T) == ActiveExpansions.back().SpelledE || + std::next(T)->Tok->isTrailingComment()) && + !Token->MacroCtx && Token->is(tok::r_paren); + bool TrailingComment = Token->isTrailingComment(); + bool PreviousLevel = + Token->MacroCtx && + (ActiveExpansions.size() < Token->MacroCtx->ExpandedFrom.size()); + if (!ClosingParen && !TrailingComment && !PreviousLevel) { + llvm::dbgs() << "At token: " << Token->TokenText << "\n"; + } + // In addition to the following cases, we can also run into this + // when a macro call had more arguments than expected; in that case, + // the comma and the remaining tokens in the macro call will potentially + // end up in the line when we finish the expansion. + // FIXME: Add the information which arguments are unused, and assert + // one of the cases below plus reconstructed macro argument tokens. + // assert(ClosingParen || TrailingComment || PreviousLevel); + } +#endif + // Handle the remaining open tokens: + // - expand the closing parenthesis, if it exists, including an optional + // trailing comment + // - handle tokens that were already reconstructed at an inner expansion + // level + // - handle tokens when a macro call had more than the expected number of + // arguments, i.e.
when #define M(x) is called as M(a, b, c) we'll end + // up with the sequence ", b, c)" being open at the end of the + // reconstruction; we want to gracefully handle that case + // + // FIXME: See the above debug-check for what we will need to do to be + // able to assert this. + for (auto T = ActiveExpansions.back().SpelledI; + T != ActiveExpansions.back().SpelledE; ++T) { + processNextReconstructed(); + } + ActiveExpansions.pop_back(); + } +} + +void MacroCallReconstructor::debugParentMap() const { + llvm::DenseSet<FormatToken *> Values; + for (const auto &P : SpelledParentToReconstructedParent) + Values.insert(P.second); + + for (const auto &P : SpelledParentToReconstructedParent) { + if (Values.contains(P.first)) + continue; + llvm::dbgs() << (P.first ? P.first->TokenText : "<null>"); + for (auto I = SpelledParentToReconstructedParent.find(P.first), + E = SpelledParentToReconstructedParent.end(); + I != E; I = SpelledParentToReconstructedParent.find(I->second)) { + llvm::dbgs() << " -> " << (I->second ? I->second->TokenText : "<null>"); + } + llvm::dbgs() << "\n"; + } +} + +// If visible, add the next token of the reconstructed token sequence to the +// output. Returns whether reconstruction passed a comma that is part of a +// macro call. +bool MacroCallReconstructor::processNextReconstructed() { + FormatToken *Token = ActiveExpansions.back().SpelledI->Tok; + ++ActiveExpansions.back().SpelledI; + if (Token->MacroCtx) { + // Skip tokens that are not part of the macro call. + if (Token->MacroCtx->Role == MR_Hidden) { + return false; + } + // Skip tokens we already expanded during an inner reconstruction. + // For example, given: #define ID(x) {x} + // And the call: ID(ID(f)) + // We get two reconstructions: + // ID(f) -> {f} + // ID({f}) -> {{f}} + // We reconstruct f during the first reconstruction, and skip it during the + // second reconstruction. + if (ActiveExpansions.size() < Token->MacroCtx->ExpandedFrom.size()) { + return false; + } + } + // Tokens that do not have a macro context are tokens that are part of the + // macro call that have not taken part in expansion. + if (!Token->MacroCtx) { + // Put the parentheses and commas of a macro call into the same line; + // if the arguments produce new unwrapped lines, they will become children + // of the corresponding opening parenthesis or comma tokens in the + // reconstructed call. + if (Token->is(tok::l_paren)) { + MacroCallStructure.push_back(MacroCallState( + currentLine(), parentLine().Tokens.back()->Tok, Token)); + // All tokens that are children of the previous line's last token in the + // reconstructed token stream will now be children of the l_paren token. + // For example, for the line containing the macro calls: + // auto x = ID({ID(2)}); + // We will build up a map <null> -> ( -> ( with the first and second + // l_paren of the macro call respectively. New lines that come in with a + // <null> parent will then become children of the l_paren token of the + // currently innermost macro call. + SpelledParentToReconstructedParent[MacroCallStructure.back() + .ParentLastToken] = Token; + appendToken(Token); + prepareParent(Token, /*NewLine=*/true); + Token->MacroParent = true; + return false; + } + if (!MacroCallStructure.empty()) { + if (Token->is(tok::comma)) { + // Make new lines inside the next argument children of the comma token.
+ SpelledParentToReconstructedParent + [MacroCallStructure.back().Line->Tokens.back()->Tok] = Token; + Token->MacroParent = true; + appendToken(Token, MacroCallStructure.back().Line); + prepareParent(Token, /*NewLine=*/true); + return true; + } + if (Token->is(tok::r_paren)) { + appendToken(Token, MacroCallStructure.back().Line); + SpelledParentToReconstructedParent.erase( + MacroCallStructure.back().ParentLastToken); + MacroCallStructure.pop_back(); + return false; + } + } + } + // Note that any tokens that are tagged with MR_None have been passed as + // arguments to the macro that have not been expanded, for example: + // Given: #define ID(X) x + // When calling: ID(a, b) + // 'b' will be part of the reconstructed token stream, but tagged MR_None. + // Given that erroring out in this case would be disruptive, we continue + // pushing the (unformatted) token. + // FIXME: This can lead to unfortunate formatting decisions - give the user + // a hint that their macro definition is broken. + appendToken(Token); + return false; +} + +void MacroCallReconstructor::finalize() { +#ifndef NDEBUG + assert(State != Finalized && finished()); + State = Finalized; +#endif + + // We created corresponding unwrapped lines for each incoming line as children + // of the toplevel null token. + assert(Result.Tokens.size() == 1 && !Result.Tokens.front()->Children.empty()); + LLVM_DEBUG({ + llvm::dbgs() << "Finalizing reconstructed lines:\n"; + debug(Result, 0); + }); + + // The first line becomes the top level line in the resulting unwrapped line. + LineNode &Top = *Result.Tokens.front(); + auto *I = Top.Children.begin(); + // Every subsequent line will become a child of the last token in the previous + // line, which is the token prior to the first token in the line. + LineNode *Last = (*I)->Tokens.back().get(); + ++I; + for (auto *E = Top.Children.end(); I != E; ++I) { + assert(Last->Children.empty()); + Last->Children.push_back(std::move(*I)); + + // Mark the previous line's last token as generated by a macro expansion + // so the formatting algorithm can take that into account. + Last->Tok->MacroParent = true; + + Last = Last->Children.back()->Tokens.back().get(); + } + Top.Children.resize(1); +} + +void MacroCallReconstructor::appendToken(FormatToken *Token, + ReconstructedLine *L) { + L = L ?
L : currentLine(); + LLVM_DEBUG(llvm::dbgs() << "-> " << Token->TokenText << "\n"); + L->Tokens.push_back(std::make_unique<LineNode>(Token)); +} + +UnwrappedLine +MacroCallReconstructor::createUnwrappedLine(const ReconstructedLine &Line, + int Level) { + UnwrappedLine Result; + Result.Level = Level; + for (const auto &N : Line.Tokens) { + Result.Tokens.push_back(N->Tok); + UnwrappedLineNode &Current = Result.Tokens.back(); + for (const auto &Child : N->Children) { + if (Child->Tokens.empty()) + continue; + Current.Children.push_back(createUnwrappedLine(*Child, Level + 1)); + } + if (Current.Children.size() == 1 && + Current.Tok->isOneOf(tok::l_paren, tok::comma)) { + Result.Tokens.splice(Result.Tokens.end(), + Current.Children.front().Tokens); + Current.Children.clear(); + } + } + return Result; +} + +void MacroCallReconstructor::debug(const ReconstructedLine &Line, int Level) { + for (int i = 0; i < Level; ++i) + llvm::dbgs() << " "; + for (const auto &N : Line.Tokens) { + if (!N) + continue; + if (N->Tok) + llvm::dbgs() << N->Tok->TokenText << " "; + for (const auto &Child : N->Children) { + llvm::dbgs() << "\n"; + debug(*Child, Level + 1); + for (int i = 0; i < Level; ++i) + llvm::dbgs() << " "; + } + } + llvm::dbgs() << "\n"; +} + +MacroCallReconstructor::ReconstructedLine & +MacroCallReconstructor::parentLine() { + return **std::prev(std::prev(ActiveReconstructedLines.end())); +} + +MacroCallReconstructor::ReconstructedLine * +MacroCallReconstructor::currentLine() { + return ActiveReconstructedLines.back(); +} + +MacroCallReconstructor::MacroCallState::MacroCallState( + MacroCallReconstructor::ReconstructedLine *Line, + FormatToken *ParentLastToken, FormatToken *MacroCallLParen) + : Line(Line), ParentLastToken(ParentLastToken), + MacroCallLParen(MacroCallLParen) { + LLVM_DEBUG( + llvm::dbgs() << "ParentLastToken: " + << (ParentLastToken ? ParentLastToken->TokenText : "<null>") + << "\n"); + + assert(MacroCallLParen->is(tok::l_paren)); +} + +} // namespace format +} // namespace clang diff --git a/clang/lib/Format/Macros.h b/clang/lib/Format/Macros.h index da03beb09145..b26799c20f8c 100644 --- a/clang/lib/Format/Macros.h +++ b/clang/lib/Format/Macros.h @@ -1,4 +1,4 @@ -//===--- MacroExpander.h - Format C++ code ----------------------*- C++ -*-===// +//===--- Macros.h - Format C++ code -----------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -22,40 +22,38 @@ /// spelled token streams into expanded token streams when it encounters a /// macro call. The UnwrappedLineParser continues to parse UnwrappedLines /// from the expanded token stream. -/// After the expanded unwrapped lines are parsed, the MacroUnexpander matches -/// the spelled token stream into unwrapped lines that best resemble the -/// structure of the expanded unwrapped lines. +/// After the expanded unwrapped lines are parsed, the MacroCallReconstructor +/// matches the spelled token stream into unwrapped lines that best resemble the +/// structure of the expanded unwrapped lines. These reconstructed unwrapped +/// lines alias the tokens in the expanded token stream, so that token +/// annotations will be reused when formatting the spelled macro calls. /// -/// When formatting, clang-format formats the expanded unwrapped lines first, -/// determining the token types.
Next, it formats the spelled unwrapped lines, -/// keeping the token types fixed, while allowing other formatting decisions -/// to change. +/// When formatting, clang-format annotates and formats the expanded unwrapped +/// lines first, determining the token types. Next, it formats the spelled +/// unwrapped lines, keeping the token types fixed, while allowing other +/// formatting decisions to change. /// //===----------------------------------------------------------------------===// #ifndef CLANG_LIB_FORMAT_MACROS_H #define CLANG_LIB_FORMAT_MACROS_H +#include <list> +#include <map> #include <string> -#include <unordered_map> #include <vector> -#include "Encoding.h" #include "FormatToken.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" -namespace llvm { -class MemoryBuffer; -} // namespace llvm - namespace clang { -class IdentifierTable; -class SourceManager; - namespace format { -struct FormatStyle; + +struct UnwrappedLine; +struct UnwrappedLineNode; /// Takes a set of macro definitions as strings and allows expanding calls to /// those macros. @@ -130,10 +128,253 @@ private: const FormatStyle &Style; llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator; IdentifierTable &IdentTable; - std::vector<std::unique_ptr<llvm::MemoryBuffer>> Buffers; + SmallVector<std::unique_ptr<llvm::MemoryBuffer>> Buffers; llvm::StringMap<Definition> Definitions; }; +/// Converts a sequence of UnwrappedLines containing expanded macros into a +/// single UnwrappedLine containing the macro calls. This UnwrappedLine may be +/// broken into child lines, in a way that best conveys the structure of the +/// expanded code. +/// +/// In the simplest case, a spelled UnwrappedLine contains one macro, and after +/// expanding it we have one expanded UnwrappedLine. In general, macro +/// expansions can span UnwrappedLines, and multiple macros can contribute +/// tokens to the same line. We keep consuming expanded lines until: +/// * all expansions that started have finished (we're not chopping any macros +/// in half) +/// * *and* we've reached the end of a *spelled* unwrapped line. +/// +/// A single UnwrappedLine represents this chunk of code. +/// +/// After this point, the state of the spelled/expanded stream is "in sync" +/// (both at the start of an UnwrappedLine, with no macros open), so the +/// Reconstructor can be thrown away and parsing can continue. +/// +/// Given a mapping from the macro name identifier token in the macro call +/// to the tokens of the macro call, for example: +/// CLASSA -> CLASSA({public: void x();}) +/// +/// When getting the formatted lines of the expansion via the \c addLine method +/// (each '->' specifies a call to \c addLine ): +/// -> class A { +/// -> public: +/// -> void x(); +/// -> }; +/// +/// Creates the tree of unwrapped lines containing the macro call tokens so that +/// the macro call tokens fit the semantic structure of the expanded formatted +/// lines: +/// -> CLASSA({ +/// -> public: +/// -> void x(); +/// -> }) +class MacroCallReconstructor { +public: + /// Create a Reconstructor whose resulting \p UnwrappedLine will start at + /// \p Level, using the map from name identifier token to the corresponding + /// tokens of the spelled macro call.
+ MacroCallReconstructor( + unsigned Level, + const llvm::DenseMap<FormatToken *, std::unique_ptr<UnwrappedLine>> + &ActiveExpansions); + + /// For the given \p Line, match all occurrences of tokens expanded from a + /// macro to unwrapped lines in the spelled macro call so that the resulting + /// tree of unwrapped lines best resembles the structure of unwrapped lines + /// passed in via \c addLine. + void addLine(const UnwrappedLine &Line); + + /// Check whether at the current state there is no open macro expansion + /// that needs to be processed to finish a macro call. + /// Only when \c finished() is true, \c takeResult() can be called to retrieve + /// the resulting \c UnwrappedLine. + /// If there are multiple subsequent macro calls within an unwrapped line in + /// the spelled token stream, the calling code may also continue to call + /// \c addLine() when \c finished() is true. + bool finished() const { return ActiveExpansions.empty(); } + + /// Retrieve the formatted \c UnwrappedLine containing the original + /// macro calls, formatted according to the expanded token stream received + /// via \c addLine(). + /// Generally, this line tries to have the same structure as the expanded, + /// formatted unwrapped lines handed in via \c addLine(), with the exception + /// that for multiple top-level lines, each subsequent line will be the + /// child of the last token in its predecessor. This representation is chosen + /// because it is a precondition to the formatter that we get what looks like + /// a single statement in a single \c UnwrappedLine (i.e. matching parens). + /// + /// If a token in a macro argument is a child of a token in the expansion, + /// the parent will be the corresponding token in the macro call. + /// For example: + /// #define C(a, b) class C { a b + /// C(int x;, int y;) + /// would expand to + /// class C { int x; int y; + /// where in a formatted line "int x;" and "int y;" would both be new separate + /// lines. + /// + /// In the result, "int x;" will be a child of the opening parenthesis in "C(" + /// and "int y;" will be a child of the "," token: + /// C ( + /// \- int x; + /// , + /// \- int y; + /// ) + UnwrappedLine takeResult() &&; + +private: + void add(FormatToken *Token, FormatToken *ExpandedParent, bool First); + void prepareParent(FormatToken *ExpandedParent, bool First); + FormatToken *getParentInResult(FormatToken *Parent); + void reconstruct(FormatToken *Token); + void startReconstruction(FormatToken *Token); + bool reconstructActiveCallUntil(FormatToken *Token); + void endReconstruction(FormatToken *Token); + bool processNextReconstructed(); + void finalize(); + + struct ReconstructedLine; + + void appendToken(FormatToken *Token, ReconstructedLine *L = nullptr); + UnwrappedLine createUnwrappedLine(const ReconstructedLine &Line, int Level); + void debug(const ReconstructedLine &Line, int Level); + ReconstructedLine &parentLine(); + ReconstructedLine *currentLine(); + void debugParentMap() const; + +#ifndef NDEBUG + enum ReconstructorState { + Start, // No macro expansion was found in the input yet. + InProgress, // During a macro reconstruction. + Finalized, // Past macro reconstruction, the result is finalized. + }; + ReconstructorState State = Start; +#endif + + // Node in which we build up the resulting unwrapped line; this type is + // analogous to UnwrappedLineNode.
+ struct LineNode { + LineNode() = default; + LineNode(FormatToken *Tok) : Tok(Tok) {} + FormatToken *Tok = nullptr; + llvm::SmallVector<std::unique_ptr<ReconstructedLine>> Children; + }; + + // Line in which we build up the resulting unwrapped line. + // FIXME: Investigate changing UnwrappedLine to a pointer type and using it + // instead of rolling our own type. + struct ReconstructedLine { + llvm::SmallVector<std::unique_ptr<LineNode>> Tokens; + }; + + // The line in which we collect the resulting reconstructed output. + // To reduce special cases in the algorithm, the first level of the line + // contains a single null token that has the reconstructed incoming + // lines as children. + // In the end, we stitch the lines together so that each subsequent line + // is a child of the last token of the previous line. This is necessary + // in order to format the overall expression as a single logical line - + // if we created separate lines, we'd format them with their own top-level + // indent depending on the semantic structure, which is not desired. + ReconstructedLine Result; + + // Stack of currently "open" lines, where each line's predecessor's last + // token is the parent token for that line. + llvm::SmallVector<ReconstructedLine *> ActiveReconstructedLines; + + // Maps from the expanded token to the token that takes its place in the + // reconstructed token stream in terms of parent-child relationships. + // Note that it might take multiple steps to arrive at the correct + // parent in the output. + // Given: #define C(a, b) []() { a; b; } + // And a call: C(f(), g()) + // The structure in the incoming formatted unwrapped line will be: + // []() { + // |- f(); + // \- g(); + // } + // with f and g being children of the opening brace. + // In the reconstructed call: + // C(f(), g()) + // \- f() + // \- g() + // We want f to be a child of the opening parenthesis and g to be a child + // of the comma token in the macro call. + // Thus, we map + // { -> ( + // and add + // ( -> , + // once we're past the comma in the reconstruction. + llvm::DenseMap<FormatToken *, FormatToken *> + SpelledParentToReconstructedParent; + + // Keeps track of a single expansion while we're reconstructing tokens it + // generated. + struct Expansion { + // The identifier token of the macro call. + FormatToken *ID; + // Our current position in the reconstruction. + std::list<UnwrappedLineNode>::iterator SpelledI; + // The end of the reconstructed token sequence. + std::list<UnwrappedLineNode>::iterator SpelledE; + }; + + // Stack of macro calls for which we're in the middle of an expansion. + llvm::SmallVector<Expansion> ActiveExpansions; + + struct MacroCallState { + MacroCallState(ReconstructedLine *Line, FormatToken *ParentLastToken, + FormatToken *MacroCallLParen); + + ReconstructedLine *Line; + + // The last token in the parent line or expansion, or nullptr if the macro + // expansion is on a top-level line. + // + // For example, in the macro call: + // auto f = []() { ID(1); }; + // The MacroCallState for ID will have '{' as ParentLastToken. + // + // In the macro call: + // ID(ID(void f())); + // The MacroCallState of the outer ID will have nullptr as ParentLastToken, + // while the MacroCallState for the inner ID will have the '(' of the outer + // ID as ParentLastToken. + // + // In the macro call: + // ID2(a, ID(b)); + // The MacroCallState of ID will have ',' as ParentLastToken. + FormatToken *ParentLastToken; + + // The l_paren of this MacroCallState's macro call.
+ FormatToken *MacroCallLParen; + }; + + // Keeps track of the lines into which the opening brace/parenthesis & + // argument-separating commas for each level in the macro call go in order to + // put the corresponding closing brace/parenthesis into the same line in the + // output and keep track of which parents in the expanded token stream map to + // which tokens in the reconstructed stream. + // When an opening brace/parenthesis has children, we want the structure of + // the output line to be: + // |- MACRO + // |- ( + // | \- <argument> + // |- , + // | \- <argument> + // \- ) + llvm::SmallVector<MacroCallState> MacroCallStructure; + + // Level the generated UnwrappedLine will be at. + const unsigned Level; + + // Maps from identifier of the macro call to an unwrapped line containing + // all tokens of the macro call. + const llvm::DenseMap<FormatToken *, std::unique_ptr<UnwrappedLine>> + &IdToReconstructed; +}; + } // namespace format } // namespace clang diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 029cb9097871..98c012994f45 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -4734,7 +4734,7 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, // the first list element. Otherwise, it should be placed outside of the // list. return Left.is(BK_BracedInit) || - (Left.is(TT_CtorInitializerColon) && + (Left.is(TT_CtorInitializerColon) && Right.NewlinesBefore > 0 && Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon); } if (Left.is(tok::question) && Right.is(tok::colon)) @@ -4894,8 +4894,10 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, if (Right.is(tok::identifier) && Right.Next && Right.Next->is(TT_DictLiteral)) return true; - if (Left.is(TT_CtorInitializerColon)) - return Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon; + if (Left.is(TT_CtorInitializerColon)) { + return Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon && + (!Right.isTrailingComment() || Right.NewlinesBefore > 0); + } if (Right.is(TT_CtorInitializerColon)) return Style.BreakConstructorInitializers != FormatStyle::BCIS_AfterColon; if (Left.is(TT_CtorInitializerComma) && diff --git a/clang/lib/Format/UnwrappedLineFormatter.cpp b/clang/lib/Format/UnwrappedLineFormatter.cpp index 22509a504246..abeb93d23776 100644 --- a/clang/lib/Format/UnwrappedLineFormatter.cpp +++ b/clang/lib/Format/UnwrappedLineFormatter.cpp @@ -59,14 +59,12 @@ public: Offset = getIndentOffset(*Line.First); // Update the indent level cache size so that we can rely on it // having the right size in adjustToUnmodifiedLine. - while (IndentForLevel.size() <= Line.Level) - IndentForLevel.push_back(-1); + skipLine(Line, /*UnknownIndent=*/true); if (Line.InPPDirective) { unsigned IndentWidth = (Style.PPIndentWidth >= 0) ? Style.PPIndentWidth : Style.IndentWidth; Indent = Line.Level * IndentWidth + AdditionalIndent; } else { - IndentForLevel.resize(Line.Level + 1); Indent = getIndent(Line.Level); } if (static_cast<int>(Indent) + Offset >= 0) @@ -77,9 +75,9 @@ public: /// Update the indent state given that \p Line indent should be /// skipped. - void skipLine(const AnnotatedLine &Line) { - while (IndentForLevel.size() <= Line.Level) - IndentForLevel.push_back(Indent); + void skipLine(const AnnotatedLine &Line, bool UnknownIndent = false) { + if (Line.Level >= IndentForLevel.size()) + IndentForLevel.resize(Line.Level + 1, UnknownIndent ?
-1 : Indent); } /// Update the level indent to adapt to the given \p Line. @@ -91,6 +89,7 @@ public: unsigned LevelIndent = Line.First->OriginalColumn; if (static_cast<int>(LevelIndent) - Offset >= 0) LevelIndent -= Offset; + assert(Line.Level < IndentForLevel.size()); if ((!Line.First->is(tok::comment) || IndentForLevel[Line.Level] == -1) && !Line.InPPDirective) { IndentForLevel[Line.Level] = LevelIndent; @@ -159,7 +158,7 @@ private: const unsigned AdditionalIndent; /// The indent in characters for each level. - std::vector<int> IndentForLevel; + SmallVector<int> IndentForLevel; /// Offset of the current line relative to the indent level. /// @@ -1133,7 +1132,7 @@ private: typedef std::pair<OrderedPenalty, StateNode *> QueueItem; /// The BFS queue type. - typedef std::priority_queue<QueueItem, std::vector<QueueItem>, + typedef std::priority_queue<QueueItem, SmallVector<QueueItem>, std::greater<QueueItem>> QueueType; diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index d3383292f7a3..97c3d86282a0 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -15,6 +15,7 @@ #include "UnwrappedLineParser.h" #include "FormatToken.h" #include "TokenAnnotator.h" +#include "clang/Basic/TokenKinds.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -1910,15 +1911,12 @@ void UnwrappedLineParser::parseStructuralElement( break; auto OneTokenSoFar = [&]() { - const UnwrappedLineNode *Tok = &Line->Tokens.front(), - *End = Tok + Line->Tokens.size(); - while (Tok != End && Tok->Tok->is(tok::comment)) - ++Tok; - // In Verilog, macro invocations start with a backtick which the code - // treats as a hash. Skip it. - if (Style.isVerilog() && Tok != End && Tok->Tok->is(tok::hash)) - ++Tok; - return End - Tok == 1; + auto I = Line->Tokens.begin(), E = Line->Tokens.end(); + while (I != E && I->Tok->is(tok::comment)) + ++I; + while (I != E && Style.isVerilog() && I->Tok->is(tok::hash)) + ++I; + return I != E && (++I == E); }; if (OneTokenSoFar()) { if (FormatTok->is(tok::colon) && !Line->MustBeDeclaration) { diff --git a/clang/lib/Format/UnwrappedLineParser.h b/clang/lib/Format/UnwrappedLineParser.h index 8f63870412d0..3394bfab8b8e 100644 --- a/clang/lib/Format/UnwrappedLineParser.h +++ b/clang/lib/Format/UnwrappedLineParser.h @@ -20,6 +20,7 @@ #include "clang/Format/Format.h" #include "llvm/ADT/BitVector.h" #include "llvm/Support/Regex.h" +#include <list> #include <stack> #include <vector> @@ -38,7 +39,7 @@ struct UnwrappedLine { UnwrappedLine(); /// The \c Tokens comprising this \c UnwrappedLine. - std::vector<UnwrappedLineNode> Tokens; + std::list<UnwrappedLineNode> Tokens; /// The indent level of the \c UnwrappedLine. 
unsigned Level; diff --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp index b982ca72c78c..2cd7efd862ec 100644 --- a/clang/lib/Frontend/CompilerInstance.cpp +++ b/clang/lib/Frontend/CompilerInstance.cpp @@ -115,9 +115,9 @@ bool CompilerInstance::createTarget() { auto TO = std::make_shared<TargetOptions>(); TO->Triple = llvm::Triple::normalize(getFrontendOpts().AuxTriple); if (getFrontendOpts().AuxTargetCPU) - TO->CPU = getFrontendOpts().AuxTargetCPU.getValue(); + TO->CPU = getFrontendOpts().AuxTargetCPU.value(); if (getFrontendOpts().AuxTargetFeatures) - TO->FeaturesAsWritten = getFrontendOpts().AuxTargetFeatures.getValue(); + TO->FeaturesAsWritten = getFrontendOpts().AuxTargetFeatures.value(); TO->HostTriple = getTarget().getTriple().str(); setAuxTarget(TargetInfo::CreateTargetInfo(getDiagnostics(), TO)); } @@ -757,6 +757,8 @@ void CompilerInstance::createSema(TranslationUnitKind TUKind, // Output Files void CompilerInstance::clearOutputFiles(bool EraseFiles) { + // The ASTConsumer can own streams that write to the output files. + assert(!hasASTConsumer() && "ASTConsumer should be reset"); // Ignore errors that occur when trying to discard the temp file. for (OutputFile &OF : OutputFiles) { if (EraseFiles) { @@ -1235,8 +1237,7 @@ compileModuleImpl(CompilerInstance &ImportingInstance, SourceLocation ImportLoc, // Execute the action to actually build the module in-place. Use a separate // thread so that we get a stack large enough. - llvm::CrashRecoveryContext CRC; - CRC.RunSafelyOnThread( + bool Crashed = !llvm::CrashRecoveryContext().RunSafelyOnThread( [&]() { GenerateModuleFromModuleMapAction Action; Instance.ExecuteAction(Action); @@ -1249,9 +1250,15 @@ compileModuleImpl(CompilerInstance &ImportingInstance, SourceLocation ImportLoc, diag::remark_module_build_done) << ModuleName; - // Delete any remaining temporary files related to Instance, in case the - // module generation thread crashed. - Instance.clearOutputFiles(/*EraseFiles=*/true); + if (Crashed) { + // Clear the ASTConsumer if it hasn't been already, in case it owns streams + // that must be closed before clearing output files. + Instance.setSema(nullptr); + Instance.setASTConsumer(nullptr); + + // Delete any remaining temporary files related to Instance. + Instance.clearOutputFiles(/*EraseFiles=*/true); + } // If \p AllowPCMWithCompilerErrors is set return 'success' even if errors // occurred. 
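The CompilerInstance.cpp hunk above bundles two related changes: the module build runs inside a throwaway CrashRecoveryContext on its own thread, and temporary output files are now erased only when that run actually crashed (after dropping the Sema and ASTConsumer that may own streams into those files). A minimal standalone sketch of the crash-detection pattern, assuming only LLVM's CrashRecoveryContext API; the task body and messages are illustrative:

#include "llvm/Support/CrashRecoveryContext.h"
#include "llvm/Support/raw_ostream.h"

int main() {
  // Crash recovery must be enabled once per process before contexts are used.
  llvm::CrashRecoveryContext::Enable();

  // RunSafelyOnThread returns false if the closure crashed; the hunk above
  // negates that into a 'Crashed' flag.
  bool Crashed = !llvm::CrashRecoveryContext().RunSafelyOnThread([] {
    llvm::outs() << "building module on a dedicated thread\n";
  });

  // Only a crashed run needs its half-written outputs erased, mirroring the
  // now-conditional clearOutputFiles(/*EraseFiles=*/true) call.
  if (Crashed)
    llvm::errs() << "module build crashed; erasing temporary files\n";
  return Crashed ? 1 : 0;
}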
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index abef4cf65496..48cd6a394107 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -1485,6 +1485,9 @@ void CompilerInvocation::GenerateCodeGenArgs( if (Opts.IBTSeal) GenerateArg(Args, OPT_mibt_seal, SA); + if (Opts.FunctionReturnThunks) + GenerateArg(Args, OPT_mfunction_return_EQ, "thunk-extern", SA); + for (const auto &F : Opts.LinkBitcodeFiles) { bool Builtint = F.LinkFlags == llvm::Linker::Flags::LinkOnlyNeeded && F.PropagateAttrs && F.Internalize; @@ -1825,6 +1828,27 @@ bool CompilerInvocation::ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << Name; } + if (const Arg *A = Args.getLastArg(OPT_mfunction_return_EQ)) { + auto Val = llvm::StringSwitch<llvm::FunctionReturnThunksKind>(A->getValue()) + .Case("keep", llvm::FunctionReturnThunksKind::Keep) + .Case("thunk-extern", llvm::FunctionReturnThunksKind::Extern) + .Default(llvm::FunctionReturnThunksKind::Invalid); + // SystemZ might want to add support for "expolines." + if (!T.isX86()) + Diags.Report(diag::err_drv_argument_not_allowed_with) + << A->getSpelling() << T.getTriple(); + else if (Val == llvm::FunctionReturnThunksKind::Invalid) + Diags.Report(diag::err_drv_invalid_value) + << A->getAsString(Args) << A->getValue(); + else if (Val == llvm::FunctionReturnThunksKind::Extern && + Args.getLastArgValue(OPT_mcmodel_EQ).equals("large")) + Diags.Report(diag::err_drv_argument_not_allowed_with) + << A->getAsString(Args) + << Args.getLastArg(OPT_mcmodel_EQ)->getAsString(Args); + else + Opts.FunctionReturnThunks = static_cast<unsigned>(Val); + } + if (Opts.PrepareForLTO && Args.hasArg(OPT_mibt_seal)) Opts.IBTSeal = 1; @@ -1952,7 +1976,7 @@ bool CompilerInvocation::ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, } else { Opts.DiagnosticsHotnessThreshold = *ResultOrErr; if ((!Opts.DiagnosticsHotnessThreshold || - Opts.DiagnosticsHotnessThreshold.getValue() > 0) && + Opts.DiagnosticsHotnessThreshold.value() > 0) && !UsingProfile) Diags.Report(diag::warn_drv_diagnostics_hotness_requires_pgo) << "-fdiagnostics-hotness-threshold="; @@ -1969,7 +1993,7 @@ bool CompilerInvocation::ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, } else { Opts.DiagnosticsMisExpectTolerance = *ResultOrErr; if ((!Opts.DiagnosticsMisExpectTolerance || - Opts.DiagnosticsMisExpectTolerance.getValue() > 0) && + Opts.DiagnosticsMisExpectTolerance.value() > 0) && !UsingProfile) Diags.Report(diag::warn_drv_diagnostics_misexpect_requires_pgo) << "-fdiagnostics-misexpect-tolerance="; diff --git a/clang/lib/Frontend/FrontendAction.cpp b/clang/lib/Frontend/FrontendAction.cpp index 65160dd7e0b1..ed3e314cc73b 100644 --- a/clang/lib/Frontend/FrontendAction.cpp +++ b/clang/lib/Frontend/FrontendAction.cpp @@ -24,6 +24,7 @@ #include "clang/Lex/Preprocessor.h" #include "clang/Lex/PreprocessorOptions.h" #include "clang/Parse/ParseAST.h" +#include "clang/Sema/HLSLExternalSemaSource.h" #include "clang/Serialization/ASTDeserializationListener.h" #include "clang/Serialization/ASTReader.h" #include "clang/Serialization/GlobalModuleIndex.h" @@ -580,6 +581,7 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI, auto FailureCleanup = llvm::make_scope_exit([&]() { if (HasBegunSourceFile) CI.getDiagnosticClient().EndSourceFile(); + CI.setASTConsumer(nullptr); CI.clearOutputFiles(/*EraseFiles=*/true); 
CI.getLangOpts().setCompilingModule(LangOptions::CMK_None); setCurrentInput(FrontendInputFile()); @@ -1014,6 +1016,13 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI, CI.getASTContext().setExternalSource(Override); } + // Setup HLSL External Sema Source + if (CI.getLangOpts().HLSL && CI.hasASTContext()) { + IntrusiveRefCntPtr<ExternalASTSource> HLSLSema( + new HLSLExternalSemaSource()); + CI.getASTContext().setExternalSource(HLSLSema); + } + FailureCleanup.release(); return true; } diff --git a/clang/lib/Frontend/FrontendActions.cpp b/clang/lib/Frontend/FrontendActions.cpp index f61c83a2a465..f833541caa25 100644 --- a/clang/lib/Frontend/FrontendActions.cpp +++ b/clang/lib/Frontend/FrontendActions.cpp @@ -854,8 +854,9 @@ void DumpModuleInfoAction::ExecuteAction() { std::error_code EC; OutFile.reset(new llvm::raw_fd_ostream(OutputFileName.str(), EC, llvm::sys::fs::OF_TextWithCRLF)); + OutputStream = OutFile.get(); } - llvm::raw_ostream &Out = OutFile.get()? *OutFile.get() : llvm::outs(); + llvm::raw_ostream &Out = OutputStream ? *OutputStream : llvm::outs(); Out << "Information for module file '" << getCurrentFile() << "':\n"; auto &FileMgr = getCompilerInstance().getFileManager(); diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp index fe3736c07c3c..d0360696ff9c 100644 --- a/clang/lib/Frontend/InitPreprocessor.cpp +++ b/clang/lib/Frontend/InitPreprocessor.cpp @@ -832,11 +832,11 @@ static void InitializePredefinedMacros(const TargetInfo &TI, unsigned minor = 0; if (tuple.getMinor()) - minor = tuple.getMinor().getValue(); + minor = tuple.getMinor().value(); unsigned subminor = 0; if (tuple.getSubminor()) - subminor = tuple.getSubminor().getValue(); + subminor = tuple.getSubminor().value(); Builder.defineMacro("__OBJFW_RUNTIME_ABI__", Twine(tuple.getMajor() * 10000 + minor * 100 + diff --git a/clang/lib/Headers/hlsl/hlsl_basic_types.h b/clang/lib/Headers/hlsl/hlsl_basic_types.h index 2069990f5c06..e68715f1a6a4 100644 --- a/clang/lib/Headers/hlsl/hlsl_basic_types.h +++ b/clang/lib/Headers/hlsl/hlsl_basic_types.h @@ -27,38 +27,38 @@ typedef long int64_t; // built-in vector data types: #ifdef __HLSL_ENABLE_16_BIT -typedef int16_t int16_t2 __attribute__((ext_vector_type(2))); -typedef int16_t int16_t3 __attribute__((ext_vector_type(3))); -typedef int16_t int16_t4 __attribute__((ext_vector_type(4))); -typedef uint16_t uint16_t2 __attribute__((ext_vector_type(2))); -typedef uint16_t uint16_t3 __attribute__((ext_vector_type(3))); -typedef uint16_t uint16_t4 __attribute__((ext_vector_type(4))); +typedef vector<int16_t, 2> int16_t2; +typedef vector<int16_t, 3> int16_t3; +typedef vector<int16_t, 4> int16_t4; +typedef vector<uint16_t, 2> uint16_t2; +typedef vector<uint16_t, 3> uint16_t3; +typedef vector<uint16_t, 4> uint16_t4; #endif -typedef int int2 __attribute__((ext_vector_type(2))); -typedef int int3 __attribute__((ext_vector_type(3))); -typedef int int4 __attribute__((ext_vector_type(4))); -typedef uint uint2 __attribute__((ext_vector_type(2))); -typedef uint uint3 __attribute__((ext_vector_type(3))); -typedef uint uint4 __attribute__((ext_vector_type(4))); -typedef int64_t int64_t2 __attribute__((ext_vector_type(2))); -typedef int64_t int64_t3 __attribute__((ext_vector_type(3))); -typedef int64_t int64_t4 __attribute__((ext_vector_type(4))); -typedef uint64_t uint64_t2 __attribute__((ext_vector_type(2))); -typedef uint64_t uint64_t3 __attribute__((ext_vector_type(3))); -typedef uint64_t uint64_t4 __attribute__((ext_vector_type(4))); 
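The replacement typedefs that follow switch hlsl_basic_types.h from spelling ext_vector_type at every width to the hlsl 'vector' alias template (the one injected by the HLSLExternalSemaSource added later in this commit). As a hedged sketch that the two spellings name the same type, with a local stand-in for the alias (clang accepts a dependent ext_vector_type size in an alias template); the _new/_old names are illustrative:

// Local stand-in for hlsl's 'vector' alias template.
template <typename T, int N>
using vector = T __attribute__((ext_vector_type(N)));

typedef vector<float, 4> float4_new;                           // new spelling
typedef float float4_old __attribute__((ext_vector_type(4))); // old spelling

// Both typedefs denote the same clang extended-vector type.
static_assert(__is_same(float4_new, float4_old), "same vector type");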
+typedef vector<int, 2> int2; +typedef vector<int, 3> int3; +typedef vector<int, 4> int4; +typedef vector<uint, 2> uint2; +typedef vector<uint, 3> uint3; +typedef vector<uint, 4> uint4; +typedef vector<int64_t, 2> int64_t2; +typedef vector<int64_t, 3> int64_t3; +typedef vector<int64_t, 4> int64_t4; +typedef vector<uint64_t, 2> uint64_t2; +typedef vector<uint64_t, 3> uint64_t3; +typedef vector<uint64_t, 4> uint64_t4; #ifdef __HLSL_ENABLE_16_BIT -typedef half half2 __attribute__((ext_vector_type(2))); -typedef half half3 __attribute__((ext_vector_type(3))); -typedef half half4 __attribute__((ext_vector_type(4))); +typedef vector<half, 2> half2; +typedef vector<half, 3> half3; +typedef vector<half, 4> half4; #endif -typedef float float2 __attribute__((ext_vector_type(2))); -typedef float float3 __attribute__((ext_vector_type(3))); -typedef float float4 __attribute__((ext_vector_type(4))); -typedef double double2 __attribute__((ext_vector_type(2))); -typedef double double3 __attribute__((ext_vector_type(3))); -typedef double double4 __attribute__((ext_vector_type(4))); +typedef vector<float, 2> float2; +typedef vector<float, 3> float3; +typedef vector<float, 4> float4; +typedef vector<double, 2> double2; +typedef vector<double, 3> double3; +typedef vector<double, 4> double4; #endif //_HLSL_HLSL_BASIC_TYPES_H_ diff --git a/clang/lib/Headers/opencl-c.h b/clang/lib/Headers/opencl-c.h index ed647d9e9c06..72a6bfeafd6a 100644 --- a/clang/lib/Headers/opencl-c.h +++ b/clang/lib/Headers/opencl-c.h @@ -10467,12 +10467,6 @@ float __ovld __cnfn fast_distance(float, float); float __ovld __cnfn fast_distance(float2, float2); float __ovld __cnfn fast_distance(float3, float3); float __ovld __cnfn fast_distance(float4, float4); -#ifdef cl_khr_fp16 -half __ovld __cnfn fast_distance(half, half); -half __ovld __cnfn fast_distance(half2, half2); -half __ovld __cnfn fast_distance(half3, half3); -half __ovld __cnfn fast_distance(half4, half4); -#endif //cl_khr_fp16 /** * Returns the length of vector p computed as: @@ -10482,12 +10476,6 @@ float __ovld __cnfn fast_length(float); float __ovld __cnfn fast_length(float2); float __ovld __cnfn fast_length(float3); float __ovld __cnfn fast_length(float4); -#ifdef cl_khr_fp16 -half __ovld __cnfn fast_length(half); -half __ovld __cnfn fast_length(half2); -half __ovld __cnfn fast_length(half3); -half __ovld __cnfn fast_length(half4); -#endif //cl_khr_fp16 /** * Returns a vector in the same direction as p but with a @@ -10514,12 +10502,6 @@ float __ovld __cnfn fast_normalize(float); float2 __ovld __cnfn fast_normalize(float2); float3 __ovld __cnfn fast_normalize(float3); float4 __ovld __cnfn fast_normalize(float4); -#ifdef cl_khr_fp16 -half __ovld __cnfn fast_normalize(half); -half2 __ovld __cnfn fast_normalize(half2); -half3 __ovld __cnfn fast_normalize(half3); -half4 __ovld __cnfn fast_normalize(half4); -#endif //cl_khr_fp16 // OpenCL v1.1 s6.11.6, v1.2 s6.12.6, v2.0 s6.13.6 - Relational Functions diff --git a/clang/lib/Headers/rdpruintrin.h b/clang/lib/Headers/rdpruintrin.h new file mode 100644 index 000000000000..89732bb8b3cf --- /dev/null +++ b/clang/lib/Headers/rdpruintrin.h @@ -0,0 +1,57 @@ +/*===---- rdpruintrin.h - RDPRU intrinsics ---------------------------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#if !defined __X86INTRIN_H +#error "Never use <rdpruintrin.h> directly; include <x86intrin.h> instead." +#endif + +#ifndef __RDPRUINTRIN_H +#define __RDPRUINTRIN_H + +/* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, __target__("rdpru"))) + + +/// Reads the content of a processor register. +/// +/// \headerfile <x86intrin.h> +/// +/// This intrinsic corresponds to the <c> RDPRU </c> instruction. +/// +/// \param reg_id +/// A processor register identifier. +static __inline__ unsigned long long __DEFAULT_FN_ATTRS +__rdpru (int reg_id) +{ + return __builtin_ia32_rdpru(reg_id); +} + +#define __RDPRU_MPERF 0 +#define __RDPRU_APERF 1 + +/// Reads the content of processor register MPERF. +/// +/// \headerfile <x86intrin.h> +/// +/// This intrinsic generates instruction <c> RDPRU </c> to read the value of +/// register MPERF. +#define __mperf() __builtin_ia32_rdpru(__RDPRU_MPERF) + +/// Reads the content of processor register APERF. +/// +/// \headerfile <x86intrin.h> +/// +/// This intrinsic generates instruction <c> RDPRU </c> to read the value of +/// register APERF. +#define __aperf() __builtin_ia32_rdpru(__RDPRU_APERF) + +#undef __DEFAULT_FN_ATTRS + +#endif /* __RDPRUINTRIN_H */ diff --git a/clang/lib/Headers/stdatomic.h b/clang/lib/Headers/stdatomic.h index 780bcc2dfea1..3a0b9cc056be 100644 --- a/clang/lib/Headers/stdatomic.h +++ b/clang/lib/Headers/stdatomic.h @@ -158,10 +158,6 @@ typedef _Atomic(uintmax_t) atomic_uintmax_t; typedef struct atomic_flag { atomic_bool _Value; } atomic_flag; #define ATOMIC_FLAG_INIT { 0 } -#if __cplusplus >= 202002L && !defined(_CLANG_DISABLE_CRT_DEPRECATION_WARNINGS) -/* ATOMIC_FLAG_INIT was deprecated in C++20 but is not deprecated in C. */ -#pragma clang deprecated(ATOMIC_FLAG_INIT) -#endif /* These should be provided by the libc implementation. */ #ifdef __cplusplus diff --git a/clang/lib/Headers/x86intrin.h b/clang/lib/Headers/x86intrin.h index 768d0e56ab05..450fd008dab9 100644 --- a/clang/lib/Headers/x86intrin.h +++ b/clang/lib/Headers/x86intrin.h @@ -59,5 +59,9 @@ #include <clzerointrin.h> #endif +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__RDPRU__) +#include <rdpruintrin.h> +#endif #endif /* __X86INTRIN_H */ diff --git a/clang/lib/Lex/DependencyDirectivesScanner.cpp b/clang/lib/Lex/DependencyDirectivesScanner.cpp index be7b7d6e17b2..567ca81f6ac2 100644 --- a/clang/lib/Lex/DependencyDirectivesScanner.cpp +++ b/clang/lib/Lex/DependencyDirectivesScanner.cpp @@ -550,7 +550,7 @@ Scanner::tryLexIdentifierOrSkipLine(const char *&First, const char *const End) { StringRef Scanner::lexIdentifier(const char *&First, const char *const End) { Optional<StringRef> Id = tryLexIdentifierOrSkipLine(First, End); assert(Id && "expected identifier token"); - return Id.getValue(); + return Id.value(); } bool Scanner::isNextIdentifierOrSkipLine(StringRef Id, const char *&First, diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index 6820057642be..b3aac9df6546 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -2392,13 +2392,37 @@ bool Lexer::SkipLineComment(Token &Result, const char *CurPtr, // // This loop terminates with CurPtr pointing at the newline (or end of buffer) // character that ends the line comment. 
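The lexer additions that follow diagnose ill-formed UTF-8 in comments, and deliberately emit the warning at most once per ill-formed subsequence rather than once per bad byte. A minimal sketch of that once-per-run policy outside the lexer, assuming llvm::getUTF8SequenceSize from llvm/Support/ConvertUTF.h (the same helper the new code calls); the function name is illustrative:

#include "llvm/Support/ConvertUTF.h"

// Count ill-formed UTF-8 subsequences in [First, Last), charging each run of
// undecodable bytes once (per the Unicode PR-121 recommendation cited below).
static unsigned countInvalidUTF8Runs(const char *First, const char *Last) {
  unsigned Runs = 0;
  bool AlreadyDiagnosed = false; // mirrors UnicodeDecodingAlreadyDiagnosed
  while (First != Last) {
    if (static_cast<unsigned char>(*First) < 0x80) { // ASCII ends any run.
      AlreadyDiagnosed = false;
      ++First;
      continue;
    }
    unsigned Length = llvm::getUTF8SequenceSize(
        reinterpret_cast<const llvm::UTF8 *>(First),
        reinterpret_cast<const llvm::UTF8 *>(Last));
    if (Length == 0) { // Undecodable byte: count only at the start of a run.
      Runs += !AlreadyDiagnosed;
      AlreadyDiagnosed = true;
      ++First;
    } else { // Valid multi-byte sequence: skip it whole.
      AlreadyDiagnosed = false;
      First += Length;
    }
  }
  return Runs;
}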
+ + // C++23 [lex.phases] p1 + // Diagnose invalid UTF-8 if the corresponding warning is enabled, emitting a + // diagnostic only once per entire ill-formed subsequence to avoid + // emitting too many diagnostics (see http://unicode.org/review/pr-121.html). + bool UnicodeDecodingAlreadyDiagnosed = false; + char C; while (true) { C = *CurPtr; // Skip over characters in the fast loop. - while (C != 0 && // Potentially EOF. - C != '\n' && C != '\r') // Newline or DOS-style newline. + while (isASCII(C) && C != 0 && // Potentially EOF. + C != '\n' && C != '\r') { // Newline or DOS-style newline. C = *++CurPtr; + UnicodeDecodingAlreadyDiagnosed = false; + } + + if (!isASCII(C)) { + unsigned Length = llvm::getUTF8SequenceSize( + (const llvm::UTF8 *)CurPtr, (const llvm::UTF8 *)BufferEnd); + if (Length == 0) { + if (!UnicodeDecodingAlreadyDiagnosed && !isLexingRawMode()) + Diag(CurPtr, diag::warn_invalid_utf8_in_comment); + UnicodeDecodingAlreadyDiagnosed = true; + ++CurPtr; + } else { + UnicodeDecodingAlreadyDiagnosed = false; + CurPtr += Length; + } + continue; + } const char *NextLine = CurPtr; if (C != 0) { @@ -2665,6 +2689,12 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr, if (C == '/') C = *CurPtr++; + // C++23 [lex.phases] p1 + // Diagnose invalid UTF-8 if the corresponding warning is enabled, emitting a + // diagnostic only once per entire ill-formed subsequence to avoid + // emitting too many diagnostics (see http://unicode.org/review/pr-121.html). + bool UnicodeDecodingAlreadyDiagnosed = false; + while (true) { // Skip over all non-interesting characters until we find end of buffer or a // (probably ending) '/' character. @@ -2673,14 +2703,21 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr, // doesn't check for '\0'. !(PP && PP->getCodeCompletionFileLoc() == FileLoc)) { // While not aligned to a 16-byte boundary. - while (C != '/' && ((intptr_t)CurPtr & 0x0F) != 0) + while (C != '/' && (intptr_t)CurPtr % 16 != 0) { + if (!isASCII(C)) + goto MultiByteUTF8; C = *CurPtr++; - + } if (C == '/') goto FoundSlash; #ifdef __SSE2__ __m128i Slashes = _mm_set1_epi8('/'); - while (CurPtr+16 <= BufferEnd) { + while (CurPtr + 16 < BufferEnd) { + int Mask = _mm_movemask_epi8(*(const __m128i *)CurPtr); + if (LLVM_UNLIKELY(Mask != 0)) { + goto MultiByteUTF8; + } + // Look for slashes. int cmp = _mm_movemask_epi8(_mm_cmpeq_epi8(*(const __m128i*)CurPtr, Slashes)); if (cmp != 0) { @@ -2693,21 +2730,38 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr, CurPtr += 16; } #elif __ALTIVEC__ + __vector unsigned char LongUTF = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80}; __vector unsigned char Slashes = { '/', '/', '/', '/', '/', '/', '/', '/', '/', '/', '/', '/', '/', '/', '/', '/' }; - while (CurPtr + 16 <= BufferEnd && - !vec_any_eq(*(const __vector unsigned char *)CurPtr, Slashes)) + while (CurPtr + 16 < BufferEnd) { + if (LLVM_UNLIKELY( + vec_any_ge(*(const __vector unsigned char *)CurPtr, LongUTF))) + goto MultiByteUTF8; + if (vec_any_eq(*(const __vector unsigned char *)CurPtr, Slashes)) { + break; + } CurPtr += 16; + } + #else - // Scan for '/' quickly. Many block comments are very large.
- while (CurPtr[0] != '/' && - CurPtr[1] != '/' && - CurPtr[2] != '/' && - CurPtr[3] != '/' && - CurPtr+4 < BufferEnd) { - CurPtr += 4; + while (CurPtr + 16 < BufferEnd) { + bool HasNonASCII = false; + for (unsigned I = 0; I < 16; ++I) + HasNonASCII |= !isASCII(CurPtr[I]); + + if (LLVM_UNLIKELY(HasNonASCII)) + goto MultiByteUTF8; + + bool HasSlash = false; + for (unsigned I = 0; I < 16; ++I) + HasSlash |= CurPtr[I] == '/'; + if (HasSlash) + break; + CurPtr += 16; } #endif @@ -2715,9 +2769,30 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr, C = *CurPtr++; } - // Loop to scan the remainder. - while (C != '/' && C != '\0') + // Loop to scan the remainder, warning on invalid UTF-8 + // if the corresponding warning is enabled, emitting a diagnostic only once + // per sequence that cannot be decoded. + while (C != '/' && C != '\0') { + if (isASCII(C)) { + UnicodeDecodingAlreadyDiagnosed = false; + C = *CurPtr++; + continue; + } + MultiByteUTF8: + // CurPtr is 1 code unit past C, so to decode + // the codepoint, we need to read from the previous position. + unsigned Length = llvm::getUTF8SequenceSize( + (const llvm::UTF8 *)CurPtr - 1, (const llvm::UTF8 *)BufferEnd); + if (Length == 0) { + if (!UnicodeDecodingAlreadyDiagnosed && !isLexingRawMode()) + Diag(CurPtr - 1, diag::warn_invalid_utf8_in_comment); + UnicodeDecodingAlreadyDiagnosed = true; + } else { + UnicodeDecodingAlreadyDiagnosed = false; + CurPtr += Length - 1; + } C = *CurPtr++; + } if (C == '/') { FoundSlash: @@ -3212,7 +3287,10 @@ llvm::Optional<uint32_t> Lexer::tryReadNumericUCN(const char *&StartPtr, } if (Delimited && PP) { - Diag(BufferPtr, diag::ext_delimited_escape_sequence) << /*delimited*/ 0; + Diag(BufferPtr, PP->getLangOpts().CPlusPlus2b + ? diag::warn_cxx2b_delimited_escape_sequence + : diag::ext_delimited_escape_sequence) + << /*delimited*/ 0 << (PP->getLangOpts().CPlusPlus ? 1 : 0); } if (Result) { @@ -3296,7 +3374,10 @@ llvm::Optional<uint32_t> Lexer::tryReadNamedUCN(const char *&StartPtr, } if (Diagnose && PP && !LooseMatch) - Diag(BufferPtr, diag::ext_delimited_escape_sequence) << /*named*/ 1; + Diag(BufferPtr, PP->getLangOpts().CPlusPlus2b + ? diag::warn_cxx2b_delimited_escape_sequence + : diag::ext_delimited_escape_sequence) + << /*named*/ 1 << (PP->getLangOpts().CPlusPlus ? 1 : 0); if (LooseMatch) Res = LooseMatch->CodePoint; diff --git a/clang/lib/Lex/LiteralSupport.cpp b/clang/lib/Lex/LiteralSupport.cpp index ebf30c9f01a9..53635a7385ec 100644 --- a/clang/lib/Lex/LiteralSupport.cpp +++ b/clang/lib/Lex/LiteralSupport.cpp @@ -311,8 +311,9 @@ static unsigned ProcessCharEscape(const char *ThisTokBegin, << tok::r_brace; else if (!HadError) { Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf, - diag::ext_delimited_escape_sequence) - << /*delimited*/ 0; + Features.CPlusPlus2b ? diag::warn_cxx2b_delimited_escape_sequence + : diag::ext_delimited_escape_sequence) + << /*delimited*/ 0 << (Features.CPlusPlus ? 1 : 0); } } @@ -641,8 +642,9 @@ static bool ProcessUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf, if ((IsDelimitedEscapeSequence || IsNamedEscapeSequence) && Diags) Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf, - diag::ext_delimited_escape_sequence) - << (IsNamedEscapeSequence ? 1 : 0); + Features.CPlusPlus2b ? diag::warn_cxx2b_delimited_escape_sequence + : diag::ext_delimited_escape_sequence) + << (IsNamedEscapeSequence ? 1 : 0) << (Features.CPlusPlus ? 
1 : 0); return true; } diff --git a/clang/lib/Lex/MacroInfo.cpp b/clang/lib/Lex/MacroInfo.cpp index 310b95f36771..eae12beb6244 100644 --- a/clang/lib/Lex/MacroInfo.cpp +++ b/clang/lib/Lex/MacroInfo.cpp @@ -213,7 +213,7 @@ MacroDirective::DefInfo MacroDirective::getDefinition() { isPublic = VisMD->isPublic(); } - return DefInfo(nullptr, UndefLoc, !isPublic || isPublic.getValue()); + return DefInfo(nullptr, UndefLoc, !isPublic || isPublic.value()); } const MacroDirective::DefInfo diff --git a/clang/lib/Lex/ModuleMap.cpp b/clang/lib/Lex/ModuleMap.cpp index c791e3e4e5ca..57e344622f25 100644 --- a/clang/lib/Lex/ModuleMap.cpp +++ b/clang/lib/Lex/ModuleMap.cpp @@ -1219,8 +1219,8 @@ void ModuleMap::resolveHeaderDirectives( Module *Mod, llvm::Optional<const FileEntry *> File) const { bool NeedsFramework = false; SmallVector<Module::UnresolvedHeaderDirective, 1> NewHeaders; - const auto Size = File ? File.getValue()->getSize() : 0; - const auto ModTime = File ? File.getValue()->getModificationTime() : 0; + const auto Size = File ? File.value()->getSize() : 0; + const auto ModTime = File ? File.value()->getModificationTime() : 0; for (auto &Header : Mod->UnresolvedHeaders) { if (File && ((Header.ModTime && Header.ModTime != ModTime) || diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp index 4dcef01e3e4c..352e1f217819 100644 --- a/clang/lib/Lex/PPDirectives.cpp +++ b/clang/lib/Lex/PPDirectives.cpp @@ -1983,6 +1983,10 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc, EnterAnnotationToken(SourceRange(HashLoc, EndLoc), tok::annot_module_begin, Action.ModuleForHeader); break; + case ImportAction::HeaderUnitImport: + EnterAnnotationToken(SourceRange(HashLoc, EndLoc), tok::annot_header_unit, + Action.ModuleForHeader); + break; case ImportAction::ModuleImport: EnterAnnotationToken(SourceRange(HashLoc, EndLoc), tok::annot_module_include, Action.ModuleForHeader); @@ -2191,6 +2195,17 @@ Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport( // known to have no effect beyond its effect on module visibility -- that is, // if it's got an include guard that is already defined, set to Import if it // is a modular header we've already built and should import. + + // For C++20 Modules + // [cpp.include]/7 If the header identified by the header-name denotes an + // importable header, it is implementation-defined whether the #include + // preprocessing directive is instead replaced by an import directive. + // For this implementation, the translation is permitted when we are parsing + // the Global Module Fragment, and not otherwise (the cases where it would be + // valid to replace an include with an import are highly constrained once in + // named module purview; this choice avoids considerable complexity in + // determining valid cases). + enum { Enter, Import, Skip, IncludeLimitReached } Action = Enter; if (PPOpts->SingleFileParseMode) @@ -2203,13 +2218,34 @@ Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport( alreadyIncluded(*File)) Action = IncludeLimitReached; + bool MaybeTranslateInclude = Action == Enter && File && SuggestedModule && + !isForModuleBuilding(SuggestedModule.getModule(), + getLangOpts().CurrentModule, + getLangOpts().ModuleName); + + // FIXME: We do not have a good way to disambiguate C++ clang modules from + // C++ standard modules (other than use/non-use of Header Units). 
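In source terms, the [cpp.include]/7 policy described in the comments above permits include-translation only in the global module fragment; a short illustration with hypothetical file and module names:

// a.h has been built as a C++20 header unit; b.h is a plain header.
module;           // global module fragment: include-translation may apply
#include "a.h"    // may be rewritten by the implementation to: import "a.h";
export module m;  // named module purview begins
#include "b.h"    // stays a textual include under this implementation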
+ Module *SM = SuggestedModule.getModule(); + // Maybe a usable Header Unit + bool UsableHeaderUnit = false; + if (getLangOpts().CPlusPlusModules && SM && SM->isHeaderUnit()) { + if (TrackGMFState.inGMF() || IsImportDecl) + UsableHeaderUnit = true; + else if (!IsImportDecl) { + // This is a Header Unit that we do not include-translate + SuggestedModule = ModuleMap::KnownHeader(); + SM = nullptr; + } + } + // Maybe a usable clang header module. + bool UsableHeaderModule = + (getLangOpts().CPlusPlusModules || getLangOpts().Modules) && SM && + !SM->isHeaderUnit(); + // Determine whether we should try to import the module for this #include, if // there is one. Don't do so if precompiled module support is disabled or we // are processing this module textually (because we're building the module). - if (Action == Enter && File && SuggestedModule && getLangOpts().Modules && - !isForModuleBuilding(SuggestedModule.getModule(), - getLangOpts().CurrentModule, - getLangOpts().ModuleName)) { + if (MaybeTranslateInclude && (UsableHeaderUnit || UsableHeaderModule)) { // If this include corresponds to a module but that module is // unavailable, diagnose the situation and bail out. // FIXME: Remove this; loadModule does the same check (but produces @@ -2226,7 +2262,7 @@ Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport( // FIXME: Should we have a second loadModule() overload to avoid this // extra lookup step? SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> Path; - for (Module *Mod = SuggestedModule.getModule(); Mod; Mod = Mod->Parent) + for (Module *Mod = SM; Mod; Mod = Mod->Parent) Path.push_back(std::make_pair(getIdentifierInfo(Mod->Name), FilenameTok.getLocation())); std::reverse(Path.begin(), Path.end()); @@ -2293,9 +2329,12 @@ Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport( // Ask HeaderInfo if we should enter this #include file. If not, #including // this file will have no effect. if (Action == Enter && File && - !HeaderInfo.ShouldEnterIncludeFile( - *this, &File->getFileEntry(), EnterOnce, getLangOpts().Modules, - SuggestedModule.getModule(), IsFirstIncludeOfFile)) { + !HeaderInfo.ShouldEnterIncludeFile(*this, &File->getFileEntry(), + EnterOnce, getLangOpts().Modules, SM, + IsFirstIncludeOfFile)) { + // C++ standard modules: + // If we are not in the GMF, then we textually include only + // clang modules: // Even if we've already preprocessed this header once and know that we // don't need to see its contents again, we still need to import it if it's // modular because we might not have imported it from this submodule before. @@ -2303,7 +2342,10 @@ Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport( // FIXME: We don't do this when compiling a PCH because the AST // serialization layer can't cope with it. This means we get local // submodule visibility semantics wrong in that case. - Action = (SuggestedModule && !getLangOpts().CompilingPCH) ? Import : Skip; + if (UsableHeaderUnit && !getLangOpts().CompilingPCH) + Action = TrackGMFState.inGMF() ? Import : Skip; + else + Action = (SuggestedModule && !getLangOpts().CompilingPCH) ? Import : Skip; } // Check for circular inclusion of the main file. @@ -2440,8 +2482,8 @@ Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport( switch (Action) { case Skip: // If we don't need to enter the file, stop now. 
- if (Module *M = SuggestedModule.getModule()) - return {ImportAction::SkippedModuleImport, M}; + if (SM) + return {ImportAction::SkippedModuleImport, SM}; return {ImportAction::None}; case IncludeLimitReached: @@ -2451,16 +2493,15 @@ Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport( case Import: { // If this is a module import, make it visible if needed. - Module *M = SuggestedModule.getModule(); - assert(M && "no module to import"); + assert(SM && "no module to import"); - makeModuleVisible(M, EndLoc); + makeModuleVisible(SM, EndLoc); if (IncludeTok.getIdentifierInfo()->getPPKeywordID() == tok::pp___include_macros) return {ImportAction::None}; - return {ImportAction::ModuleImport, M}; + return {ImportAction::ModuleImport, SM}; } case Enter: @@ -2492,13 +2533,14 @@ Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport( return {ImportAction::None}; // Determine if we're switching to building a new submodule, and which one. - if (auto *M = SuggestedModule.getModule()) { - if (M->getTopLevelModule()->ShadowingModule) { + // This does not apply for C++20 modules header units. + if (SM && !SM->isHeaderUnit()) { + if (SM->getTopLevelModule()->ShadowingModule) { // We are building a submodule that belongs to a shadowed module. This // means we find header files in the shadowed module. - Diag(M->DefinitionLoc, diag::err_module_build_shadowed_submodule) - << M->getFullModuleName(); - Diag(M->getTopLevelModule()->ShadowingModule->DefinitionLoc, + Diag(SM->DefinitionLoc, diag::err_module_build_shadowed_submodule) + << SM->getFullModuleName(); + Diag(SM->getTopLevelModule()->ShadowingModule->DefinitionLoc, diag::note_previous_definition); return {ImportAction::None}; } @@ -2511,22 +2553,22 @@ Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport( // that PCH, which means we should enter the submodule. We need to teach // the AST serialization layer to deal with the resulting AST. if (getLangOpts().CompilingPCH && - isForModuleBuilding(M, getLangOpts().CurrentModule, + isForModuleBuilding(SM, getLangOpts().CurrentModule, getLangOpts().ModuleName)) return {ImportAction::None}; assert(!CurLexerSubmodule && "should not have marked this as a module yet"); - CurLexerSubmodule = M; + CurLexerSubmodule = SM; // Let the macro handling code know that any future macros are within // the new submodule. - EnterSubmodule(M, EndLoc, /*ForPragma*/false); + EnterSubmodule(SM, EndLoc, /*ForPragma*/ false); // Let the parser know that any future declarations are within the new // submodule. // FIXME: There's no point doing this if we're handling a #__include_macros // directive. - return {ImportAction::ModuleBegin, M}; + return {ImportAction::ModuleBegin, SM}; } assert(!IsImportDecl && "failed to diagnose missing module for import decl"); diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp index bf46e5422bc8..f3be2107f985 100644 --- a/clang/lib/Lex/PPMacroExpansion.cpp +++ b/clang/lib/Lex/PPMacroExpansion.cpp @@ -1326,10 +1326,10 @@ already_lexed: // The last ')' has been reached; return the value if one found or // a diagnostic and a dummy value. if (Result) { - OS << Result.getValue(); + OS << Result.value(); // For strict conformance to __has_cpp_attribute rules, use 'L' // suffix for dated literals. 
- if (Result.getValue() > 1) + if (Result.value() > 1) OS << 'L'; } else { OS << 0; diff --git a/clang/lib/Lex/PreprocessingRecord.cpp b/clang/lib/Lex/PreprocessingRecord.cpp index 673ef637e396..2146a7c04217 100644 --- a/clang/lib/Lex/PreprocessingRecord.cpp +++ b/clang/lib/Lex/PreprocessingRecord.cpp @@ -115,7 +115,7 @@ bool PreprocessingRecord::isEntityInFileID(iterator PPEI, FileID FID) { Optional<bool> IsInFile = ExternalSource->isPreprocessedEntityInFileID(LoadedIndex, FID); if (IsInFile) - return IsInFile.getValue(); + return IsInFile.value(); // The external source did not provide a definite answer, go and deserialize // the entity to check it. diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp index 281f01fb28a4..5310db3c882b 100644 --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -941,6 +941,9 @@ void Preprocessor::Lex(Token &Result) { // Update ImportSeqState to track our position within a C++20 import-seq // if this token is being produced as a result of phase 4 of translation. + // Update TrackGMFState to decide if we are currently in a Global Module + // Fragment. GMF state updates should precede ImportSeq ones, since GMF state + // depends on the prevailing ImportSeq state in two cases. if (getLangOpts().CPlusPlusModules && LexLevel == 1 && !Result.getFlag(Token::IsReinjected)) { switch (Result.getKind()) { @@ -953,7 +956,11 @@ void Preprocessor::Lex(Token &Result) { case tok::r_brace: ImportSeqState.handleCloseBrace(); break; + // This token is injected to represent the translation of '#include "a.h"' + // into "import a.h;". Mimic the notional ';'. + case tok::annot_module_include: case tok::semi: + TrackGMFState.handleSemi(); ImportSeqState.handleSemi(); break; case tok::header_name: @@ -961,10 +968,12 @@ void Preprocessor::Lex(Token &Result) { ImportSeqState.handleHeaderName(); break; case tok::kw_export: + TrackGMFState.handleExport(); ImportSeqState.handleExport(); break; case tok::identifier: if (Result.getIdentifierInfo()->isModulesImport()) { + TrackGMFState.handleImport(ImportSeqState.afterTopLevelSeq()); ImportSeqState.handleImport(); if (ImportSeqState.afterImportSeq()) { ModuleImportLoc = Result.getLocation(); @@ -973,9 +982,13 @@ void Preprocessor::Lex(Token &Result) { CurLexerKind = CLK_LexAfterModuleImport; } break; + } else if (Result.getIdentifierInfo() == getIdentifierInfo("module")) { + TrackGMFState.handleModule(ImportSeqState.afterTopLevelSeq()); + break; } LLVM_FALLTHROUGH; default: + TrackGMFState.handleMisc(); ImportSeqState.handleMisc(); break; } @@ -1222,6 +1235,7 @@ bool Preprocessor::LexAfterModuleImport(Token &Result) { LLVM_FALLTHROUGH; case ImportAction::ModuleImport: + case ImportAction::HeaderUnitImport: case ImportAction::SkippedModuleImport: // We chose to import (or textually enter) the file. Convert the // header-name token into a header unit annotation token. diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp index 08dccf9e43f7..5f53f9d684e7 100644 --- a/clang/lib/Parse/ParseOpenMP.cpp +++ b/clang/lib/Parse/ParseOpenMP.cpp @@ -1884,11 +1884,11 @@ void Parser::ParseOMPDeclareTargetClauses( if (DevTypeData) { if (DeviceTypeLoc.isValid()) { // We already saw another device_type clause, diagnose it. 
- Diag(DevTypeData.getValue().Loc, + Diag(DevTypeData.value().Loc, diag::warn_omp_more_one_device_type_clause); break; } - switch (static_cast<OpenMPDeviceType>(DevTypeData.getValue().Type)) { + switch (static_cast<OpenMPDeviceType>(DevTypeData.value().Type)) { case OMPC_DEVICE_TYPE_any: DTCI.DT = OMPDeclareTargetDeclAttr::DT_Any; break; @@ -3634,20 +3634,20 @@ OMPClause *Parser::ParseOpenMPSimpleClause(OpenMPClauseKind Kind, if (!Val || ParseOnly) return nullptr; if (getLangOpts().OpenMP < 51 && Kind == OMPC_default && - (static_cast<DefaultKind>(Val.getValue().Type) == OMP_DEFAULT_private || - static_cast<DefaultKind>(Val.getValue().Type) == + (static_cast<DefaultKind>(Val.value().Type) == OMP_DEFAULT_private || + static_cast<DefaultKind>(Val.value().Type) == OMP_DEFAULT_firstprivate)) { - Diag(Val.getValue().LOpen, diag::err_omp_invalid_dsa) - << getOpenMPClauseName(static_cast<DefaultKind>(Val.getValue().Type) == + Diag(Val.value().LOpen, diag::err_omp_invalid_dsa) + << getOpenMPClauseName(static_cast<DefaultKind>(Val.value().Type) == OMP_DEFAULT_private ? OMPC_private : OMPC_firstprivate) << getOpenMPClauseName(OMPC_default) << "5.1"; return nullptr; } - return Actions.ActOnOpenMPSimpleClause( - Kind, Val.getValue().Type, Val.getValue().TypeLoc, Val.getValue().LOpen, - Val.getValue().Loc, Val.getValue().RLoc); + return Actions.ActOnOpenMPSimpleClause(Kind, Val.value().Type, + Val.value().TypeLoc, Val.value().LOpen, + Val.value().Loc, Val.value().RLoc); } /// Parsing of OpenMP clauses like 'ordered'. diff --git a/clang/lib/Parse/Parser.cpp b/clang/lib/Parse/Parser.cpp index 6f63d01bc8ad..ab8748c2c63d 100644 --- a/clang/lib/Parse/Parser.cpp +++ b/clang/lib/Parse/Parser.cpp @@ -663,12 +663,22 @@ bool Parser::ParseTopLevelDecl(DeclGroupPtrTy &Result, return false; } - case tok::annot_module_include: - Actions.ActOnModuleInclude(Tok.getLocation(), - reinterpret_cast<Module *>( - Tok.getAnnotationValue())); + case tok::annot_module_include: { + auto Loc = Tok.getLocation(); + Module *Mod = reinterpret_cast<Module *>(Tok.getAnnotationValue()); + // FIXME: We need a better way to disambiguate C++ clang modules and + // standard C++ modules. + if (!getLangOpts().CPlusPlusModules || !Mod->isHeaderUnit()) + Actions.ActOnModuleInclude(Loc, Mod); + else { + DeclResult Import = + Actions.ActOnModuleImport(Loc, SourceLocation(), Loc, Mod); + Decl *ImportDecl = Import.isInvalid() ? nullptr : Import.get(); + Result = Actions.ConvertDeclToDeclGroup(ImportDecl); + } ConsumeAnnotationToken(); return false; + } case tok::annot_module_begin: Actions.ActOnModuleBegin(Tok.getLocation(), reinterpret_cast<Module *>( diff --git a/clang/lib/Sema/HLSLExternalSemaSource.cpp b/clang/lib/Sema/HLSLExternalSemaSource.cpp new file mode 100644 index 000000000000..56c2dd40bd9a --- /dev/null +++ b/clang/lib/Sema/HLSLExternalSemaSource.cpp @@ -0,0 +1,96 @@ +//===--- HLSLExternalSemaSource.cpp - HLSL Sema Source --------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Sema/HLSLExternalSemaSource.h"
+#include "clang/AST/ASTContext.h"
+#include "clang/AST/DeclCXX.h"
+#include "clang/Basic/AttrKinds.h"
+#include "clang/Sema/Sema.h"
+
+using namespace clang;
+
+HLSLExternalSemaSource::~HLSLExternalSemaSource() {}
+
+void HLSLExternalSemaSource::InitializeSema(Sema &S) {
+  SemaPtr = &S;
+  ASTContext &AST = SemaPtr->getASTContext();
+  IdentifierInfo &HLSL = AST.Idents.get("hlsl", tok::TokenKind::identifier);
+  HLSLNamespace =
+      NamespaceDecl::Create(AST, AST.getTranslationUnitDecl(), false,
+                            SourceLocation(), SourceLocation(), &HLSL, nullptr);
+  HLSLNamespace->setImplicit(true);
+  AST.getTranslationUnitDecl()->addDecl(HLSLNamespace);
+  defineHLSLVectorAlias();
+
+  // This adds a `using namespace hlsl` directive. In DXC, we don't put HLSL's
+  // built-in types inside a namespace, but we are planning to change that in
+  // the near future. In order to be source compatible, older versions of HLSL
+  // will need to implicitly use the hlsl namespace. For now in clang
+  // everything will get added to the namespace, and we can remove the using
+  // directive for future language versions to match HLSL's evolution.
+  auto *UsingDecl = UsingDirectiveDecl::Create(
+      AST, AST.getTranslationUnitDecl(), SourceLocation(), SourceLocation(),
+      NestedNameSpecifierLoc(), SourceLocation(), HLSLNamespace,
+      AST.getTranslationUnitDecl());
+
+  AST.getTranslationUnitDecl()->addDecl(UsingDecl);
+}
+
+void HLSLExternalSemaSource::defineHLSLVectorAlias() {
+  ASTContext &AST = SemaPtr->getASTContext();
+
+  llvm::SmallVector<NamedDecl *> TemplateParams;
+
+  auto *TypeParam = TemplateTypeParmDecl::Create(
+      AST, HLSLNamespace, SourceLocation(), SourceLocation(), 0, 0,
+      &AST.Idents.get("element", tok::TokenKind::identifier), false, false);
+  TypeParam->setDefaultArgument(AST.getTrivialTypeSourceInfo(AST.FloatTy));
+
+  TemplateParams.emplace_back(TypeParam);
+
+  auto *SizeParam = NonTypeTemplateParmDecl::Create(
+      AST, HLSLNamespace, SourceLocation(), SourceLocation(), 0, 1,
+      &AST.Idents.get("element_count", tok::TokenKind::identifier), AST.IntTy,
+      false, AST.getTrivialTypeSourceInfo(AST.IntTy));
+  Expr *LiteralExpr =
+      IntegerLiteral::Create(AST, llvm::APInt(AST.getIntWidth(AST.IntTy), 4),
+                             AST.IntTy, SourceLocation());
+  SizeParam->setDefaultArgument(LiteralExpr);
+  TemplateParams.emplace_back(SizeParam);
+
+  auto *ParamList =
+      TemplateParameterList::Create(AST, SourceLocation(), SourceLocation(),
+                                    TemplateParams, SourceLocation(), nullptr);
+
+  IdentifierInfo &II = AST.Idents.get("vector", tok::TokenKind::identifier);
+
+  QualType AliasType = AST.getDependentSizedExtVectorType(
+      AST.getTemplateTypeParmType(0, 0, false, TypeParam),
+      DeclRefExpr::Create(
+          AST, NestedNameSpecifierLoc(), SourceLocation(), SizeParam, false,
+          DeclarationNameInfo(SizeParam->getDeclName(), SourceLocation()),
+          AST.IntTy, VK_LValue),
+      SourceLocation());
+
+  auto *Record = TypeAliasDecl::Create(AST, HLSLNamespace, SourceLocation(),
+                                       SourceLocation(), &II,
+                                       AST.getTrivialTypeSourceInfo(AliasType));
+  Record->setImplicit(true);
+
+  auto *Template =
+      TypeAliasTemplateDecl::Create(AST, HLSLNamespace, SourceLocation(),
+                                    Record->getIdentifier(), ParamList, Record);
+
+  Record->setDescribedAliasTemplate(Template);
+  Template->setImplicit(true);
+
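// For orientation, an editorial sketch: the alias template assembled in
// defineHLSLVectorAlias() corresponds roughly to the C++ declaration below.
// The ext_vector_type spelling is an assumption inferred from the
// DependentSizedExtVectorType requested above, not something this patch
// spells out.
namespace hlsl {
template <typename element = float, int element_count = 4>
using vector = element __attribute__((ext_vector_type(element_count)));
} // namespace hlsl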
Template->setLexicalDeclContext(Record->getDeclContext()); + HLSLNamespace->addDecl(Template); +} diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp index 8f8144d658d8..185ccebe2717 100644 --- a/clang/lib/Sema/SemaCUDA.cpp +++ b/clang/lib/Sema/SemaCUDA.cpp @@ -381,13 +381,13 @@ bool Sema::inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl, InferredTarget = BaseMethodTarget; } else { bool ResolutionError = resolveCalleeCUDATargetConflict( - InferredTarget.getValue(), BaseMethodTarget, + InferredTarget.value(), BaseMethodTarget, InferredTarget.getPointer()); if (ResolutionError) { if (Diagnose) { Diag(ClassDecl->getLocation(), diag::note_implicit_member_target_infer_collision) - << (unsigned)CSM << InferredTarget.getValue() << BaseMethodTarget; + << (unsigned)CSM << InferredTarget.value() << BaseMethodTarget; } MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context)); return true; @@ -425,14 +425,13 @@ bool Sema::inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl, InferredTarget = FieldMethodTarget; } else { bool ResolutionError = resolveCalleeCUDATargetConflict( - InferredTarget.getValue(), FieldMethodTarget, + InferredTarget.value(), FieldMethodTarget, InferredTarget.getPointer()); if (ResolutionError) { if (Diagnose) { Diag(ClassDecl->getLocation(), diag::note_implicit_member_target_infer_collision) - << (unsigned)CSM << InferredTarget.getValue() - << FieldMethodTarget; + << (unsigned)CSM << InferredTarget.value() << FieldMethodTarget; } MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context)); return true; @@ -445,9 +444,9 @@ bool Sema::inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl, // it's the least restrictive option that can be invoked from any target. bool NeedsH = true, NeedsD = true; if (InferredTarget) { - if (InferredTarget.getValue() == CFT_Device) + if (InferredTarget.value() == CFT_Device) NeedsH = false; - else if (InferredTarget.getValue() == CFT_Host) + else if (InferredTarget.value() == CFT_Host) NeedsD = false; } diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 79420cc27699..aed1d9befe2b 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -109,6 +109,11 @@ SourceLocation Sema::getLocationOfStringLiteralByte(const StringLiteral *SL, Context.getTargetInfo()); } +static constexpr unsigned short combineFAPK(Sema::FormatArgumentPassingKind A, + Sema::FormatArgumentPassingKind B) { + return (A << 8) | B; +} + /// Checks that a call expression's argument count is at least the desired /// number. This is useful when doing custom type-checking on a variadic /// function. Returns true on error. @@ -1875,7 +1880,7 @@ static ExprResult SemaBuiltinLaunder(Sema &S, CallExpr *TheCall) { }(); if (DiagSelect) { S.Diag(TheCall->getBeginLoc(), diag::err_builtin_launder_invalid_arg) - << DiagSelect.getValue() << TheCall->getSourceRange(); + << DiagSelect.value() << TheCall->getSourceRange(); return ExprError(); } @@ -2408,7 +2413,7 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, bool ReturnsPointer = BuiltinID == Builtin::BIaddressof || BuiltinID == Builtin::BI__addressof; if (!(Param->isReferenceType() && - (ReturnsPointer ? Result->isPointerType() + (ReturnsPointer ? 
Result->isAnyPointerType()
                                 : Result->isReferenceType()) &&
        Context.hasSameUnqualifiedType(Param->getPointeeType(),
                                       Result->getPointeeType()))) {
@@ -5403,10 +5408,16 @@ bool Sema::CheckX86BuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID,
 /// Returns true when the format fits the function and the FormatStringInfo has
 /// been populated.
 bool Sema::getFormatStringInfo(const FormatAttr *Format, bool IsCXXMember,
-                               FormatStringInfo *FSI) {
-  FSI->HasVAListArg = Format->getFirstArg() == 0;
+                               bool IsVariadic, FormatStringInfo *FSI) {
+  if (Format->getFirstArg() == 0)
+    FSI->ArgPassingKind = FAPK_VAList;
+  else if (IsVariadic)
+    FSI->ArgPassingKind = FAPK_Variadic;
+  else
+    FSI->ArgPassingKind = FAPK_Fixed;
   FSI->FormatIdx = Format->getFormatIdx() - 1;
-  FSI->FirstDataArg = FSI->HasVAListArg ? 0 : Format->getFirstArg() - 1;
+  FSI->FirstDataArg =
+      FSI->ArgPassingKind == FAPK_VAList ? 0 : Format->getFirstArg() - 1;
 
   // The way the format attribute works in GCC, the implicit this argument
   // of member functions is counted. However, it doesn't appear in our own
@@ -5461,7 +5472,7 @@ static void CheckNonNullArgument(Sema &S,
 bool Sema::GetFormatNSStringIdx(const FormatAttr *Format, unsigned &Idx) {
   FormatStringInfo FSI;
   if ((GetFormatStringType(Format) == FST_NSString) &&
-      getFormatStringInfo(Format, false, &FSI)) {
+      getFormatStringInfo(Format, false, true, &FSI)) {
     Idx = FSI.FormatIdx;
     return true;
   }
@@ -5615,6 +5626,40 @@ static void CheckNonNullArguments(Sema &S,
   }
 }
 
+// 16 byte ByVal alignment not due to a vector member is not honoured by XL
+// on AIX. Emit a warning here so users know they are generating binary
+// incompatible code.
+// Here we try to get information about the alignment of the struct member
+// from the struct passed to the caller function. We only warn when the struct
+// is passed byval, hence the series of checks and early returns if we are not
+// passing a struct byval.
+void Sema::checkAIXMemberAlignment(SourceLocation Loc, const Expr *Arg) {
+  const auto *ICE = dyn_cast<ImplicitCastExpr>(Arg->IgnoreParens());
+  if (!ICE)
+    return;
+
+  const auto *DR = dyn_cast<DeclRefExpr>(ICE->getSubExpr());
+  if (!DR)
+    return;
+
+  const auto *PD = dyn_cast<ParmVarDecl>(DR->getDecl());
+  if (!PD || !PD->getType()->isRecordType())
+    return;
+
+  QualType ArgType = Arg->getType();
+  for (const FieldDecl *FD :
+       ArgType->castAs<RecordType>()->getDecl()->fields()) {
+    if (const auto *AA = FD->getAttr<AlignedAttr>()) {
+      CharUnits Alignment =
+          Context.toCharUnitsFromBits(AA->getAlignment(Context));
+      if (Alignment.getQuantity() == 16) {
+        Diag(FD->getLocation(), diag::warn_not_xl_compatible) << FD;
+        Diag(Loc, diag::note_misaligned_member_used_here) << PD;
+      }
+    }
+  }
+}
+
 /// Warn if a pointer or reference argument passed to a function points to an
 /// object that is less aligned than the parameter.
This can happen when /// creating a typedef with a lower alignment than the original type and then @@ -5725,6 +5770,12 @@ void Sema::checkCall(NamedDecl *FDecl, const FunctionProtoType *Proto, if (Arg->containsErrors()) continue; + if (Context.getTargetInfo().getTriple().isOSAIX() && FDecl && Arg && + FDecl->hasLinkage() && + FDecl->getFormalLinkage() != InternalLinkage && + CallType == VariadicDoesNotApply) + checkAIXMemberAlignment((Arg->getExprLoc()), Arg); + QualType ParamTy = Proto->getParamType(ArgIdx); QualType ArgTy = Arg->getType(); CheckArgAlignment(Arg->getExprLoc(), FDecl, std::to_string(ArgIdx + 1), @@ -7695,7 +7746,7 @@ bool Sema::SemaBuiltinOSLogFormat(CallExpr *TheCall) { llvm::SmallBitVector CheckedVarArgs(NumArgs, false); ArrayRef<const Expr *> Args(TheCall->getArgs(), TheCall->getNumArgs()); bool Success = CheckFormatArguments( - Args, /*HasVAListArg*/ false, FormatIdx, FirstDataArg, FST_OSLog, + Args, FAPK_Variadic, FormatIdx, FirstDataArg, FST_OSLog, VariadicFunction, TheCall->getBeginLoc(), SourceRange(), CheckedVarArgs); if (!Success) @@ -8412,19 +8463,15 @@ class FormatStringLiteral { SourceLocation getEndLoc() const LLVM_READONLY { return FExpr->getEndLoc(); } }; -} // namespace +} // namespace -static void CheckFormatString(Sema &S, const FormatStringLiteral *FExpr, - const Expr *OrigFormatExpr, - ArrayRef<const Expr *> Args, - bool HasVAListArg, unsigned format_idx, - unsigned firstDataArg, - Sema::FormatStringType Type, - bool inFunctionCall, - Sema::VariadicCallType CallType, - llvm::SmallBitVector &CheckedVarArgs, - UncoveredArgHandler &UncoveredArg, - bool IgnoreStringsWithoutSpecifiers); +static void CheckFormatString( + Sema &S, const FormatStringLiteral *FExpr, const Expr *OrigFormatExpr, + ArrayRef<const Expr *> Args, Sema::FormatArgumentPassingKind APK, + unsigned format_idx, unsigned firstDataArg, Sema::FormatStringType Type, + bool inFunctionCall, Sema::VariadicCallType CallType, + llvm::SmallBitVector &CheckedVarArgs, UncoveredArgHandler &UncoveredArg, + bool IgnoreStringsWithoutSpecifiers); // Determine if an expression is a string literal or constant string. // If this function returns false on the arguments to a function expecting a @@ -8432,16 +8479,15 @@ static void CheckFormatString(Sema &S, const FormatStringLiteral *FExpr, // True string literals are then checked by CheckFormatString. 
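// A minimal caller-side sketch of the classification implemented below; the
// call sites are hypothetical, and the non-literal case is what
// -Wformat-nonliteral reports:
#include <cstdio>
void classification_sketch(const char *fmt, int n) {
  std::printf("%d\n", n); // string literal: fully checked (SLCT_CheckedLiteral)
  std::printf(fmt, n);    // not a literal: SLCT_NotALiteral, checking skipped
}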
static StringLiteralCheckType checkFormatStringExpr(Sema &S, const Expr *E, ArrayRef<const Expr *> Args, - bool HasVAListArg, unsigned format_idx, + Sema::FormatArgumentPassingKind APK, unsigned format_idx, unsigned firstDataArg, Sema::FormatStringType Type, Sema::VariadicCallType CallType, bool InFunctionCall, llvm::SmallBitVector &CheckedVarArgs, - UncoveredArgHandler &UncoveredArg, - llvm::APSInt Offset, + UncoveredArgHandler &UncoveredArg, llvm::APSInt Offset, bool IgnoreStringsWithoutSpecifiers = false) { if (S.isConstantEvaluated()) return SLCT_NotALiteral; - tryAgain: +tryAgain: assert(Offset.isSigned() && "invalid offset"); if (E->isTypeDependent() || E->isValueDependent()) @@ -8486,9 +8532,8 @@ checkFormatStringExpr(Sema &S, const Expr *E, ArrayRef<const Expr *> Args, if (!CheckLeft) Left = SLCT_UncheckedLiteral; else { - Left = checkFormatStringExpr(S, C->getTrueExpr(), Args, - HasVAListArg, format_idx, firstDataArg, - Type, CallType, InFunctionCall, + Left = checkFormatStringExpr(S, C->getTrueExpr(), Args, APK, format_idx, + firstDataArg, Type, CallType, InFunctionCall, CheckedVarArgs, UncoveredArg, Offset, IgnoreStringsWithoutSpecifiers); if (Left == SLCT_NotALiteral || !CheckRight) { @@ -8497,8 +8542,8 @@ checkFormatStringExpr(Sema &S, const Expr *E, ArrayRef<const Expr *> Args, } StringLiteralCheckType Right = checkFormatStringExpr( - S, C->getFalseExpr(), Args, HasVAListArg, format_idx, firstDataArg, - Type, CallType, InFunctionCall, CheckedVarArgs, UncoveredArg, Offset, + S, C->getFalseExpr(), Args, APK, format_idx, firstDataArg, Type, + CallType, InFunctionCall, CheckedVarArgs, UncoveredArg, Offset, IgnoreStringsWithoutSpecifiers); return (CheckLeft && Left < Right) ? Left : Right; @@ -8548,42 +8593,85 @@ checkFormatStringExpr(Sema &S, const Expr *E, ArrayRef<const Expr *> Args, if (InitList->isStringLiteralInit()) Init = InitList->getInit(0)->IgnoreParenImpCasts(); } - return checkFormatStringExpr(S, Init, Args, - HasVAListArg, format_idx, - firstDataArg, Type, CallType, - /*InFunctionCall*/ false, CheckedVarArgs, - UncoveredArg, Offset); + return checkFormatStringExpr( + S, Init, Args, APK, format_idx, firstDataArg, Type, CallType, + /*InFunctionCall*/ false, CheckedVarArgs, UncoveredArg, Offset); } } - // For vprintf* functions (i.e., HasVAListArg==true), we add a - // special check to see if the format string is a function parameter - // of the function calling the printf function. If the function - // has an attribute indicating it is a printf-like function, then we - // should suppress warnings concerning non-literals being used in a call - // to a vprintf function. For example: + // When the format argument is an argument of this function, and this + // function also has the format attribute, there are several interactions + // for which there shouldn't be a warning. For instance, when calling + // v*printf from a function that has the printf format attribute, we + // should not emit a warning about using `fmt`, even though it's not + // constant, because the arguments have already been checked for the + // caller of `logmessage`: // - // void - // logmessage(char const *fmt __attribute__ (format (printf, 1, 2)), ...){ - // va_list ap; - // va_start(ap, fmt); - // vprintf(fmt, ap); // Do NOT emit a warning about "fmt". - // ... + // __attribute__((format(printf, 1, 2))) + // void logmessage(char const *fmt, ...) { + // va_list ap; + // va_start(ap, fmt); + // vprintf(fmt, ap); /* do not emit a warning about "fmt" */ + // ... 
// } - if (HasVAListArg) { - if (const ParmVarDecl *PV = dyn_cast<ParmVarDecl>(VD)) { - if (const Decl *D = dyn_cast<Decl>(PV->getDeclContext())) { - int PVIndex = PV->getFunctionScopeIndex() + 1; - for (const auto *PVFormat : D->specific_attrs<FormatAttr>()) { - // adjust for implicit parameter - if (const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(D)) - if (MD->isInstance()) - ++PVIndex; + // + // Another interaction that we need to support is calling a variadic + // format function from a format function that has fixed arguments. For + // instance: + // + // __attribute__((format(printf, 1, 2))) + // void logstring(char const *fmt, char const *str) { + // printf(fmt, str); /* do not emit a warning about "fmt" */ + // } + // + // Same (and perhaps more relatably) for the variadic template case: + // + // template<typename... Args> + // __attribute__((format(printf, 1, 2))) + // void log(const char *fmt, Args&&... args) { + // printf(fmt, forward<Args>(args)...); + // /* do not emit a warning about "fmt" */ + // } + // + // Due to implementation difficulty, we only check the format, not the + // format arguments, in all cases. + // + if (const auto *PV = dyn_cast<ParmVarDecl>(VD)) { + if (const auto *D = dyn_cast<Decl>(PV->getDeclContext())) { + for (const auto *PVFormat : D->specific_attrs<FormatAttr>()) { + bool IsCXXMember = false; + if (const auto *MD = dyn_cast<CXXMethodDecl>(D)) + IsCXXMember = MD->isInstance(); + + bool IsVariadic = false; + if (const FunctionType *FnTy = D->getFunctionType()) + IsVariadic = cast<FunctionProtoType>(FnTy)->isVariadic(); + else if (const auto *BD = dyn_cast<BlockDecl>(D)) + IsVariadic = BD->isVariadic(); + else if (const auto *OMD = dyn_cast<ObjCMethodDecl>(D)) + IsVariadic = OMD->isVariadic(); + + Sema::FormatStringInfo CallerFSI; + if (Sema::getFormatStringInfo(PVFormat, IsCXXMember, IsVariadic, + &CallerFSI)) { // We also check if the formats are compatible. // We can't pass a 'scanf' string to a 'printf' function. 
- if (PVIndex == PVFormat->getFormatIdx() && - Type == S.GetFormatStringType(PVFormat)) - return SLCT_UncheckedLiteral; + if (PV->getFunctionScopeIndex() == CallerFSI.FormatIdx && + Type == S.GetFormatStringType(PVFormat)) { + // Lastly, check that argument passing kinds transition in a + // way that makes sense: + // from a caller with FAPK_VAList, allow FAPK_VAList + // from a caller with FAPK_Fixed, allow FAPK_Fixed + // from a caller with FAPK_Fixed, allow FAPK_Variadic + // from a caller with FAPK_Variadic, allow FAPK_VAList + switch (combineFAPK(CallerFSI.ArgPassingKind, APK)) { + case combineFAPK(Sema::FAPK_VAList, Sema::FAPK_VAList): + case combineFAPK(Sema::FAPK_Fixed, Sema::FAPK_Fixed): + case combineFAPK(Sema::FAPK_Fixed, Sema::FAPK_Variadic): + case combineFAPK(Sema::FAPK_Variadic, Sema::FAPK_VAList): + return SLCT_UncheckedLiteral; + } + } } } } @@ -8602,8 +8690,8 @@ checkFormatStringExpr(Sema &S, const Expr *E, ArrayRef<const Expr *> Args, for (const auto *FA : ND->specific_attrs<FormatArgAttr>()) { const Expr *Arg = CE->getArg(FA->getFormatIdx().getASTIndex()); StringLiteralCheckType Result = checkFormatStringExpr( - S, Arg, Args, HasVAListArg, format_idx, firstDataArg, Type, - CallType, InFunctionCall, CheckedVarArgs, UncoveredArg, Offset, + S, Arg, Args, APK, format_idx, firstDataArg, Type, CallType, + InFunctionCall, CheckedVarArgs, UncoveredArg, Offset, IgnoreStringsWithoutSpecifiers); if (IsFirst) { CommonResult = Result; @@ -8618,12 +8706,10 @@ checkFormatStringExpr(Sema &S, const Expr *E, ArrayRef<const Expr *> Args, if (BuiltinID == Builtin::BI__builtin___CFStringMakeConstantString || BuiltinID == Builtin::BI__builtin___NSStringMakeConstantString) { const Expr *Arg = CE->getArg(0); - return checkFormatStringExpr(S, Arg, Args, - HasVAListArg, format_idx, - firstDataArg, Type, CallType, - InFunctionCall, CheckedVarArgs, - UncoveredArg, Offset, - IgnoreStringsWithoutSpecifiers); + return checkFormatStringExpr( + S, Arg, Args, APK, format_idx, firstDataArg, Type, CallType, + InFunctionCall, CheckedVarArgs, UncoveredArg, Offset, + IgnoreStringsWithoutSpecifiers); } } } @@ -8651,8 +8737,8 @@ checkFormatStringExpr(Sema &S, const Expr *E, ArrayRef<const Expr *> Args, const Expr *Arg = ME->getArg(FA->getFormatIdx().getASTIndex()); return checkFormatStringExpr( - S, Arg, Args, HasVAListArg, format_idx, firstDataArg, Type, - CallType, InFunctionCall, CheckedVarArgs, UncoveredArg, Offset, + S, Arg, Args, APK, format_idx, firstDataArg, Type, CallType, + InFunctionCall, CheckedVarArgs, UncoveredArg, Offset, IgnoreStringsWithoutSpecifiers); } } @@ -8675,9 +8761,8 @@ checkFormatStringExpr(Sema &S, const Expr *E, ArrayRef<const Expr *> Args, return SLCT_NotALiteral; } FormatStringLiteral FStr(StrE, Offset.sextOrTrunc(64).getSExtValue()); - CheckFormatString(S, &FStr, E, Args, HasVAListArg, format_idx, - firstDataArg, Type, InFunctionCall, CallType, - CheckedVarArgs, UncoveredArg, + CheckFormatString(S, &FStr, E, Args, APK, format_idx, firstDataArg, Type, + InFunctionCall, CallType, CheckedVarArgs, UncoveredArg, IgnoreStringsWithoutSpecifiers); return SLCT_CheckedLiteral; } @@ -8756,24 +8841,25 @@ Sema::FormatStringType Sema::GetFormatStringType(const FormatAttr *Format) { /// functions) for correct use of format strings. /// Returns true if a format string has been fully checked. 
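// The switch above relies on combineFAPK packing two 8-bit enumerators into a
// single switch key, so a pair of argument-passing kinds can be matched in one
// case label. A self-contained sketch of the idiom (names are illustrative,
// not Clang's):
namespace fapk_sketch {
enum Kind : unsigned char { VAList, Fixed, Variadic };
constexpr unsigned short combine(Kind A, Kind B) { return (A << 8) | B; }
bool transitionAllowed(Kind Caller, Kind Callee) {
  switch (combine(Caller, Callee)) {
  case combine(VAList, VAList):   // vprintf-style wrapper calling vprintf
  case combine(Fixed, Fixed):     // fixed-arg wrapper calling a fixed-arg one
  case combine(Fixed, Variadic):  // fixed-arg wrapper calling printf itself
  case combine(Variadic, VAList): // variadic wrapper handing off its va_list
    return true;
  default:
    return false;
  }
}
} // namespace fapk_sketch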
bool Sema::CheckFormatArguments(const FormatAttr *Format, - ArrayRef<const Expr *> Args, - bool IsCXXMember, - VariadicCallType CallType, - SourceLocation Loc, SourceRange Range, + ArrayRef<const Expr *> Args, bool IsCXXMember, + VariadicCallType CallType, SourceLocation Loc, + SourceRange Range, llvm::SmallBitVector &CheckedVarArgs) { FormatStringInfo FSI; - if (getFormatStringInfo(Format, IsCXXMember, &FSI)) - return CheckFormatArguments(Args, FSI.HasVAListArg, FSI.FormatIdx, + if (getFormatStringInfo(Format, IsCXXMember, CallType != VariadicDoesNotApply, + &FSI)) + return CheckFormatArguments(Args, FSI.ArgPassingKind, FSI.FormatIdx, FSI.FirstDataArg, GetFormatStringType(Format), CallType, Loc, Range, CheckedVarArgs); return false; } bool Sema::CheckFormatArguments(ArrayRef<const Expr *> Args, - bool HasVAListArg, unsigned format_idx, - unsigned firstDataArg, FormatStringType Type, - VariadicCallType CallType, - SourceLocation Loc, SourceRange Range, + Sema::FormatArgumentPassingKind APK, + unsigned format_idx, unsigned firstDataArg, + FormatStringType Type, + VariadicCallType CallType, SourceLocation Loc, + SourceRange Range, llvm::SmallBitVector &CheckedVarArgs) { // CHECK: printf/scanf-like function is called with no format string. if (format_idx >= Args.size()) { @@ -8796,12 +8882,11 @@ bool Sema::CheckFormatArguments(ArrayRef<const Expr *> Args, // ObjC string uses the same format specifiers as C string, so we can use // the same format string checking logic for both ObjC and C strings. UncoveredArgHandler UncoveredArg; - StringLiteralCheckType CT = - checkFormatStringExpr(*this, OrigFormatExpr, Args, HasVAListArg, - format_idx, firstDataArg, Type, CallType, - /*IsFunctionCall*/ true, CheckedVarArgs, - UncoveredArg, - /*no string offset*/ llvm::APSInt(64, false) = 0); + StringLiteralCheckType CT = checkFormatStringExpr( + *this, OrigFormatExpr, Args, APK, format_idx, firstDataArg, Type, + CallType, + /*IsFunctionCall*/ true, CheckedVarArgs, UncoveredArg, + /*no string offset*/ llvm::APSInt(64, false) = 0); // Generate a diagnostic where an uncovered argument is detected. if (UncoveredArg.hasUncoveredArg()) { @@ -8864,7 +8949,7 @@ protected: const unsigned FirstDataArg; const unsigned NumDataArgs; const char *Beg; // Start of format string. 
- const bool HasVAListArg; + const Sema::FormatArgumentPassingKind ArgPassingKind; ArrayRef<const Expr *> Args; unsigned FormatIdx; llvm::SmallBitVector CoveredArgs; @@ -8879,14 +8964,15 @@ public: CheckFormatHandler(Sema &s, const FormatStringLiteral *fexpr, const Expr *origFormatExpr, const Sema::FormatStringType type, unsigned firstDataArg, - unsigned numDataArgs, const char *beg, bool hasVAListArg, + unsigned numDataArgs, const char *beg, + Sema::FormatArgumentPassingKind APK, ArrayRef<const Expr *> Args, unsigned formatIdx, bool inFunctionCall, Sema::VariadicCallType callType, llvm::SmallBitVector &CheckedVarArgs, UncoveredArgHandler &UncoveredArg) : S(s), FExpr(fexpr), OrigFormatExpr(origFormatExpr), FSType(type), FirstDataArg(firstDataArg), NumDataArgs(numDataArgs), Beg(beg), - HasVAListArg(hasVAListArg), Args(Args), FormatIdx(formatIdx), + ArgPassingKind(APK), Args(Args), FormatIdx(formatIdx), inFunctionCall(inFunctionCall), CallType(callType), CheckedVarArgs(CheckedVarArgs), UncoveredArg(UncoveredArg) { CoveredArgs.resize(numDataArgs); @@ -9122,8 +9208,8 @@ const Expr *CheckFormatHandler::getDataArg(unsigned i) const { void CheckFormatHandler::DoneProcessing() { // Does the number of data arguments exceed the number of // format conversions in the format string? - if (!HasVAListArg) { - // Find any arguments that weren't covered. + if (ArgPassingKind != Sema::FAPK_VAList) { + // Find any arguments that weren't covered. CoveredArgs.flip(); signed notCoveredArg = CoveredArgs.find_first(); if (notCoveredArg >= 0) { @@ -9318,13 +9404,13 @@ public: const Expr *origFormatExpr, const Sema::FormatStringType type, unsigned firstDataArg, unsigned numDataArgs, bool isObjC, const char *beg, - bool hasVAListArg, ArrayRef<const Expr *> Args, - unsigned formatIdx, bool inFunctionCall, - Sema::VariadicCallType CallType, + Sema::FormatArgumentPassingKind APK, + ArrayRef<const Expr *> Args, unsigned formatIdx, + bool inFunctionCall, Sema::VariadicCallType CallType, llvm::SmallBitVector &CheckedVarArgs, UncoveredArgHandler &UncoveredArg) : CheckFormatHandler(s, fexpr, origFormatExpr, type, firstDataArg, - numDataArgs, beg, hasVAListArg, Args, formatIdx, + numDataArgs, beg, APK, Args, formatIdx, inFunctionCall, CallType, CheckedVarArgs, UncoveredArg) {} @@ -9399,17 +9485,16 @@ void CheckPrintfHandler::handleInvalidMaskType(StringRef MaskType) { } bool CheckPrintfHandler::HandleAmount( - const analyze_format_string::OptionalAmount &Amt, - unsigned k, const char *startSpecifier, - unsigned specifierLen) { + const analyze_format_string::OptionalAmount &Amt, unsigned k, + const char *startSpecifier, unsigned specifierLen) { if (Amt.hasDataArgument()) { - if (!HasVAListArg) { + if (ArgPassingKind != Sema::FAPK_VAList) { unsigned argIndex = Amt.getArgIndex(); if (argIndex >= NumDataArgs) { EmitFormatDiagnostic(S.PDiag(diag::warn_printf_asterisk_missing_arg) - << k, + << k, getLocationOfByte(Amt.getStart()), - /*IsStringLocation*/true, + /*IsStringLocation*/ true, getSpecifierRange(startSpecifier, specifierLen)); // Don't do any more checking. We will just emit // spurious errors. @@ -9805,7 +9890,7 @@ bool CheckPrintfHandler::HandlePrintfSpecifier( HandleNonStandardConversionSpecifier(CS, startSpecifier, specifierLen); // The remaining checks depend on the data arguments. 
-  if (HasVAListArg)
+  if (ArgPassingKind == Sema::FAPK_VAList)
     return true;
 
   if (!CheckNumArgs(FS, CS, startSpecifier, specifierLen, argIndex))
@@ -9953,6 +10038,12 @@ CheckPrintfHandler::checkFormatExpr(const analyze_printf::PrintfSpecifier &FS,
     ExprTy = TET->getUnderlyingExpr()->getType();
   }
 
+  // When using the format attribute in C++, you can receive a function or an
+  // array that will necessarily decay to a pointer when passed to the final
+  // format consumer. Apply decay before type comparison.
+  if (ExprTy->canDecayToPointerType())
+    ExprTy = S.Context.getDecayedType(ExprTy);
+
   // Diagnose attempts to print a boolean value as a character. Unlike other
   // -Wformat diagnostics, this is fine from a type perspective, but it still
   // doesn't make sense.
@@ -10173,6 +10264,7 @@ CheckPrintfHandler::checkFormatExpr(const analyze_printf::PrintfSpecifier &FS,
   // Since the warning for passing non-POD types to variadic functions
   // was deferred until now, we emit a warning for non-POD
   // arguments here.
+  bool EmitTypeMismatch = false;
   switch (S.isValidVarArgType(ExprTy)) {
   case Sema::VAK_Valid:
   case Sema::VAK_ValidInCXX11: {
@@ -10198,17 +10290,23 @@ CheckPrintfHandler::checkFormatExpr(const analyze_printf::PrintfSpecifier &FS,
   }
   case Sema::VAK_Undefined:
   case Sema::VAK_MSVCUndefined:
-    EmitFormatDiagnostic(S.PDiag(diag::warn_non_pod_vararg_with_format_string)
-                             << S.getLangOpts().CPlusPlus11 << ExprTy
-                             << CallType
-                             << AT.getRepresentativeTypeName(S.Context) << CSR
-                             << E->getSourceRange(),
-                         E->getBeginLoc(), /*IsStringLocation*/ false, CSR);
-    checkForCStrMembers(AT, E);
+    if (CallType == Sema::VariadicDoesNotApply) {
+      EmitTypeMismatch = true;
+    } else {
+      EmitFormatDiagnostic(
+          S.PDiag(diag::warn_non_pod_vararg_with_format_string)
+              << S.getLangOpts().CPlusPlus11 << ExprTy << CallType
+              << AT.getRepresentativeTypeName(S.Context) << CSR
+              << E->getSourceRange(),
+          E->getBeginLoc(), /*IsStringLocation*/ false, CSR);
+      checkForCStrMembers(AT, E);
+    }
     break;
   case Sema::VAK_Invalid:
-    if (ExprTy->isObjCObjectType())
+    if (CallType == Sema::VariadicDoesNotApply)
+      EmitTypeMismatch = true;
+    else if (ExprTy->isObjCObjectType())
       EmitFormatDiagnostic(
           S.PDiag(diag::err_cannot_pass_objc_interface_to_vararg_format)
           << S.getLangOpts().CPlusPlus11 << ExprTy << CallType
@@ -10224,6 +10322,19 @@ CheckPrintfHandler::checkFormatExpr(const analyze_printf::PrintfSpecifier &FS,
     break;
   }
 
+  if (EmitTypeMismatch) {
+    // The function is not variadic, so we do not generate warnings about
+    // being allowed to pass that object as a variadic argument. Instead,
+    // since there are inherently no printf specifiers for types which cannot
+    // be passed as variadic arguments, emit a plain old specifier mismatch
+    // diagnostic.
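// A sketch of the EmitTypeMismatch path in user terms; log_fixed is a
// hypothetical function whose argument list is fixed, so its CallType is
// VariadicDoesNotApply:
#include <string>
__attribute__((format(printf, 1, 2)))
void log_fixed(const char *fmt, std::string s);
// A call such as log_fixed("%s", std::string("hi")) now draws the ordinary
// conversion-specifier type-mismatch warning instead of a warning about
// passing a non-POD object through '...'.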
+ EmitFormatDiagnostic( + S.PDiag(diag::warn_format_conversion_argument_type_mismatch) + << AT.getRepresentativeTypeName(S.Context) << ExprTy << false + << E->getSourceRange(), + E->getBeginLoc(), false, CSR); + } + assert(FirstDataArg + FS.getArgIndex() < CheckedVarArgs.size() && "format string specifier index out of range"); CheckedVarArgs[FirstDataArg + FS.getArgIndex()] = true; @@ -10241,13 +10352,13 @@ public: CheckScanfHandler(Sema &s, const FormatStringLiteral *fexpr, const Expr *origFormatExpr, Sema::FormatStringType type, unsigned firstDataArg, unsigned numDataArgs, - const char *beg, bool hasVAListArg, + const char *beg, Sema::FormatArgumentPassingKind APK, ArrayRef<const Expr *> Args, unsigned formatIdx, bool inFunctionCall, Sema::VariadicCallType CallType, llvm::SmallBitVector &CheckedVarArgs, UncoveredArgHandler &UncoveredArg) : CheckFormatHandler(s, fexpr, origFormatExpr, type, firstDataArg, - numDataArgs, beg, hasVAListArg, Args, formatIdx, + numDataArgs, beg, APK, Args, formatIdx, inFunctionCall, CallType, CheckedVarArgs, UncoveredArg) {} @@ -10351,7 +10462,7 @@ bool CheckScanfHandler::HandleScanfSpecifier( HandleNonStandardConversionSpecifier(CS, startSpecifier, specifierLen); // The remaining checks depend on the data arguments. - if (HasVAListArg) + if (ArgPassingKind == Sema::FAPK_VAList) return true; if (!CheckNumArgs(FS, CS, startSpecifier, specifierLen, argIndex)) @@ -10408,17 +10519,13 @@ bool CheckScanfHandler::HandleScanfSpecifier( return true; } -static void CheckFormatString(Sema &S, const FormatStringLiteral *FExpr, - const Expr *OrigFormatExpr, - ArrayRef<const Expr *> Args, - bool HasVAListArg, unsigned format_idx, - unsigned firstDataArg, - Sema::FormatStringType Type, - bool inFunctionCall, - Sema::VariadicCallType CallType, - llvm::SmallBitVector &CheckedVarArgs, - UncoveredArgHandler &UncoveredArg, - bool IgnoreStringsWithoutSpecifiers) { +static void CheckFormatString( + Sema &S, const FormatStringLiteral *FExpr, const Expr *OrigFormatExpr, + ArrayRef<const Expr *> Args, Sema::FormatArgumentPassingKind APK, + unsigned format_idx, unsigned firstDataArg, Sema::FormatStringType Type, + bool inFunctionCall, Sema::VariadicCallType CallType, + llvm::SmallBitVector &CheckedVarArgs, UncoveredArgHandler &UncoveredArg, + bool IgnoreStringsWithoutSpecifiers) { // CHECK: is the format string a wide literal? 
if (!FExpr->isAscii() && !FExpr->isUTF8()) { CheckFormatHandler::EmitFormatDiagnostic( @@ -10469,23 +10576,21 @@ static void CheckFormatString(Sema &S, const FormatStringLiteral *FExpr, Type == Sema::FST_OSTrace) { CheckPrintfHandler H( S, FExpr, OrigFormatExpr, Type, firstDataArg, numDataArgs, - (Type == Sema::FST_NSString || Type == Sema::FST_OSTrace), Str, - HasVAListArg, Args, format_idx, inFunctionCall, CallType, - CheckedVarArgs, UncoveredArg); - - if (!analyze_format_string::ParsePrintfString(H, Str, Str + StrLen, - S.getLangOpts(), - S.Context.getTargetInfo(), - Type == Sema::FST_FreeBSDKPrintf)) + (Type == Sema::FST_NSString || Type == Sema::FST_OSTrace), Str, APK, + Args, format_idx, inFunctionCall, CallType, CheckedVarArgs, + UncoveredArg); + + if (!analyze_format_string::ParsePrintfString( + H, Str, Str + StrLen, S.getLangOpts(), S.Context.getTargetInfo(), + Type == Sema::FST_FreeBSDKPrintf)) H.DoneProcessing(); } else if (Type == Sema::FST_Scanf) { CheckScanfHandler H(S, FExpr, OrigFormatExpr, Type, firstDataArg, - numDataArgs, Str, HasVAListArg, Args, format_idx, - inFunctionCall, CallType, CheckedVarArgs, UncoveredArg); + numDataArgs, Str, APK, Args, format_idx, inFunctionCall, + CallType, CheckedVarArgs, UncoveredArg); - if (!analyze_format_string::ParseScanfString(H, Str, Str + StrLen, - S.getLangOpts(), - S.Context.getTargetInfo())) + if (!analyze_format_string::ParseScanfString( + H, Str, Str + StrLen, S.getLangOpts(), S.Context.getTargetInfo())) H.DoneProcessing(); } // TODO: handle other formats } @@ -16765,9 +16870,15 @@ void Sema::DiagnoseSelfMove(const Expr *LHSExpr, const Expr *RHSExpr, RHSDeclRef->getDecl()->getCanonicalDecl()) return; - Diag(OpLoc, diag::warn_self_move) << LHSExpr->getType() - << LHSExpr->getSourceRange() - << RHSExpr->getSourceRange(); + auto D = Diag(OpLoc, diag::warn_self_move) + << LHSExpr->getType() << LHSExpr->getSourceRange() + << RHSExpr->getSourceRange(); + if (const FieldDecl *F = + getSelfAssignmentClassMemberCandidate(RHSDeclRef->getDecl())) + D << 1 << F + << FixItHint::CreateInsertion(LHSDeclRef->getBeginLoc(), "this->"); + else + D << 0; return; } @@ -16802,16 +16913,16 @@ void Sema::DiagnoseSelfMove(const Expr *LHSExpr, const Expr *RHSExpr, RHSDeclRef->getDecl()->getCanonicalDecl()) return; - Diag(OpLoc, diag::warn_self_move) << LHSExpr->getType() - << LHSExpr->getSourceRange() - << RHSExpr->getSourceRange(); + Diag(OpLoc, diag::warn_self_move) + << LHSExpr->getType() << 0 << LHSExpr->getSourceRange() + << RHSExpr->getSourceRange(); return; } if (isa<CXXThisExpr>(LHSBase) && isa<CXXThisExpr>(RHSBase)) - Diag(OpLoc, diag::warn_self_move) << LHSExpr->getType() - << LHSExpr->getSourceRange() - << RHSExpr->getSourceRange(); + Diag(OpLoc, diag::warn_self_move) + << LHSExpr->getType() << 0 << LHSExpr->getSourceRange() + << RHSExpr->getSourceRange(); } //===--- Layout compatibility ----------------------------------------------// diff --git a/clang/lib/Sema/SemaCodeComplete.cpp b/clang/lib/Sema/SemaCodeComplete.cpp index 8c9ed5389488..86bad736227d 100644 --- a/clang/lib/Sema/SemaCodeComplete.cpp +++ b/clang/lib/Sema/SemaCodeComplete.cpp @@ -5362,8 +5362,8 @@ private: // Overwrite existing if the new member has more info. // The preference of . vs :: vs -> is fairly arbitrary. 
if (/*Inserted*/ R.second || - std::make_tuple(M.ArgTypes.hasValue(), M.ResultType != nullptr, - M.Operator) > std::make_tuple(O.ArgTypes.hasValue(), + std::make_tuple(M.ArgTypes.has_value(), M.ResultType != nullptr, + M.Operator) > std::make_tuple(O.ArgTypes.has_value(), O.ResultType != nullptr, O.Operator)) O = std::move(M); diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 1139088ecde2..5a546503cced 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -1625,22 +1625,20 @@ bool Sema::CheckRedeclarationModuleOwnership(NamedDecl *New, NamedDecl *Old) { Module *NewM = New->getOwningModule(); Module *OldM = Old->getOwningModule(); - if (NewM && NewM->Kind == Module::PrivateModuleFragment) + if (NewM && NewM->isPrivateModule()) NewM = NewM->Parent; - if (OldM && OldM->Kind == Module::PrivateModuleFragment) + if (OldM && OldM->isPrivateModule()) OldM = OldM->Parent; - // If we have a decl in a module partition, it is part of the containing - // module (which is the only thing that can be importing it). - if (NewM && OldM && - (OldM->Kind == Module::ModulePartitionInterface || - OldM->Kind == Module::ModulePartitionImplementation)) { - return false; - } - if (NewM == OldM) return false; + // Partitions are part of the module, but a partition could import another + // module, so verify that the PMIs agree. + if (NewM && OldM && (NewM->isModulePartition() || OldM->isModulePartition())) + return NewM->getPrimaryModuleInterfaceName() == + OldM->getPrimaryModuleInterfaceName(); + bool NewIsModuleInterface = NewM && NewM->isModulePurview(); bool OldIsModuleInterface = OldM && OldM->isModulePurview(); if (NewIsModuleInterface || OldIsModuleInterface) { @@ -3209,6 +3207,45 @@ static void mergeParamDeclAttributes(ParmVarDecl *newDecl, if (!foundAny) newDecl->dropAttrs(); } +static bool EquivalentArrayTypes(QualType Old, QualType New, + const ASTContext &Ctx) { + + auto NoSizeInfo = [&Ctx](QualType Ty) { + if (Ty->isIncompleteArrayType() || Ty->isPointerType()) + return true; + if (const auto *VAT = Ctx.getAsVariableArrayType(Ty)) + return VAT->getSizeModifier() == ArrayType::ArraySizeModifier::Star; + return false; + }; + + // `type[]` is equivalent to `type *` and `type[*]`. + if (NoSizeInfo(Old) && NoSizeInfo(New)) + return true; + + // Don't try to compare VLA sizes, unless one of them has the star modifier. + if (Old->isVariableArrayType() && New->isVariableArrayType()) { + const auto *OldVAT = Ctx.getAsVariableArrayType(Old); + const auto *NewVAT = Ctx.getAsVariableArrayType(New); + if ((OldVAT->getSizeModifier() == ArrayType::ArraySizeModifier::Star) ^ + (NewVAT->getSizeModifier() == ArrayType::ArraySizeModifier::Star)) + return false; + return true; + } + + // Only compare size, ignore Size modifiers and CVR. 
+  if (Old->isConstantArrayType() && New->isConstantArrayType()) {
+    return Ctx.getAsConstantArrayType(Old)->getSize() ==
+           Ctx.getAsConstantArrayType(New)->getSize();
+  }
+
+  // Don't try to compare dependent sized arrays
+  if (Old->isDependentSizedArrayType() && New->isDependentSizedArrayType()) {
+    return true;
+  }
+
+  return Old == New;
+}
+
 static void mergeParamDeclTypes(ParmVarDecl *NewParam,
                                 const ParmVarDecl *OldParam,
                                 Sema &S) {
@@ -3234,6 +3271,19 @@ static void mergeParamDeclTypes(ParmVarDecl *NewParam,
       NewParam->setType(NewT);
     }
   }
+  const auto *OldParamDT = dyn_cast<DecayedType>(OldParam->getType());
+  const auto *NewParamDT = dyn_cast<DecayedType>(NewParam->getType());
+  if (OldParamDT && NewParamDT &&
+      OldParamDT->getPointeeType() == NewParamDT->getPointeeType()) {
+    QualType OldParamOT = OldParamDT->getOriginalType();
+    QualType NewParamOT = NewParamDT->getOriginalType();
+    if (!EquivalentArrayTypes(OldParamOT, NewParamOT, S.getASTContext())) {
+      S.Diag(NewParam->getLocation(), diag::warn_inconsistent_array_form)
+          << NewParam << NewParamOT;
+      S.Diag(OldParam->getLocation(), diag::note_previous_declaration_as)
+          << OldParamOT;
+    }
+  }
 }
 
 namespace {
@@ -15464,7 +15514,7 @@ void Sema::AddKnownFunctionAttributesForReplaceableGlobalAllocationFunction(
   // specified by the value of this argument.
   if (AlignmentParam && !FD->hasAttr<AllocAlignAttr>()) {
     FD->addAttr(AllocAlignAttr::CreateImplicit(
-        Context, ParamIdx(AlignmentParam.getValue(), FD), FD->getLocation()));
+        Context, ParamIdx(AlignmentParam.value(), FD), FD->getLocation()));
   }
 
   // FIXME:
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index f79523983ed8..838fd48357fb 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -2673,7 +2673,7 @@ static void handleAvailabilityAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
     if (IOSToWatchOSMapping) {
       if (auto MappedVersion = IOSToWatchOSMapping->map(
               Version, MinimumWatchOSVersion, None)) {
-        return MappedVersion.getValue();
+        return MappedVersion.value();
       }
     }
 
@@ -2682,10 +2682,10 @@ static void handleAvailabilityAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
     if (NewMajor >= 2) {
       if (Version.getMinor()) {
         if (Version.getSubminor())
-          return VersionTuple(NewMajor, Version.getMinor().getValue(),
-                              Version.getSubminor().getValue());
+          return VersionTuple(NewMajor, Version.getMinor().value(),
+                              Version.getSubminor().value());
         else
-          return VersionTuple(NewMajor, Version.getMinor().getValue());
+          return VersionTuple(NewMajor, Version.getMinor().value());
       }
       return VersionTuple(NewMajor);
     }
@@ -3886,12 +3886,10 @@ static void handleFormatAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
 
   // check if the function is variadic if the 3rd argument non-zero
   if (FirstArg != 0) {
-    if (isFunctionOrMethodVariadic(D)) {
+    if (isFunctionOrMethodVariadic(D))
       ++NumArgs; // +1 for ...
-    } else {
-      S.Diag(D->getLocation(), diag::err_format_attribute_requires_variadic);
-      return;
-    }
+    else
+      S.Diag(D->getLocation(), diag::warn_gcc_requires_variadic_function) << AL;
   }
 
   // strftime requires FirstArg to be 0 because it doesn't read from any
@@ -4314,13 +4312,6 @@ void Sema::AddAlignedAttr(Decl *D, const AttributeCommonInfo &CI, Expr *E,
     return;
   uint64_t AlignVal = Alignment.getZExtValue();
 
-  // 16 byte ByVal alignment not due to a vector member is not honoured by XL
-  // on AIX. Emit a warning here that users are generating binary incompatible
-  // code to be safe.
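// The redeclaration pairs that the EquivalentArrayTypes check above compares,
// as a sketch (f and g are hypothetical C declarations):
void f(int a[10]);
void f(int a[]);  // warn_inconsistent_array_form: 'int[]' vs. 'int[10]'
void g(int b[*]); // '[*]' carries no size information, so it is treated as
void g(int *b);   // equivalent to 'int *' and 'int[]': no warning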
- if (AlignVal >= 16 && isa<FieldDecl>(D) && - Context.getTargetInfo().getTriple().isOSAIX()) - Diag(AttrLoc, diag::warn_not_xl_compatible) << E->getSourceRange(); - // C++11 [dcl.align]p2: // -- if the constant expression evaluates to zero, the alignment // specifier shall have no effect @@ -8002,6 +7993,26 @@ static void handleZeroCallUsedRegsAttr(Sema &S, Decl *D, const ParsedAttr &AL) { D->addAttr(ZeroCallUsedRegsAttr::Create(S.Context, Kind, AL)); } +static void handleFunctionReturnThunksAttr(Sema &S, Decl *D, + const ParsedAttr &AL) { + StringRef KindStr; + SourceLocation LiteralLoc; + if (!S.checkStringLiteralArgumentAttr(AL, 0, KindStr, &LiteralLoc)) + return; + + FunctionReturnThunksAttr::Kind Kind; + if (!FunctionReturnThunksAttr::ConvertStrToKind(KindStr, Kind)) { + S.Diag(LiteralLoc, diag::warn_attribute_type_not_supported) + << AL << KindStr; + return; + } + // FIXME: it would be good to better handle attribute merging rather than + // silently replacing the existing attribute, so long as it does not break + // the expected codegen tests. + D->dropAttr<FunctionReturnThunksAttr>(); + D->addAttr(FunctionReturnThunksAttr::Create(S.Context, Kind, AL)); +} + static void handleSYCLKernelAttr(Sema &S, Decl *D, const ParsedAttr &AL) { // The 'sycl_kernel' attribute applies only to function templates. const auto *FD = cast<FunctionDecl>(D); @@ -8868,6 +8879,9 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL, case ParsedAttr::AT_ZeroCallUsedRegs: handleZeroCallUsedRegsAttr(S, D, AL); break; + case ParsedAttr::AT_FunctionReturnThunks: + handleFunctionReturnThunksAttr(S, D, AL); + break; // Microsoft attributes: case ParsedAttr::AT_LayoutVersion: diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index b9ecde6f20a0..742c4828b8dc 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -14600,6 +14600,40 @@ static inline UnaryOperatorKind ConvertTokenKindToUnaryOpcode( return Opc; } +const FieldDecl * +Sema::getSelfAssignmentClassMemberCandidate(const ValueDecl *SelfAssigned) { + // Explore the case for adding 'this->' to the LHS of a self assignment, very + // common for setters. + // struct A { + // int X; + // -void setX(int X) { X = X; } + // +void setX(int X) { this->X = X; } + // }; + + // Only consider parameters for self assignment fixes. + if (!isa<ParmVarDecl>(SelfAssigned)) + return nullptr; + const auto *Method = + dyn_cast_or_null<CXXMethodDecl>(getCurFunctionDecl(true)); + if (!Method) + return nullptr; + + const CXXRecordDecl *Parent = Method->getParent(); + // In theory this is fixable if the lambda explicitly captures this, but + // that's added complexity that's rarely going to be used. + if (Parent->isLambda()) + return nullptr; + + // FIXME: Use an actual Lookup operation instead of just traversing fields + // in order to get base class fields. + auto Field = + llvm::find_if(Parent->fields(), + [Name(SelfAssigned->getDeclName())](const FieldDecl *F) { + return F->getDeclName() == Name; + }); + return (Field != Parent->field_end()) ? *Field : nullptr; +} + /// DiagnoseSelfAssignment - Emits a warning if a value is assigned to itself. /// This warning suppressed in the event of macro expansions. static void DiagnoseSelfAssignment(Sema &S, Expr *LHSExpr, Expr *RHSExpr, @@ -14630,10 +14664,16 @@ static void DiagnoseSelfAssignment(Sema &S, Expr *LHSExpr, Expr *RHSExpr, if (RefTy->getPointeeType().isVolatileQualified()) return; - S.Diag(OpLoc, IsBuiltin ? 
diag::warn_self_assignment_builtin - : diag::warn_self_assignment_overloaded) - << LHSDeclRef->getType() << LHSExpr->getSourceRange() - << RHSExpr->getSourceRange(); + auto Diag = S.Diag(OpLoc, IsBuiltin ? diag::warn_self_assignment_builtin + : diag::warn_self_assignment_overloaded) + << LHSDeclRef->getType() << LHSExpr->getSourceRange() + << RHSExpr->getSourceRange(); + if (const FieldDecl *SelfAssignField = + S.getSelfAssignmentClassMemberCandidate(RHSDecl)) + Diag << 1 << SelfAssignField + << FixItHint::CreateInsertion(LHSDeclRef->getBeginLoc(), "this->"); + else + Diag << 0; } /// Check if a bitwise-& is performed on an Objective-C pointer. This diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp index 0d73fcf8bf4e..11f33c7c6363 100644 --- a/clang/lib/Sema/SemaExprCXX.cpp +++ b/clang/lib/Sema/SemaExprCXX.cpp @@ -5394,6 +5394,39 @@ static bool evaluateTypeTrait(Sema &S, TypeTrait Kind, SourceLocation KWLoc, return false; } +namespace { +void DiagnoseBuiltinDeprecation(Sema& S, TypeTrait Kind, + SourceLocation KWLoc) { + TypeTrait Replacement; + switch (Kind) { + case UTT_HasNothrowAssign: + case UTT_HasNothrowMoveAssign: + Replacement = BTT_IsNothrowAssignable; + break; + case UTT_HasNothrowCopy: + case UTT_HasNothrowConstructor: + Replacement = TT_IsNothrowConstructible; + break; + case UTT_HasTrivialAssign: + case UTT_HasTrivialMoveAssign: + Replacement = BTT_IsTriviallyAssignable; + break; + case UTT_HasTrivialCopy: + case UTT_HasTrivialDefaultConstructor: + case UTT_HasTrivialMoveConstructor: + Replacement = TT_IsTriviallyConstructible; + break; + case UTT_HasTrivialDestructor: + Replacement = UTT_IsTriviallyDestructible; + break; + default: + return; + } + S.Diag(KWLoc, diag::warn_deprecated_builtin) + << getTraitSpelling(Kind) << getTraitSpelling(Replacement); +} +} + ExprResult Sema::BuildTypeTrait(TypeTrait Kind, SourceLocation KWLoc, ArrayRef<TypeSourceInfo *> Args, SourceLocation RParenLoc) { @@ -5403,6 +5436,8 @@ ExprResult Sema::BuildTypeTrait(TypeTrait Kind, SourceLocation KWLoc, *this, Kind, KWLoc, Args[0]->getType())) return ExprError(); + DiagnoseBuiltinDeprecation(*this, Kind, KWLoc); + bool Dependent = false; for (unsigned I = 0, N = Args.size(); I != N; ++I) { if (Args[I]->getType()->isDependentType()) { diff --git a/clang/lib/Sema/SemaModule.cpp b/clang/lib/Sema/SemaModule.cpp index 3aa124d457b0..e9a1ac17ce86 100644 --- a/clang/lib/Sema/SemaModule.cpp +++ b/clang/lib/Sema/SemaModule.cpp @@ -935,3 +935,16 @@ void Sema::PopGlobalModuleFragment() { "left the wrong module scope, which is not global module fragment"); ModuleScopes.pop_back(); } + +bool Sema::isModuleUnitOfCurrentTU(const Module *M) const { + assert(M); + + Module *CurrentModuleUnit = getCurrentModule(); + + // If we are not in a module currently, M must not be the module unit of + // current TU. + if (!CurrentModuleUnit) + return false; + + return M->isSubModuleOf(CurrentModuleUnit->getTopLevelModule()); +} diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 6f501965552e..dc1470bf7a9d 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -840,21 +840,21 @@ public: /// false - otherwise. bool isOrderedRegion() const { if (const SharingMapTy *Top = getTopOfStackOrNull()) - return Top->OrderedRegion.hasValue(); + return Top->OrderedRegion.has_value(); return false; } /// Returns optional parameter for the ordered region. 
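// Much of the churn in this commit is a mechanical migration from
// llvm::Optional's legacy accessors to its std::optional-compatible ones; a
// minimal sketch of the correspondence (consume is a placeholder):
#include "llvm/ADT/Optional.h"
void consume(int);
void optional_sketch() {
  llvm::Optional<int> O = 42;
  if (O.hasValue())
    consume(O.getValue()); // legacy spellings, being phased out
  if (O.has_value())
    consume(O.value());    // std::optional-style replacements
  consume(O.value_or(0));  // fallback form, also used in this patch
}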
std::pair<const Expr *, OMPOrderedClause *> getOrderedRegionParam() const { if (const SharingMapTy *Top = getTopOfStackOrNull()) if (Top->OrderedRegion) - return Top->OrderedRegion.getValue(); + return Top->OrderedRegion.value(); return std::make_pair(nullptr, nullptr); } /// Returns true, if parent region is ordered (has associated /// 'ordered' clause), false - otherwise. bool isParentOrderedRegion() const { if (const SharingMapTy *Parent = getSecondOnStackOrNull()) - return Parent->OrderedRegion.hasValue(); + return Parent->OrderedRegion.has_value(); return false; } /// Returns optional parameter for the ordered region. @@ -862,7 +862,7 @@ public: getParentOrderedRegionParam() const { if (const SharingMapTy *Parent = getSecondOnStackOrNull()) if (Parent->OrderedRegion) - return Parent->OrderedRegion.getValue(); + return Parent->OrderedRegion.value(); return std::make_pair(nullptr, nullptr); } /// Marks current region as nowait (it has a 'nowait' clause). @@ -7831,9 +7831,9 @@ public: /// Return true if any expression is dependent. bool dependent() const; /// Returns true if the initializer forms non-rectangular loop. - bool doesInitDependOnLC() const { return InitDependOnLC.hasValue(); } + bool doesInitDependOnLC() const { return InitDependOnLC.has_value(); } /// Returns true if the condition forms non-rectangular loop. - bool doesCondDependOnLC() const { return CondDependOnLC.hasValue(); } + bool doesCondDependOnLC() const { return CondDependOnLC.has_value(); } /// Returns index of the loop we depend on (starting from 1), or 0 otherwise. unsigned getLoopDependentIdx() const { return InitDependOnLC.value_or(CondDependOnLC.value_or(0)); @@ -7942,18 +7942,18 @@ bool OpenMPIterationSpaceChecker::setStep(Expr *NewStep, bool Subtract) { if (!TestIsLessOp) TestIsLessOp = IsConstPos || (IsUnsigned && !Subtract); if (UB && - (IsConstZero || (TestIsLessOp.getValue() - ? (IsConstNeg || (IsUnsigned && Subtract)) - : (IsConstPos || (IsUnsigned && !Subtract))))) { + (IsConstZero || + (TestIsLessOp.value() ? (IsConstNeg || (IsUnsigned && Subtract)) + : (IsConstPos || (IsUnsigned && !Subtract))))) { SemaRef.Diag(NewStep->getExprLoc(), diag::err_omp_loop_incr_not_compatible) - << LCDecl << TestIsLessOp.getValue() << NewStep->getSourceRange(); + << LCDecl << TestIsLessOp.value() << NewStep->getSourceRange(); SemaRef.Diag(ConditionLoc, diag::note_omp_loop_cond_requres_compatible_incr) - << TestIsLessOp.getValue() << ConditionSrcRange; + << TestIsLessOp.value() << ConditionSrcRange; return true; } - if (TestIsLessOp.getValue() == Subtract) { + if (TestIsLessOp.value() == Subtract) { NewStep = SemaRef.CreateBuiltinUnaryOp(NewStep->getExprLoc(), UO_Minus, NewStep) .get(); @@ -8708,8 +8708,8 @@ Expr *OpenMPIterationSpaceChecker::buildNumIterations( UBVal = MinUB.get(); } } - Expr *UBExpr = TestIsLessOp.getValue() ? UBVal : LBVal; - Expr *LBExpr = TestIsLessOp.getValue() ? LBVal : UBVal; + Expr *UBExpr = TestIsLessOp.value() ? UBVal : LBVal; + Expr *LBExpr = TestIsLessOp.value() ? LBVal : UBVal; Expr *Upper = tryBuildCapture(SemaRef, UBExpr, Captures).get(); Expr *Lower = tryBuildCapture(SemaRef, LBExpr, Captures).get(); if (!Upper || !Lower) @@ -8772,12 +8772,12 @@ std::pair<Expr *, Expr *> OpenMPIterationSpaceChecker::buildMinMaxValues( // init value. Expr *MinExpr = nullptr; Expr *MaxExpr = nullptr; - Expr *LBExpr = TestIsLessOp.getValue() ? LB : UB; - Expr *UBExpr = TestIsLessOp.getValue() ? UB : LB; - bool LBNonRect = TestIsLessOp.getValue() ? 
InitDependOnLC.hasValue() - : CondDependOnLC.hasValue(); - bool UBNonRect = TestIsLessOp.getValue() ? CondDependOnLC.hasValue() - : InitDependOnLC.hasValue(); + Expr *LBExpr = TestIsLessOp.value() ? LB : UB; + Expr *UBExpr = TestIsLessOp.value() ? UB : LB; + bool LBNonRect = TestIsLessOp.value() ? InitDependOnLC.has_value() + : CondDependOnLC.has_value(); + bool UBNonRect = TestIsLessOp.value() ? CondDependOnLC.has_value() + : InitDependOnLC.has_value(); Expr *Lower = LBNonRect ? LBExpr : tryBuildCapture(SemaRef, LBExpr, Captures).get(); Expr *Upper = @@ -8901,8 +8901,8 @@ Expr *OpenMPIterationSpaceChecker::buildPreCond( ExprResult CondExpr = SemaRef.BuildBinOp( S, DefaultLoc, - TestIsLessOp.getValue() ? (TestIsStrictOp ? BO_LT : BO_LE) - : (TestIsStrictOp ? BO_GT : BO_GE), + TestIsLessOp.value() ? (TestIsStrictOp ? BO_LT : BO_LE) + : (TestIsStrictOp ? BO_GT : BO_GE), NewLB.get(), NewUB.get()); if (CondExpr.isUsable()) { if (!SemaRef.Context.hasSameUnqualifiedType(CondExpr.get()->getType(), @@ -8978,12 +8978,10 @@ Expr *OpenMPIterationSpaceChecker::buildOrderedLoopData( !SemaRef.getLangOpts().CPlusPlus) return nullptr; // Upper - Lower - Expr *Upper = TestIsLessOp.getValue() - ? Cnt - : tryBuildCapture(SemaRef, LB, Captures).get(); - Expr *Lower = TestIsLessOp.getValue() - ? tryBuildCapture(SemaRef, LB, Captures).get() - : Cnt; + Expr *Upper = + TestIsLessOp.value() ? Cnt : tryBuildCapture(SemaRef, LB, Captures).get(); + Expr *Lower = + TestIsLessOp.value() ? tryBuildCapture(SemaRef, LB, Captures).get() : Cnt; if (!Upper || !Lower) return nullptr; @@ -11570,7 +11568,7 @@ protected: bool checkType(ErrorInfoTy &ErrorInfo) const; static bool CheckValue(const Expr *E, ErrorInfoTy &ErrorInfo, - bool ShouldBeLValue) { + bool ShouldBeLValue, bool ShouldBeInteger = false) { if (ShouldBeLValue && !E->isLValue()) { ErrorInfo.Error = ErrorTy::XNotLValue; ErrorInfo.ErrorLoc = ErrorInfo.NoteLoc = E->getExprLoc(); @@ -11586,8 +11584,7 @@ protected: ErrorInfo.ErrorRange = ErrorInfo.NoteRange = E->getSourceRange(); return false; } - - if (!QTy->isIntegerType()) { + if (ShouldBeInteger && !QTy->isIntegerType()) { ErrorInfo.Error = ErrorTy::NotInteger; ErrorInfo.ErrorLoc = ErrorInfo.NoteLoc = E->getExprLoc(); ErrorInfo.ErrorRange = ErrorInfo.NoteRange = E->getSourceRange(); @@ -11890,7 +11887,7 @@ bool OpenMPAtomicCompareCaptureChecker::checkType(ErrorInfoTy &ErrorInfo) { if (V && !CheckValue(V, ErrorInfo, true)) return false; - if (R && !CheckValue(R, ErrorInfo, true)) + if (R && !CheckValue(R, ErrorInfo, true, true)) return false; return true; @@ -22588,27 +22585,27 @@ void Sema::ActOnOpenMPDeclareTargetName(NamedDecl *ND, SourceLocation Loc, auto *VD = cast<ValueDecl>(ND); llvm::Optional<OMPDeclareTargetDeclAttr *> ActiveAttr = OMPDeclareTargetDeclAttr::getActiveAttr(VD); - if (ActiveAttr && ActiveAttr.getValue()->getDevType() != DTCI.DT && - ActiveAttr.getValue()->getLevel() == Level) { + if (ActiveAttr && ActiveAttr.value()->getDevType() != DTCI.DT && + ActiveAttr.value()->getLevel() == Level) { Diag(Loc, diag::err_omp_device_type_mismatch) << OMPDeclareTargetDeclAttr::ConvertDevTypeTyToStr(DTCI.DT) << OMPDeclareTargetDeclAttr::ConvertDevTypeTyToStr( - ActiveAttr.getValue()->getDevType()); + ActiveAttr.value()->getDevType()); return; } - if (ActiveAttr && ActiveAttr.getValue()->getMapType() != MT && - ActiveAttr.getValue()->getLevel() == Level) { + if (ActiveAttr && ActiveAttr.value()->getMapType() != MT && + ActiveAttr.value()->getLevel() == Level) { Diag(Loc, diag::err_omp_declare_target_to_and_link) 
<< ND;
     return;
   }
-  if (ActiveAttr && ActiveAttr.getValue()->getLevel() == Level)
+  if (ActiveAttr && ActiveAttr.value()->getLevel() == Level)
     return;
 
   Expr *IndirectE = nullptr;
   bool IsIndirect = false;
   if (DTCI.Indirect) {
-    IndirectE = DTCI.Indirect.getValue();
+    IndirectE = DTCI.Indirect.value();
     if (!IndirectE)
       IsIndirect = true;
   }
@@ -22702,13 +22699,13 @@ void Sema::checkDeclIsAllowedInOpenMPTarget(Expr *E, Decl *D,
   llvm::Optional<OMPDeclareTargetDeclAttr *> ActiveAttr =
       OMPDeclareTargetDeclAttr::getActiveAttr(VD);
   unsigned Level = DeclareTargetNesting.size();
-  if (ActiveAttr && ActiveAttr.getValue()->getLevel() >= Level)
+  if (ActiveAttr && ActiveAttr.value()->getLevel() >= Level)
     return;
   DeclareTargetContextInfo &DTCI = DeclareTargetNesting.back();
   Expr *IndirectE = nullptr;
   bool IsIndirect = false;
   if (DTCI.Indirect) {
-    IndirectE = DTCI.Indirect.getValue();
+    IndirectE = DTCI.Indirect.value();
     if (!IndirectE)
       IsIndirect = true;
   }
diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp
index dbfe6164bda2..67cf8f0371c5 100644
--- a/clang/lib/Sema/SemaTemplate.cpp
+++ b/clang/lib/Sema/SemaTemplate.cpp
@@ -2695,8 +2695,15 @@ bool Sema::CheckTemplateParameterList(TemplateParameterList *NewParams,
   for (TemplateParameterList::iterator NewParam = NewParams->begin(),
                                     NewParamEnd = NewParams->end();
        NewParam != NewParamEnd; ++NewParam) {
-    // Variables used to diagnose redundant default arguments
+    // Whether we've seen a duplicate default argument in the same translation
+    // unit.
     bool RedundantDefaultArg = false;
+    // Whether we've found inconsistent default arguments in different
+    // translation units.
+    bool InconsistentDefaultArg = false;
+    // The name of the module which contains the inconsistent default argument.
+ std::string PrevModuleName; + SourceLocation OldDefaultLoc; SourceLocation NewDefaultLoc; @@ -2729,7 +2736,16 @@ bool Sema::CheckTemplateParameterList(TemplateParameterList *NewParams, OldDefaultLoc = OldTypeParm->getDefaultArgumentLoc(); NewDefaultLoc = NewTypeParm->getDefaultArgumentLoc(); SawDefaultArgument = true; - RedundantDefaultArg = true; + + if (!OldTypeParm->getOwningModule() || + isModuleUnitOfCurrentTU(OldTypeParm->getOwningModule())) + RedundantDefaultArg = true; + else if (!getASTContext().isSameDefaultTemplateArgument(OldTypeParm, + NewTypeParm)) { + InconsistentDefaultArg = true; + PrevModuleName = + OldTypeParm->getImportedOwningModule()->getFullModuleName(); + } PreviousDefaultArgLoc = NewDefaultLoc; } else if (OldTypeParm && OldTypeParm->hasDefaultArgument()) { // Merge the default argument from the old declaration to the @@ -2774,7 +2790,15 @@ bool Sema::CheckTemplateParameterList(TemplateParameterList *NewParams, OldDefaultLoc = OldNonTypeParm->getDefaultArgumentLoc(); NewDefaultLoc = NewNonTypeParm->getDefaultArgumentLoc(); SawDefaultArgument = true; - RedundantDefaultArg = true; + if (!OldNonTypeParm->getOwningModule() || + isModuleUnitOfCurrentTU(OldNonTypeParm->getOwningModule())) + RedundantDefaultArg = true; + else if (!getASTContext().isSameDefaultTemplateArgument( + OldNonTypeParm, NewNonTypeParm)) { + InconsistentDefaultArg = true; + PrevModuleName = + OldNonTypeParm->getImportedOwningModule()->getFullModuleName(); + } PreviousDefaultArgLoc = NewDefaultLoc; } else if (OldNonTypeParm && OldNonTypeParm->hasDefaultArgument()) { // Merge the default argument from the old declaration to the @@ -2818,7 +2842,15 @@ bool Sema::CheckTemplateParameterList(TemplateParameterList *NewParams, OldDefaultLoc = OldTemplateParm->getDefaultArgument().getLocation(); NewDefaultLoc = NewTemplateParm->getDefaultArgument().getLocation(); SawDefaultArgument = true; - RedundantDefaultArg = true; + if (!OldTemplateParm->getOwningModule() || + isModuleUnitOfCurrentTU(OldTemplateParm->getOwningModule())) + RedundantDefaultArg = true; + else if (!getASTContext().isSameDefaultTemplateArgument( + OldTemplateParm, NewTemplateParm)) { + InconsistentDefaultArg = true; + PrevModuleName = + OldTemplateParm->getImportedOwningModule()->getFullModuleName(); + } PreviousDefaultArgLoc = NewDefaultLoc; } else if (OldTemplateParm && OldTemplateParm->hasDefaultArgument()) { // Merge the default argument from the old declaration to the @@ -2845,13 +2877,32 @@ bool Sema::CheckTemplateParameterList(TemplateParameterList *NewParams, Invalid = true; } + // [basic.def.odr]/13: + // There can be more than one definition of a + // ... + // default template argument + // ... + // in a program provided that each definition appears in a different + // translation unit and the definitions satisfy the [same-meaning + // criteria of the ODR]. + // + // Simply, the design of modules allows the definition of a template default + // argument to be repeated across translation units. Note that the ODR is + // checked elsewhere. But it is still not allowed to repeat a template default + // argument in the same translation unit. if (RedundantDefaultArg) { - // C++ [temp.param]p12: - // A template-parameter shall not be given default arguments - // by two different declarations in the same scope.
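Concretely, the new InconsistentDefaultArg path only fires when the prior declaration's default argument lives in an imported module unit rather than the current translation unit. A hedged sketch of the user-visible behavior (the declarations are invented; the diagnostic IDs are the ones added above):

// In some module unit previously imported into this TU:
template <class T = int> class Widget;

// In the current translation unit:
template <class T = int> class Widget;
// OK after this patch: same default argument in a different translation
// unit (isSameDefaultTemplateArgument() returns true, so neither flag is
// set and no diagnostic is emitted).

template <class T = long> class Widget;
// Diagnosed: err_template_param_default_arg_inconsistent_redefinition,
// with note_template_param_prev_default_arg_in_other_module naming the
// module that owns the previous default argument (PrevModuleName).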
Diag(NewDefaultLoc, diag::err_template_param_default_arg_redefinition); Diag(OldDefaultLoc, diag::note_template_param_prev_default_arg); Invalid = true; + } else if (InconsistentDefaultArg) { + // We can only diagnose the case where the OldParam is imported. The case + // where the NewParam is imported should be handled in the ASTReader. + Diag(NewDefaultLoc, + diag::err_template_param_default_arg_inconsistent_redefinition); + Diag(OldDefaultLoc, + diag::note_template_param_prev_default_arg_in_other_module) + << PrevModuleName; + Invalid = true; } else if (MissingDefaultArg && TPC != TPC_FunctionTemplate) { // C++ [temp.param]p11: // If a template-parameter of a class template has a default diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp index d7558017948a..bd166ff6f594 100644 --- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -469,8 +469,8 @@ static void instantiateOMPDeclareVariantAttr( if (!DeclVarData) return; - E = DeclVarData.getValue().second; - FD = DeclVarData.getValue().first; + E = DeclVarData.value().second; + FD = DeclVarData.value().first; if (auto *VariantDRE = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) { if (auto *VariantFD = dyn_cast<FunctionDecl>(VariantDRE->getDecl())) { @@ -4840,7 +4840,8 @@ void Sema::InstantiateFunctionDefinition(SourceLocation PointOfInstantiation, /*Complain*/DefinitionRequired)) { if (DefinitionRequired) Function->setInvalidDecl(); - else if (TSK == TSK_ExplicitInstantiationDefinition) { + else if (TSK == TSK_ExplicitInstantiationDefinition || + (Function->isConstexpr() && !Recursive)) { // Try again at the end of the translation unit (at which point a // definition will be required). assert(!Recursive); @@ -4855,7 +4856,7 @@ void Sema::InstantiateFunctionDefinition(SourceLocation PointOfInstantiation, Diag(PatternDecl->getLocation(), diag::note_forward_template_decl); if (getLangOpts().CPlusPlus11) Diag(PointOfInstantiation, diag::note_inst_declaration_hint) - << Function; + << Function; } } diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index d853805bf97e..04ade0a3b9d0 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -1462,18 +1462,19 @@ bool ASTReader::ReadSLocEntry(int ID) { unsigned RecCode = MaybeRecCode.get(); if (RecCode == SM_SLOC_BUFFER_BLOB_COMPRESSED) { - if (!llvm::zlib::isAvailable()) { + if (!llvm::compression::zlib::isAvailable()) { Error("zlib is not available"); return nullptr; } - SmallString<0> Uncompressed; - if (llvm::Error E = - llvm::zlib::uncompress(Blob, Uncompressed, Record[0])) { + SmallVector<uint8_t, 0> Uncompressed; + if (llvm::Error E = llvm::compression::zlib::uncompress( + llvm::arrayRefFromStringRef(Blob), Uncompressed, Record[0])) { Error("could not decompress embedded file contents: " + llvm::toString(std::move(E))); return nullptr; } - return llvm::MemoryBuffer::getMemBufferCopy(Uncompressed, Name); + return llvm::MemoryBuffer::getMemBufferCopy( + llvm::toStringRef(Uncompressed), Name); } else if (RecCode == SM_SLOC_BUFFER_BLOB) { return llvm::MemoryBuffer::getMemBuffer(Blob.drop_back(1), Name, true); } else { @@ -5171,8 +5172,9 @@ namespace { bool ReadPreprocessorOptions(const PreprocessorOptions &PPOpts, bool Complain, std::string &SuggestedPredefines) override { - return checkPreprocessorOptions(ExistingPPOpts, PPOpts, nullptr, FileMgr, - SuggestedPredefines, ExistingLangOpts); + return
checkPreprocessorOptions(PPOpts, ExistingPPOpts, /*Diags=*/nullptr, + FileMgr, SuggestedPredefines, + ExistingLangOpts); } }; diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 1787909bb6f7..fac8fc141d2c 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -1888,8 +1888,8 @@ void ASTWriter::WriteHeaderSearch(const HeaderSearch &HS) { // without this file existing on disk. if (!U.Size || (!U.ModTime && IncludeTimestamps)) { PP->Diag(U.FileNameLoc, diag::err_module_no_size_mtime_for_header) - << WritingModule->getFullModuleName() << U.Size.hasValue() - << U.FileName; + << WritingModule->getFullModuleName() << U.Size.has_value() + << U.FileName; continue; } @@ -2000,12 +2000,13 @@ static void emitBlob(llvm::BitstreamWriter &Stream, StringRef Blob, // Compress the buffer if possible. We expect that almost all PCM // consumers will not want its contents. - SmallString<0> CompressedBuffer; - if (llvm::zlib::isAvailable()) { - llvm::zlib::compress(Blob.drop_back(1), CompressedBuffer); + SmallVector<uint8_t, 0> CompressedBuffer; + if (llvm::compression::zlib::isAvailable()) { + llvm::compression::zlib::compress( + llvm::arrayRefFromStringRef(Blob.drop_back(1)), CompressedBuffer); RecordDataType Record[] = {SM_SLOC_BUFFER_BLOB_COMPRESSED, Blob.size() - 1}; Stream.EmitRecordWithBlob(SLocBufferBlobCompressedAbbrv, Record, - CompressedBuffer); + llvm::toStringRef(CompressedBuffer)); return; } diff --git a/clang/lib/StaticAnalyzer/Checkers/BasicObjCFoundationChecks.cpp b/clang/lib/StaticAnalyzer/Checkers/BasicObjCFoundationChecks.cpp index 330ca90b7659..ca76e2d83381 100644 --- a/clang/lib/StaticAnalyzer/Checkers/BasicObjCFoundationChecks.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/BasicObjCFoundationChecks.cpp @@ -769,7 +769,7 @@ void VariadicMethodTypeChecker::checkPreObjCMessage(const ObjCMethodCall &msg, if (!errorNode) errorNode = C.generateNonFatalErrorNode(); - if (!errorNode.getValue()) + if (!errorNode.value()) continue; SmallString<128> sbuf; @@ -787,7 +787,7 @@ void VariadicMethodTypeChecker::checkPreObjCMessage(const ObjCMethodCall &msg, os << "'"; auto R = std::make_unique<PathSensitiveBugReport>(*BT, os.str(), - errorNode.getValue()); + errorNode.value()); R->addRange(msg.getArgSourceRange(I)); C.emitReport(std::move(R)); } diff --git a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp index 2e4c8e643698..987cf65d6fec 100644 --- a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp @@ -848,7 +848,7 @@ SVal CStringChecker::getCStringLength(CheckerContext &C, ProgramStateRef &state, SValBuilder &svalBuilder = C.getSValBuilder(); QualType sizeTy = svalBuilder.getContext().getSizeType(); const StringLiteral *strLit = cast<StringRegion>(MR)->getStringLiteral(); - return svalBuilder.makeIntVal(strLit->getByteLength(), sizeTy); + return svalBuilder.makeIntVal(strLit->getLength(), sizeTy); } case MemRegion::SymbolicRegionKind: case MemRegion::AllocaRegionKind: diff --git a/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp index 92d7cef78b13..36464707d06a 100644 --- a/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp @@ -1189,9 +1189,10 @@ MallocChecker::performKernelMalloc(const CallEvent &Call, CheckerContext &C, } NonLoc Flags = V.castAs<NonLoc>(); - NonLoc ZeroFlag = 
C.getSValBuilder() - .makeIntVal(KernelZeroFlagVal.getValue(), FlagsEx->getType()) - .castAs<NonLoc>(); + NonLoc ZeroFlag = + C.getSValBuilder() + .makeIntVal(KernelZeroFlagVal.value(), FlagsEx->getType()) + .castAs<NonLoc>(); SVal MaskedFlagsUC = C.getSValBuilder().evalBinOpNN(State, BO_And, Flags, ZeroFlag, FlagsEx->getType()); @@ -1239,7 +1240,7 @@ void MallocChecker::checkKernelMalloc(const CallEvent &Call, llvm::Optional<ProgramStateRef> MaybeState = performKernelMalloc(Call, C, State); if (MaybeState) - State = MaybeState.getValue(); + State = MaybeState.value(); else State = MallocMemAux(C, Call, Call.getArgExpr(0), UndefinedVal(), State, AF_Malloc); diff --git a/clang/lib/StaticAnalyzer/Checkers/NonNullParamChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/NonNullParamChecker.cpp index 3481936e572b..fb6afd0fdabc 100644 --- a/clang/lib/StaticAnalyzer/Checkers/NonNullParamChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/NonNullParamChecker.cpp @@ -136,10 +136,10 @@ void NonNullParamChecker::checkPreCall(const CallEvent &Call, if (!DV) continue; - assert(!HasRefTypeParam || isa<Loc>(DV.getValue())); + assert(!HasRefTypeParam || isa<Loc>(DV.value())); // Process the case when the argument is not a location. - if (ExpectedToBeNonNull && !isa<Loc>(DV.getValue())) { + if (ExpectedToBeNonNull && !isa<Loc>(DV.value())) { // If the argument is a union type, we want to handle a potential // transparent_union GCC extension. if (!ArgE) diff --git a/clang/lib/StaticAnalyzer/Checkers/UnixAPIChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/UnixAPIChecker.cpp index 9da44d5c0d39..aa3f4524798a 100644 --- a/clang/lib/StaticAnalyzer/Checkers/UnixAPIChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/UnixAPIChecker.cpp @@ -234,7 +234,8 @@ void UnixAPIMisuseChecker::CheckOpenVariant(CheckerContext &C, } NonLoc oflags = V.castAs<NonLoc>(); NonLoc ocreateFlag = C.getSValBuilder() - .makeIntVal(Val_O_CREAT.getValue(), oflagsEx->getType()).castAs<NonLoc>(); + .makeIntVal(Val_O_CREAT.value(), oflagsEx->getType()) + .castAs<NonLoc>(); SVal maskedFlagsUC = C.getSValBuilder().evalBinOpNN(state, BO_And, oflags, ocreateFlag, oflagsEx->getType()); diff --git a/clang/lib/StaticAnalyzer/Core/AnalyzerOptions.cpp b/clang/lib/StaticAnalyzer/Core/AnalyzerOptions.cpp index 79d19a3b99f2..009cbd4559b5 100644 --- a/clang/lib/StaticAnalyzer/Core/AnalyzerOptions.cpp +++ b/clang/lib/StaticAnalyzer/Core/AnalyzerOptions.cpp @@ -78,7 +78,7 @@ AnalyzerOptions::getExplorationStrategy() const { ExplorationStrategyKind::BFSBlockDFSContents) .Default(None); assert(K && "User mode is invalid."); - return K.getValue(); + return K.value(); } CTUPhase1InliningKind AnalyzerOptions::getCTUPhase1Inlining() const { @@ -89,7 +89,7 @@ CTUPhase1InliningKind AnalyzerOptions::getCTUPhase1Inlining() const { .Case("all", CTUPhase1InliningKind::All) .Default(None); assert(K && "CTU inlining mode is invalid."); - return K.getValue(); + return K.value(); } IPAKind AnalyzerOptions::getIPAMode() const { @@ -102,7 +102,7 @@ IPAKind AnalyzerOptions::getIPAMode() const { .Default(None); assert(K && "IPA Mode is invalid."); - return K.getValue(); + return K.value(); } bool diff --git a/clang/lib/StaticAnalyzer/Core/BugReporter.cpp b/clang/lib/StaticAnalyzer/Core/BugReporter.cpp index a2efe14f1045..4d6b82e63f6a 100644 --- a/clang/lib/StaticAnalyzer/Core/BugReporter.cpp +++ b/clang/lib/StaticAnalyzer/Core/BugReporter.cpp @@ -2363,15 +2363,15 @@ PathSensitiveBugReport::getInterestingnessKind(const MemRegion *R) const { } bool 
PathSensitiveBugReport::isInteresting(SVal V) const { - return getInterestingnessKind(V).hasValue(); + return getInterestingnessKind(V).has_value(); } bool PathSensitiveBugReport::isInteresting(SymbolRef sym) const { - return getInterestingnessKind(sym).hasValue(); + return getInterestingnessKind(sym).has_value(); } bool PathSensitiveBugReport::isInteresting(const MemRegion *R) const { - return getInterestingnessKind(R).hasValue(); + return getInterestingnessKind(R).has_value(); } bool PathSensitiveBugReport::isInteresting(const LocationContext *LC) const { diff --git a/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp b/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp index 5b72c91ccd74..2caa5bbc16df 100644 --- a/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp +++ b/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp @@ -2950,7 +2950,7 @@ PathDiagnosticPieceRef ConditionBRVisitor::VisitTrueTest( PathDiagnosticLocation Loc(Cond, SM, LCtx); auto event = std::make_shared<PathDiagnosticEventPiece>(Loc, Message); if (shouldPrune) - event->setPrunable(shouldPrune.getValue()); + event->setPrunable(shouldPrune.value()); return event; } @@ -3084,9 +3084,9 @@ bool ConditionBRVisitor::printValue(const Expr *CondVarExpr, raw_ostream &Out, Out << (TookTrue ? "not equal to 0" : "0"); } else { if (Ty->isBooleanType()) - Out << (IntValue.getValue()->getBoolValue() ? "true" : "false"); + Out << (IntValue.value()->getBoolValue() ? "true" : "false"); else - Out << *IntValue.getValue(); + Out << *IntValue.value(); } return true; @@ -3282,7 +3282,7 @@ void FalsePositiveRefutationBRVisitor::finalizeVisitor( if (!IsSAT) return; - if (!IsSAT.getValue()) + if (!IsSAT.value()) BR.markInvalid("Infeasible constraints", EndPathNode->getLocationContext()); } diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp index 5f8a84591b2a..e1649f0b3df6 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp @@ -1016,7 +1016,7 @@ bool ExprEngine::shouldInlineCall(const CallEvent &Call, const Decl *D, // Check if this function has been marked as non-inlinable. Optional<bool> MayInline = Engine.FunctionSummaries->mayInline(D); if (MayInline) { - if (!MayInline.getValue()) + if (!MayInline.value()) return false; } else { diff --git a/clang/lib/StaticAnalyzer/Core/PlistDiagnostics.cpp b/clang/lib/StaticAnalyzer/Core/PlistDiagnostics.cpp index 93c19a688b9a..d35646bfba91 100644 --- a/clang/lib/StaticAnalyzer/Core/PlistDiagnostics.cpp +++ b/clang/lib/StaticAnalyzer/Core/PlistDiagnostics.cpp @@ -407,11 +407,11 @@ void PlistPrinter::ReportMacroExpansions(raw_ostream &o, unsigned indent) { // Output the macro name. Indent(o, indent) << "<key>name</key>"; - EmitString(o, MacroName.getValue()) << '\n'; + EmitString(o, MacroName.value()) << '\n'; // Output what it expands into. Indent(o, indent) << "<key>expansion</key>"; - EmitString(o, ExpansionText.getValue()) << '\n'; + EmitString(o, ExpansionText.value()) << '\n'; // Finish up. 
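Most of the mechanical churn in these analyzer hunks is the LLVM 15 rename of llvm::Optional's accessors to the std::optional-compatible spellings. A minimal self-contained sketch of the before/after pattern (the helper names are invented, not from the patch):

#include "llvm/ADT/Optional.h"

// Hypothetical producer that may or may not yield a value.
llvm::Optional<int> findWidth() { return llvm::None; }

int widthOrZero() {
  llvm::Optional<int> W = findWidth();
  if (!W.has_value())  // spelled W.hasValue() before this change
    return 0;
  return W.value();    // spelled W.getValue() before this change
}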
--indent; diff --git a/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp b/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp index 13fac37899cd..cf3d13ffb7ba 100644 --- a/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp +++ b/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp @@ -113,6 +113,8 @@ nonloc::SymbolVal SValBuilder::makeNonLoc(const SymExpr *operand, QualType fromTy, QualType toTy) { assert(operand); assert(!Loc::isLocType(toTy)); + if (fromTy == toTy) + return operand; return nonloc::SymbolVal(SymMgr.getCastSymbol(operand, fromTy, toTy)); } @@ -1101,6 +1103,10 @@ nonloc::SymbolVal SValBuilder::simplifySymbolCast(nonloc::SymbolVal V, SymbolRef RootSym = cast<SymbolCast>(SE)->getOperand(); QualType RT = RootSym->getType().getCanonicalType(); + // FIXME support simplification from non-integers. + if (!RT->isIntegralOrEnumerationType()) + return makeNonLoc(SE, T, CastTy); + BasicValueFactory &BVF = getBasicValueFactory(); APSIntType CTy = BVF.getAPSIntType(CastTy); APSIntType TTy = BVF.getAPSIntType(T); diff --git a/clang/lib/Support/RISCVVIntrinsicUtils.cpp b/clang/lib/Support/RISCVVIntrinsicUtils.cpp index d4d3f22c9327..19eb65b39b0a 100644 --- a/clang/lib/Support/RISCVVIntrinsicUtils.cpp +++ b/clang/lib/Support/RISCVVIntrinsicUtils.cpp @@ -114,7 +114,7 @@ bool RVVType::verifyType() const { return false; if (isFloat() && ElementBitwidth == 8) return false; - unsigned V = Scale.getValue(); + unsigned V = Scale.value(); switch (ElementBitwidth) { case 1: case 8: @@ -798,7 +798,7 @@ RVVType::computeTypes(BasicType BT, int Log2LMUL, unsigned NF, if (!T) return llvm::None; // Record legal type index - Types.push_back(T.getValue()); + Types.push_back(T.value()); } return Types; } diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningTool.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningTool.cpp index 26c2b2b2f394..43127ea2df98 100644 --- a/clang/lib/Tooling/DependencyScanning/DependencyScanningTool.cpp +++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningTool.cpp @@ -9,16 +9,19 @@ #include "clang/Tooling/DependencyScanning/DependencyScanningTool.h" #include "clang/Frontend/Utils.h" -namespace clang { -namespace tooling { -namespace dependencies { +using namespace clang; +using namespace tooling; +using namespace dependencies; std::vector<std::string> FullDependencies::getCommandLine( - std::function<StringRef(ModuleID)> LookupPCMPath) const { + llvm::function_ref<std::string(const ModuleID &, ModuleOutputKind)> + LookupModuleOutput) const { std::vector<std::string> Ret = getCommandLineWithoutModulePaths(); - for (ModuleID MID : ClangModuleDeps) - Ret.push_back(("-fmodule-file=" + LookupPCMPath(MID)).str()); + for (ModuleID MID : ClangModuleDeps) { + auto PCM = LookupModuleOutput(MID, ModuleOutputKind::ModuleFile); + Ret.push_back("-fmodule-file=" + PCM); + } return Ret; } @@ -192,7 +195,3 @@ DependencyScanningTool::getFullDependencies( return std::move(Result); return Consumer.getFullDependencies(CommandLine); } - -} // end namespace dependencies -} // end namespace tooling -} // end namespace clang diff --git a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp index f7d96130b971..725bb2c318ac 100644 --- a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp +++ b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp @@ -8,6 +8,7 @@ #include "clang/Tooling/DependencyScanning/ModuleDepCollector.h" +#include "clang/Basic/MakeSupport.h" #include "clang/Frontend/CompilerInstance.h" #include 
"clang/Lex/Preprocessor.h" #include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h" @@ -56,6 +57,9 @@ CompilerInvocation ModuleDepCollector::makeInvocationForModuleBuildWithoutPaths( CI.getFrontendOpts().OutputFile.clear(); CI.getCodeGenOpts().MainFileName.clear(); CI.getCodeGenOpts().DwarfDebugFlags.clear(); + CI.getDiagnosticOpts().DiagnosticSerializationFile.clear(); + CI.getDependencyOutputOpts().OutputFile.clear(); + CI.getDependencyOutputOpts().Targets.clear(); CI.getFrontendOpts().ProgramAction = frontend::GenerateModule; CI.getLangOpts()->ModuleName = Deps.ID.ModuleName; @@ -107,18 +111,47 @@ serializeCompilerInvocation(const CompilerInvocation &CI) { return std::vector<std::string>{Args.begin(), Args.end()}; } +static std::vector<std::string> splitString(std::string S, char Separator) { + SmallVector<StringRef> Segments; + StringRef(S).split(Segments, Separator, /*MaxSplit=*/-1, /*KeepEmpty=*/false); + std::vector<std::string> Result; + Result.reserve(Segments.size()); + for (StringRef Segment : Segments) + Result.push_back(Segment.str()); + return Result; +} + std::vector<std::string> ModuleDeps::getCanonicalCommandLine( - std::function<StringRef(ModuleID)> LookupPCMPath) const { + llvm::function_ref<std::string(const ModuleID &, ModuleOutputKind)> + LookupModuleOutput) const { CompilerInvocation CI(BuildInvocation); FrontendOptions &FrontendOpts = CI.getFrontendOpts(); InputKind ModuleMapInputKind(FrontendOpts.DashX.getLanguage(), InputKind::Format::ModuleMap); FrontendOpts.Inputs.emplace_back(ClangModuleMapFile, ModuleMapInputKind); - FrontendOpts.OutputFile = std::string(LookupPCMPath(ID)); + FrontendOpts.OutputFile = + LookupModuleOutput(ID, ModuleOutputKind::ModuleFile); + if (HadSerializedDiagnostics) + CI.getDiagnosticOpts().DiagnosticSerializationFile = + LookupModuleOutput(ID, ModuleOutputKind::DiagnosticSerializationFile); + if (HadDependencyFile) { + DependencyOutputOptions &DepOpts = CI.getDependencyOutputOpts(); + DepOpts.OutputFile = + LookupModuleOutput(ID, ModuleOutputKind::DependencyFile); + DepOpts.Targets = splitString( + LookupModuleOutput(ID, ModuleOutputKind::DependencyTargets), '\0'); + if (!DepOpts.OutputFile.empty() && DepOpts.Targets.empty()) { + // Fallback to -o as dependency target, as in the driver. + SmallString<128> Target; + quoteMakeTarget(FrontendOpts.OutputFile, Target); + DepOpts.Targets.push_back(std::string(Target)); + } + } for (ModuleID MID : ClangModuleDeps) - FrontendOpts.ModuleFiles.emplace_back(LookupPCMPath(MID)); + FrontendOpts.ModuleFiles.push_back( + LookupModuleOutput(MID, ModuleOutputKind::ModuleFile)); return serializeCompilerInvocation(CI); } @@ -309,6 +342,12 @@ ModuleID ModuleDepCollectorPP::handleTopLevelModule(const Module *M) { optimizeHeaderSearchOpts(BuildInvocation.getHeaderSearchOpts(), *MDC.ScanInstance.getASTReader(), *MF); }); + MD.HadSerializedDiagnostics = !MDC.OriginalInvocation.getDiagnosticOpts() + .DiagnosticSerializationFile.empty(); + MD.HadDependencyFile = + !MDC.OriginalInvocation.getDependencyOutputOpts().OutputFile.empty(); + // FIXME: HadSerializedDiagnostics and HadDependencyFile should be included in + // the context hash since it can affect the command-line. 
MD.ID.ContextHash = MD.BuildInvocation.getModuleHash(); llvm::DenseSet<const Module *> AddedModules; diff --git a/clang/tools/clang-format/ClangFormat.cpp b/clang/tools/clang-format/ClangFormat.cpp index 07110a0db091..269bc59506b2 100644 --- a/clang/tools/clang-format/ClangFormat.cpp +++ b/clang/tools/clang-format/ClangFormat.cpp @@ -70,16 +70,18 @@ static cl::opt<std::string> cl::desc("The name of the predefined style used as a\n" "fallback in case clang-format is invoked with\n" "-style=file, but cannot find the .clang-format\n" - "file to use.\n" + "file to use. Defaults to 'LLVM'.\n" "Use -fallback-style=none to skip formatting."), cl::init(clang::format::DefaultFallbackStyle), cl::cat(ClangFormatCategory)); static cl::opt<std::string> AssumeFileName( "assume-filename", - cl::desc("Override filename used to determine the language.\n" - "When reading from stdin, clang-format assumes this\n" - "filename to determine the language.\n" + cl::desc("Set filename used to determine the language and to find\n" + "the .clang-format file.\n" + "Only used when reading from stdin.\n" + "If this is not passed, the .clang-format file is searched\n" + "for relative to the current working directory when reading stdin.\n" + "Unrecognized filenames are treated as C++.\n" "supported:\n" " CSharp: .cs\n" @@ -244,8 +246,12 @@ static bool fillRanges(MemoryBuffer *Code, errs() << "error: invalid <start line>:<end line> pair\n"; return true; } + if (FromLine < 1) { + errs() << "error: start line should be at least 1\n"; + return true; + } if (FromLine > ToLine) { - errs() << "error: start line should be less than end line\n"; + errs() << "error: start line should not exceed end line\n"; return true; } SourceLocation Start = Sources.translateLineCol(ID, FromLine, 1); diff --git a/clang/tools/driver/driver.cpp b/clang/tools/driver/driver.cpp index 0e21106535ec..34335a599a00 100644 --- a/clang/tools/driver/driver.cpp +++ b/clang/tools/driver/driver.cpp @@ -408,7 +408,7 @@ int clang_main(int Argc, char **Argv) { llvm::Optional<std::string> OptCL = llvm::sys::Process::GetEnv("CL"); if (OptCL) { SmallVector<const char *, 8> PrependedOpts; - getCLEnvVarOptions(OptCL.getValue(), Saver, PrependedOpts); + getCLEnvVarOptions(OptCL.value(), Saver, PrependedOpts); // Insert right after the program name to prepend to the argument list. Args.insert(Args.begin() + 1, PrependedOpts.begin(), PrependedOpts.end()); @@ -417,7 +417,7 @@ int clang_main(int Argc, char **Argv) { llvm::Optional<std::string> Opt_CL_ = llvm::sys::Process::GetEnv("_CL_"); if (Opt_CL_) { SmallVector<const char *, 8> AppendedOpts; - getCLEnvVarOptions(Opt_CL_.getValue(), Saver, AppendedOpts); + getCLEnvVarOptions(Opt_CL_.value(), Saver, AppendedOpts); // Insert at the end of the argument list to append. Args.append(AppendedOpts.begin(), AppendedOpts.end()); diff --git a/clang/utils/TableGen/RISCVVEmitter.cpp b/clang/utils/TableGen/RISCVVEmitter.cpp index 068e6a0c072c..db4cd77d8c53 100644 --- a/clang/utils/TableGen/RISCVVEmitter.cpp +++ b/clang/utils/TableGen/RISCVVEmitter.cpp @@ -105,6 +105,16 @@ void emitCodeGenSwitchBody(const RVVIntrinsic *RVVI, raw_ostream &OS) { return; } + // Cast pointer operand of vector load intrinsic.
+ for (const auto &I : enumerate(RVVI->getInputTypes())) { + if (I.value()->isPointer()) { + assert(RVVI->getIntrinsicTypes().front() == -1 && + "RVVI should be vector load intrinsic."); + OS << " Ops[" << I.index() << "] = Builder.CreateBitCast(Ops["; + OS << I.index() << "], ResultType->getPointerTo());\n"; + } + } + if (RVVI->isMasked()) { if (RVVI->hasVL()) { OS << " std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end() - 1);\n"; @@ -218,7 +228,7 @@ void RVVEmitter::createHeader(raw_ostream &OS) { auto T = RVVType::computeType(BasicType::Int8, Log2LMUL, PrototypeDescriptor::Mask); if (T) - printType(T.getValue()); + printType(T.value()); } // Print RVV int/float types. for (char I : StringRef("csil")) { @@ -226,13 +236,13 @@ void RVVEmitter::createHeader(raw_ostream &OS) { for (int Log2LMUL : Log2LMULs) { auto T = RVVType::computeType(BT, Log2LMUL, PrototypeDescriptor::Vector); if (T) { - printType(T.getValue()); + printType(T.value()); auto UT = RVVType::computeType( BT, Log2LMUL, PrototypeDescriptor(BaseTypeModifier::Vector, VectorTypeModifier::NoModifier, TypeModifier::UnsignedInteger)); - printType(UT.getValue()); + printType(UT.value()); } } } @@ -241,7 +251,7 @@ void RVVEmitter::createHeader(raw_ostream &OS) { auto T = RVVType::computeType(BasicType::Float16, Log2LMUL, PrototypeDescriptor::Vector); if (T) - printType(T.getValue()); + printType(T.value()); } OS << "#endif\n"; @@ -250,7 +260,7 @@ void RVVEmitter::createHeader(raw_ostream &OS) { auto T = RVVType::computeType(BasicType::Float32, Log2LMUL, PrototypeDescriptor::Vector); if (T) - printType(T.getValue()); + printType(T.value()); } OS << "#endif\n"; @@ -259,7 +269,7 @@ void RVVEmitter::createHeader(raw_ostream &OS) { auto T = RVVType::computeType(BasicType::Float64, Log2LMUL, PrototypeDescriptor::Vector); if (T) - printType(T.getValue()); + printType(T.value()); } OS << "#endif\n\n"; diff --git a/compiler-rt/lib/asan/asan_globals.cpp b/compiler-rt/lib/asan/asan_globals.cpp index 29eef6c68a51..b780128c9adb 100644 --- a/compiler-rt/lib/asan/asan_globals.cpp +++ b/compiler-rt/lib/asan/asan_globals.cpp @@ -90,7 +90,7 @@ static void ReportGlobal(const Global &g, const char *prefix) { DataInfo info; Symbolizer::GetOrInit()->SymbolizeData(g.beg, &info); if (info.line != 0) { - Report(" location: name=%s, %d\n", info.file, info.line); + Report(" location: name=%s, %d\n", info.file, static_cast<int>(info.line)); } } @@ -301,7 +301,7 @@ void PrintGlobalLocation(InternalScopedString *str, const __asan_global &g) { Symbolizer::GetOrInit()->SymbolizeData(g.beg, &info); if (info.line != 0) { - str->append("%s:%d", info.file, info.line); + str->append("%s:%d", info.file, static_cast<int>(info.line)); } else { str->append("%s", g.module_name); } diff --git a/compiler-rt/lib/asan/asan_interceptors.cpp b/compiler-rt/lib/asan/asan_interceptors.cpp index 37d0fc67cf75..13311b7e409b 100644 --- a/compiler-rt/lib/asan/asan_interceptors.cpp +++ b/compiler-rt/lib/asan/asan_interceptors.cpp @@ -248,10 +248,8 @@ static void ClearShadowMemoryForContextStack(uptr stack, uptr ssize) { uptr bottom = stack & ~(PageSize - 1); ssize += stack - bottom; ssize = RoundUpTo(ssize, PageSize); - static const uptr kMaxSaneContextStackSize = 1 << 22; // 4 Mb - if (AddrIsInMem(bottom) && ssize && ssize <= kMaxSaneContextStackSize) { + if (AddrIsInMem(bottom) && ssize) PoisonShadow(bottom, ssize, 0); - } } INTERCEPTOR(int, swapcontext, struct ucontext_t *oucp, diff --git a/compiler-rt/lib/asan/asan_rtl.cpp b/compiler-rt/lib/asan/asan_rtl.cpp index 
29cf526d9eb0..2bbf0ac5240a 100644 --- a/compiler-rt/lib/asan/asan_rtl.cpp +++ b/compiler-rt/lib/asan/asan_rtl.cpp @@ -421,9 +421,6 @@ static void AsanInitInternal() { __sanitizer::InitializePlatformEarly(); - // Re-exec ourselves if we need to set additional env or command line args. - MaybeReexec(); - // Setup internal allocator callback. SetLowLevelAllocateMinAlignment(ASAN_SHADOW_GRANULARITY); SetLowLevelAllocateCallback(OnLowLevelAllocate); diff --git a/compiler-rt/lib/hwasan/hwasan.cpp b/compiler-rt/lib/hwasan/hwasan.cpp index f8725a173432..b771025cb93d 100644 --- a/compiler-rt/lib/hwasan/hwasan.cpp +++ b/compiler-rt/lib/hwasan/hwasan.cpp @@ -576,6 +576,12 @@ u8 __hwasan_generate_tag() { return t->GenerateRandomTag(); } +void __hwasan_add_frame_record(u64 frame_record_info) { + Thread *t = GetCurrentThread(); + if (t) + t->stack_allocations()->push(frame_record_info); +} + #if !SANITIZER_SUPPORTS_WEAK_HOOKS extern "C" { SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE diff --git a/compiler-rt/lib/hwasan/hwasan_fuchsia.cpp b/compiler-rt/lib/hwasan/hwasan_fuchsia.cpp index b066d4fdd114..967c796c339d 100644 --- a/compiler-rt/lib/hwasan/hwasan_fuchsia.cpp +++ b/compiler-rt/lib/hwasan/hwasan_fuchsia.cpp @@ -190,11 +190,11 @@ void InitializeOsSupport() { uint32_t features = 0; CHECK_EQ(zx_system_get_features(ZX_FEATURE_KIND_ADDRESS_TAGGING, &features), ZX_OK); - if (features != ZX_ARM64_FEATURE_ADDRESS_TAGGING_TBI && + if (!(features & ZX_ARM64_FEATURE_ADDRESS_TAGGING_TBI) && flags()->fail_without_syscall_abi) { Printf( - "FATAL: HWAddressSanitizer requires a kernel with tagged address " - "ABI.\n"); + "FATAL: HWAddressSanitizer requires " + "ZX_ARM64_FEATURE_ADDRESS_TAGGING_TBI.\n"); Die(); } #endif diff --git a/compiler-rt/lib/hwasan/hwasan_interface_internal.h b/compiler-rt/lib/hwasan/hwasan_interface_internal.h index ef771add411c..d1ecbb592a21 100644 --- a/compiler-rt/lib/hwasan/hwasan_interface_internal.h +++ b/compiler-rt/lib/hwasan/hwasan_interface_internal.h @@ -168,6 +168,14 @@ void __hwasan_thread_exit(); SANITIZER_INTERFACE_ATTRIBUTE void __hwasan_print_memory_usage(); +// The compiler will generate this when +// `-hwasan-record-stack-history-with-calls` is added as a flag, which will add +// frame record information to the stack ring buffer. This is an alternative to +// the compiler emitting instructions in the prologue for doing the same thing +// by accessing the ring buffer directly. +SANITIZER_INTERFACE_ATTRIBUTE +void __hwasan_add_frame_record(u64 frame_record_info); + SANITIZER_INTERFACE_ATTRIBUTE void *__hwasan_memcpy(void *dst, const void *src, uptr size); SANITIZER_INTERFACE_ATTRIBUTE diff --git a/compiler-rt/lib/hwasan/hwasan_linux.cpp b/compiler-rt/lib/hwasan/hwasan_linux.cpp index ba9e23621cc2..dcab473d8ad1 100644 --- a/compiler-rt/lib/hwasan/hwasan_linux.cpp +++ b/compiler-rt/lib/hwasan/hwasan_linux.cpp @@ -114,11 +114,21 @@ void InitializeOsSupport() { # define PR_SET_TAGGED_ADDR_CTRL 55 # define PR_GET_TAGGED_ADDR_CTRL 56 # define PR_TAGGED_ADDR_ENABLE (1UL << 0) +# define ARCH_GET_UNTAG_MASK 0x4001 +# define ARCH_ENABLE_TAGGED_ADDR 0x4002 // Check we're running on a kernel that can use the tagged address ABI. 
int local_errno = 0; - if (internal_iserror(internal_prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0), + bool has_abi; +# if defined(__x86_64__) + has_abi = (internal_iserror(internal_arch_prctl(ARCH_GET_UNTAG_MASK, 0), &local_errno) && - local_errno == EINVAL) { + local_errno == EINVAL); +# else + has_abi = (internal_iserror(internal_prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0), + &local_errno) && + local_errno == EINVAL); +# endif + if (has_abi) { # if SANITIZER_ANDROID || defined(HWASAN_ALIASING_MODE) // Some older Android kernels have the tagged pointer ABI on // unconditionally, and hence don't have the tagged-addr prctl while still @@ -142,17 +152,11 @@ void InitializeOsSupport() { !internal_prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0))) { # if defined(__x86_64__) && !defined(HWASAN_ALIASING_MODE) // Try the new prctl API for Intel LAM. The API is based on a currently - // unsubmitted patch to the Linux kernel (as of May 2021) and is thus + // unsubmitted patch to the Linux kernel (as of July 2022) and is thus // subject to change. Patch is here: - // https://lore.kernel.org/linux-mm/20210205151631.43511-12-kirill.shutemov@linux.intel.com/ - int tag_bits = kTagBits; - int tag_shift = kAddressTagShift; + // https://lore.kernel.org/linux-mm/20220712231328.5294-1-kirill.shutemov@linux.intel.com/ if (!internal_iserror( - internal_prctl(PR_SET_TAGGED_ADDR_CTRL, PR_TAGGED_ADDR_ENABLE, - reinterpret_cast<unsigned long>(&tag_bits), - reinterpret_cast<unsigned long>(&tag_shift), 0))) { - CHECK_EQ(tag_bits, kTagBits); - CHECK_EQ(tag_shift, kAddressTagShift); + internal_arch_prctl(ARCH_ENABLE_TAGGED_ADDR, kTagBits))) { return; } # endif // defined(__x86_64__) && !defined(HWASAN_ALIASING_MODE) diff --git a/compiler-rt/lib/memprof/memprof_rtl.cpp b/compiler-rt/lib/memprof/memprof_rtl.cpp index 21424fb4f072..d568a075c3e1 100644 --- a/compiler-rt/lib/memprof/memprof_rtl.cpp +++ b/compiler-rt/lib/memprof/memprof_rtl.cpp @@ -170,9 +170,6 @@ static void MemprofInitInternal() { __sanitizer::InitializePlatformEarly(); - // Re-exec ourselves if we need to set additional env or command line args. - MaybeReexec(); - // Setup internal allocator callback. 
SetLowLevelAllocateMinAlignment(SHADOW_GRANULARITY); diff --git a/compiler-rt/lib/orc/elfnix_platform.cpp b/compiler-rt/lib/orc/elfnix_platform.cpp index 6f502b20f8ca..260731ed2732 100644 --- a/compiler-rt/lib/orc/elfnix_platform.cpp +++ b/compiler-rt/lib/orc/elfnix_platform.cpp @@ -63,11 +63,17 @@ Error runInitArray(const std::vector<ExecutorAddrRange> &InitArraySections, return Error::success(); } + struct TLSInfoEntry { unsigned long Key = 0; unsigned long DataAddress = 0; }; +struct TLSDescriptor { + void (*Resolver)(void *); + TLSInfoEntry *InfoEntry; +}; + class ELFNixPlatformRuntimeState { private: struct AtExitEntry { @@ -501,6 +507,13 @@ ORC_RT_INTERFACE void *__orc_rt_elfnix_tls_get_addr_impl(TLSInfoEntry *D) { reinterpret_cast<char *>(static_cast<uintptr_t>(D->DataAddress))); } +ORC_RT_INTERFACE ptrdiff_t ___orc_rt_elfnix_tlsdesc_resolver_impl( + TLSDescriptor *D, const char *ThreadPointer) { + const char *TLVPtr = reinterpret_cast<const char *>( + __orc_rt_elfnix_tls_get_addr_impl(D->InfoEntry)); + return TLVPtr - ThreadPointer; +} + ORC_RT_INTERFACE __orc_rt_CWrapperFunctionResult __orc_rt_elfnix_create_pthread_key(char *ArgData, size_t ArgSize) { return WrapperFunction<SPSExpected<uint64_t>(void)>::handle( diff --git a/compiler-rt/lib/orc/elfnix_tls.aarch64.S b/compiler-rt/lib/orc/elfnix_tls.aarch64.S new file mode 100644 index 000000000000..8dcdd535be8a --- /dev/null +++ b/compiler-rt/lib/orc/elfnix_tls.aarch64.S @@ -0,0 +1,94 @@ +//===-- elfnix_tlv.aarch64.s ---------------------------------------*- ASM -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of the ORC runtime support library. 
+// +//===----------------------------------------------------------------------===// + +// The content of this file is aarch64-only +#if defined(__arm64__) || defined(__aarch64__) + +#define REGISTER_SAVE_SPACE_SIZE 32 * 24 + + .text + + // returns address of TLV in x0, all other registers preserved + // TODO: add fast-path for repeat access + .globl ___orc_rt_elfnix_tlsdesc_resolver +___orc_rt_elfnix_tlsdesc_resolver: + sub sp, sp, #REGISTER_SAVE_SPACE_SIZE + stp x29, x30, [sp, #16 * 1] + stp x27, x28, [sp, #16 * 2] + stp x25, x26, [sp, #16 * 3] + stp x23, x24, [sp, #16 * 4] + stp x21, x22, [sp, #16 * 5] + stp x19, x20, [sp, #16 * 6] + stp x17, x18, [sp, #16 * 7] + stp x15, x16, [sp, #16 * 8] + stp x13, x14, [sp, #16 * 9] + stp x11, x12, [sp, #16 * 10] + stp x9, x10, [sp, #16 * 11] + stp x7, x8, [sp, #16 * 12] + stp x5, x6, [sp, #16 * 13] + stp x3, x4, [sp, #16 * 14] + stp x1, x2, [sp, #16 * 15] + stp q30, q31, [sp, #32 * 8] + stp q28, q29, [sp, #32 * 9] + stp q26, q27, [sp, #32 * 10] + stp q24, q25, [sp, #32 * 11] + stp q22, q23, [sp, #32 * 12] + stp q20, q21, [sp, #32 * 13] + stp q18, q19, [sp, #32 * 14] + stp q16, q17, [sp, #32 * 15] + stp q14, q15, [sp, #32 * 16] + stp q12, q13, [sp, #32 * 17] + stp q10, q11, [sp, #32 * 18] + stp q8, q9, [sp, #32 * 19] + stp q6, q7, [sp, #32 * 20] + stp q4, q5, [sp, #32 * 21] + stp q2, q3, [sp, #32 * 22] + stp q0, q1, [sp, #32 * 23] + + mrs x1, TPIDR_EL0 // get thread pointer + bl ___orc_rt_elfnix_tlsdesc_resolver_impl + + ldp q0, q1, [sp, #32 * 23] + ldp q2, q3, [sp, #32 * 22] + ldp q4, q5, [sp, #32 * 21] + ldp q6, q7, [sp, #32 * 20] + ldp q8, q9, [sp, #32 * 19] + ldp q10, q11, [sp, #32 * 18] + ldp q12, q13, [sp, #32 * 17] + ldp q14, q15, [sp, #32 * 16] + ldp q16, q17, [sp, #32 * 15] + ldp q18, q19, [sp, #32 * 14] + ldp q20, q21, [sp, #32 * 13] + ldp q22, q23, [sp, #32 * 12] + ldp q24, q25, [sp, #32 * 11] + ldp q26, q27, [sp, #32 * 10] + ldp q28, q29, [sp, #32 * 9] + ldp q30, q31, [sp, #32 * 8] + ldp x1, x2, [sp, #16 * 15] + ldp x3, x4, [sp, #16 * 14] + ldp x5, x6, [sp, #16 * 13] + ldp x7, x8, [sp, #16 * 12] + ldp x9, x10, [sp, #16 * 11] + ldp x11, x12, [sp, #16 * 10] + ldp x13, x14, [sp, #16 * 9] + ldp x15, x16, [sp, #16 * 8] + ldp x17, x18, [sp, #16 * 7] + ldp x19, x20, [sp, #16 * 6] + ldp x21, x22, [sp, #16 * 5] + ldp x23, x24, [sp, #16 * 4] + ldp x25, x26, [sp, #16 * 3] + ldp x27, x28, [sp, #16 * 2] + ldp x29, x30, [sp, #16 * 1] + add sp, sp, #REGISTER_SAVE_SPACE_SIZE + ret + +#endif // defined(__arm64__) || defined(__aarch64__) diff --git a/compiler-rt/lib/profile/GCDAProfiling.c b/compiler-rt/lib/profile/GCDAProfiling.c index 4aa15e9e9590..4f46fd2839b9 100644 --- a/compiler-rt/lib/profile/GCDAProfiling.c +++ b/compiler-rt/lib/profile/GCDAProfiling.c @@ -3,9 +3,9 @@ |* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |* See https://llvm.org/LICENSE.txt for license information. |* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -|* +|* |*===----------------------------------------------------------------------===*| -|* +|* |* This file implements the call back routines for the gcov profiling |* instrumentation pass. Link against this library when running code through |* the -insert-gcov-profiling LLVM pass. @@ -65,7 +65,7 @@ static char *filename = NULL; /* * The current file we're outputting. 
- */ + */ static FILE *output_file = NULL; /* @@ -264,11 +264,6 @@ static int map_file(void) { static void unmap_file(void) { #if defined(_WIN32) - if (!FlushViewOfFile(write_buffer, file_size)) { - fprintf(stderr, "profiling: %s: cannot flush mapped view: %lu\n", filename, - GetLastError()); - } - if (!UnmapViewOfFile(write_buffer)) { fprintf(stderr, "profiling: %s: cannot unmap mapped view: %lu\n", filename, GetLastError()); diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common.h b/compiler-rt/lib/sanitizer_common/sanitizer_common.h index 345c262af972..517f776baf6e 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common.h @@ -1016,7 +1016,6 @@ struct SignalContext { }; void InitializePlatformEarly(); -void MaybeReexec(); template <typename Fn> class RunOnDestruction { diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc index 60537018b889..9af296b1853a 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc @@ -2531,30 +2531,30 @@ INTERCEPTOR(int, __b64_pton, char const *src, char *target, SIZE_T targsize) { #endif // SANITIZER_INTERCEPT___B64_TO #if SANITIZER_INTERCEPT___DN_EXPAND -#if __GLIBC_PREREQ(2, 34) +# if __GLIBC_PREREQ(2, 34) // Changed with https://sourceware.org/git/?p=glibc.git;h=640bbdf -#define DN_EXPAND_INTERCEPTOR_NAME dn_expand -#else -#define DN_EXPAND_INTERCEPTOR_NAME __dn_expand -#endif - INTERCEPTOR(int, DN_EXPAND_INTERCEPTOR_NAME, unsigned char const *base, - unsigned char const *end, unsigned char const *src, char *dest, - int space) { - void *ctx; - COMMON_INTERCEPTOR_ENTER(ctx, DN_EXPAND_INTERCEPTOR_NAME, base, end, src, dest, space); +# define DN_EXPAND_INTERCEPTOR_NAME dn_expand +# else +# define DN_EXPAND_INTERCEPTOR_NAME __dn_expand +# endif +INTERCEPTOR(int, DN_EXPAND_INTERCEPTOR_NAME, unsigned char const *base, + unsigned char const *end, unsigned char const *src, char *dest, + int space) { + void *ctx; + COMMON_INTERCEPTOR_ENTER(ctx, DN_EXPAND_INTERCEPTOR_NAME, base, end, src, + dest, space); // TODO: add read check if __dn_comp intercept added int res = REAL(DN_EXPAND_INTERCEPTOR_NAME)(base, end, src, dest, space); if (res >= 0) - COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dest, res + 1); + COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dest, internal_strlen(dest) + 1); return res; } -#define INIT___DN_EXPAND \ +# define INIT___DN_EXPAND \ COMMON_INTERCEPT_FUNCTION(DN_EXPAND_INTERCEPTOR_NAME); #else // SANITIZER_INTERCEPT___DN_EXPAND -#define INIT___DN_EXPAND +# define INIT___DN_EXPAND #endif // SANITIZER_INTERCEPT___DN_EXPAND - #if SANITIZER_INTERCEPT_POSIX_SPAWN template <class RealSpawnPtr> diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_flags.inc b/compiler-rt/lib/sanitizer_common/sanitizer_flags.inc index 8627ffd0d01c..6148ae56067c 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_flags.inc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_flags.inc @@ -68,9 +68,12 @@ COMMON_FLAG( COMMON_FLAG( int, verbosity, 0, "Verbosity level (0 - silent, 1 - a bit of output, 2+ - more output).") -COMMON_FLAG(bool, strip_env, 1, +COMMON_FLAG(bool, strip_env, true, "Whether to remove the sanitizer from DYLD_INSERT_LIBRARIES to " - "avoid passing it to children. Default is true.") + "avoid passing it to children on Apple platforms. 
Default is true.") +COMMON_FLAG(bool, verify_interceptors, true, + "Verify that interceptors are working on Apple platforms. Default " + "is true.") COMMON_FLAG(bool, detect_leaks, !SANITIZER_APPLE, "Enable memory leak detection.") COMMON_FLAG( bool, leak_check_at_exit, true, diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_fuchsia.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_fuchsia.cpp index e253b67fc484..a92e84cb8ecf 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_fuchsia.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_fuchsia.cpp @@ -87,7 +87,6 @@ void GetThreadStackTopAndBottom(bool, uptr *stack_top, uptr *stack_bottom) { } void InitializePlatformEarly() {} -void MaybeReexec() {} void CheckASLR() {} void CheckMPROTECT() {} void PlatformPrepareForSandboxing(void *args) {} diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp index 47acf10650dd..be37fd7f68b3 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp @@ -34,7 +34,7 @@ // format. Struct kernel_stat is defined as 'struct stat' in asm/stat.h. To // access stat from asm/stat.h, without conflicting with definition in // sys/stat.h, we use this trick. -#if defined(__mips64) +#if SANITIZER_MIPS64 #include <asm/unistd.h> #include <sys/types.h> #define stat kernel_stat @@ -124,8 +124,9 @@ const int FUTEX_WAKE_PRIVATE = FUTEX_WAKE | FUTEX_PRIVATE_FLAG; // Are we using 32-bit or 64-bit Linux syscalls? // x32 (which defines __x86_64__) has SANITIZER_WORDSIZE == 32 // but it still needs to use 64-bit syscalls. -#if SANITIZER_LINUX && (defined(__x86_64__) || defined(__powerpc64__) || \ - SANITIZER_WORDSIZE == 64) +#if SANITIZER_LINUX && (defined(__x86_64__) || defined(__powerpc64__) || \ + SANITIZER_WORDSIZE == 64 || \ + (defined(__mips__) && _MIPS_SIM == _ABIN32)) # define SANITIZER_LINUX_USES_64BIT_SYSCALLS 1 #else # define SANITIZER_LINUX_USES_64BIT_SYSCALLS 0 @@ -289,7 +290,7 @@ static void stat64_to_stat(struct stat64 *in, struct stat *out) { } #endif -#if defined(__mips64) +#if SANITIZER_MIPS64 // Undefine compatibility macros from <sys/stat.h> // so that they would not clash with the kernel_stat // st_[a|m|c]time fields @@ -343,7 +344,9 @@ uptr internal_stat(const char *path, void *buf) { #if SANITIZER_FREEBSD return internal_syscall(SYSCALL(fstatat), AT_FDCWD, (uptr)path, (uptr)buf, 0); # elif SANITIZER_LINUX -# if (SANITIZER_WORDSIZE == 64 || SANITIZER_X32) && !SANITIZER_SPARC +# if (SANITIZER_WORDSIZE == 64 || SANITIZER_X32 || \ + (defined(__mips__) && _MIPS_SIM == _ABIN32)) && \ + !SANITIZER_SPARC return internal_syscall(SYSCALL(newfstatat), AT_FDCWD, (uptr)path, (uptr)buf, 0); # else @@ -366,7 +369,9 @@ uptr internal_lstat(const char *path, void *buf) { return internal_syscall(SYSCALL(fstatat), AT_FDCWD, (uptr)path, (uptr)buf, AT_SYMLINK_NOFOLLOW); # elif SANITIZER_LINUX -# if (defined(_LP64) || SANITIZER_X32) && !SANITIZER_SPARC +# if (defined(_LP64) || SANITIZER_X32 || \ + (defined(__mips__) && _MIPS_SIM == _ABIN32)) && \ + !SANITIZER_SPARC return internal_syscall(SYSCALL(newfstatat), AT_FDCWD, (uptr)path, (uptr)buf, AT_SYMLINK_NOFOLLOW); # else @@ -758,6 +763,13 @@ uptr internal_lseek(fd_t fd, OFF_T offset, int whence) { uptr internal_prctl(int option, uptr arg2, uptr arg3, uptr arg4, uptr arg5) { return internal_syscall(SYSCALL(prctl), option, arg2, arg3, arg4, arg5); } +# if defined(__x86_64__) +#include <asm/unistd_64.h> +// Currently internal_arch_prctl() is 
only needed on x86_64. +uptr internal_arch_prctl(int option, uptr arg2) { + return internal_syscall(__NR_arch_prctl, option, arg2); +} +# endif #endif uptr internal_sigaltstack(const void *ss, void *oss) { @@ -1057,7 +1069,7 @@ uptr GetMaxVirtualAddress() { return (1ULL << (MostSignificantSetBitIndex(GET_CURRENT_FRAME()) + 1)) - 1; #elif SANITIZER_RISCV64 return (1ULL << 38) - 1; -# elif defined(__mips64) +# elif SANITIZER_MIPS64 return (1ULL << 40) - 1; // 0x000000ffffffffffUL; # elif defined(__s390x__) return (1ULL << 53) - 1; // 0x001fffffffffffffUL; @@ -2175,10 +2187,6 @@ void InitializePlatformEarly() { // Do nothing. } -void MaybeReexec() { - // No need to re-exec on Linux. -} - void CheckASLR() { #if SANITIZER_NETBSD int mib[3]; diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux.h b/compiler-rt/lib/sanitizer_common/sanitizer_linux.h index 45d8c921da12..761c57d1b8eb 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_linux.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux.h @@ -69,6 +69,9 @@ uptr internal_clock_gettime(__sanitizer_clockid_t clk_id, void *tp); // Linux-only syscalls. #if SANITIZER_LINUX uptr internal_prctl(int option, uptr arg2, uptr arg3, uptr arg4, uptr arg5); +# if defined(__x86_64__) +uptr internal_arch_prctl(int option, uptr arg2); +# endif // Used only by sanitizer_stoptheworld. Signal handlers that are actually used // (like the process-wide error reporting SEGV handler) must use // internal_sigaction instead. diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp index 327c7167dcf5..7ce6eff832e5 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp @@ -943,6 +943,9 @@ static void DisableMmapExcGuardExceptions() { set_behavior(mach_task_self(), task_exc_guard_none); } +static void VerifyInterceptorsWorking(); +static void StripEnv(); + void InitializePlatformEarly() { // Only use xnu_fast_mmap when on x86_64 and the kernel supports it. use_xnu_fast_mmap = @@ -953,17 +956,54 @@ void InitializePlatformEarly() { #endif if (GetDarwinKernelVersion() >= DarwinKernelVersion(19, 0)) DisableMmapExcGuardExceptions(); + +# if !SANITIZER_GO + MonotonicNanoTime(); // Call to initialize mach_timebase_info + VerifyInterceptorsWorking(); + StripEnv(); +# endif } #if !SANITIZER_GO static const char kDyldInsertLibraries[] = "DYLD_INSERT_LIBRARIES"; LowLevelAllocator allocator_for_env; +static bool ShouldCheckInterceptors() { + // Restrict "interceptors working?" check to ASan and TSan. + const char *sanitizer_names[] = {"AddressSanitizer", "ThreadSanitizer"}; + size_t count = sizeof(sanitizer_names) / sizeof(sanitizer_names[0]); + for (size_t i = 0; i < count; i++) { + if (internal_strcmp(sanitizer_names[i], SanitizerToolName) == 0) + return true; + } + return false; +} + +static void VerifyInterceptorsWorking() { + if (!common_flags()->verify_interceptors || !ShouldCheckInterceptors()) + return; + + // Verify that interceptors really work. We'll use dlsym to locate + // "puts", if interceptors are working, it should really point to + // "wrap_puts" within our own dylib. + Dl_info info_puts, info_runtime; + RAW_CHECK(dladdr(dlsym(RTLD_DEFAULT, "puts"), &info_puts)); + RAW_CHECK(dladdr((void *)__sanitizer_report_error_summary, &info_runtime)); + if (internal_strcmp(info_puts.dli_fname, info_runtime.dli_fname) != 0) { + Report( + "ERROR: Interceptors are not working. This may be because %s is " + "loaded too late (e.g. via dlopen). 
Please launch the executable " + "with:\n%s=%s\n", + SanitizerToolName, kDyldInsertLibraries, info_runtime.dli_fname); + RAW_CHECK("interceptors not installed" && 0); + } +} + // Change the value of the env var |name|, leaking the original value. // If |name_value| is NULL, the variable is deleted from the environment, // otherwise the corresponding "NAME=value" string is replaced with // |name_value|. -void LeakyResetEnv(const char *name, const char *name_value) { +static void LeakyResetEnv(const char *name, const char *name_value) { char **env = GetEnviron(); uptr name_len = internal_strlen(name); while (*env != 0) { @@ -988,100 +1028,28 @@ void LeakyResetEnv(const char *name, const char *name_value) { } } -SANITIZER_WEAK_CXX_DEFAULT_IMPL -bool ReexecDisabled() { - return false; -} - -static bool DyldNeedsEnvVariable() { - // If running on OS X 10.11+ or iOS 9.0+, dyld will interpose even if - // DYLD_INSERT_LIBRARIES is not set. - return GetMacosAlignedVersion() < MacosVersion(10, 11); -} - -void MaybeReexec() { - // FIXME: This should really live in some "InitializePlatform" method. - MonotonicNanoTime(); +static void StripEnv() { + if (!common_flags()->strip_env) + return; - if (ReexecDisabled()) return; + char *dyld_insert_libraries = + const_cast<char *>(GetEnv(kDyldInsertLibraries)); + if (!dyld_insert_libraries) + return; - // Make sure the dynamic runtime library is preloaded so that the - // wrappers work. If it is not, set DYLD_INSERT_LIBRARIES and re-exec - // ourselves. Dl_info info; - RAW_CHECK(dladdr((void*)((uptr)&__sanitizer_report_error_summary), &info)); - char *dyld_insert_libraries = - const_cast<char*>(GetEnv(kDyldInsertLibraries)); - uptr old_env_len = dyld_insert_libraries ? - internal_strlen(dyld_insert_libraries) : 0; - uptr fname_len = internal_strlen(info.dli_fname); + RAW_CHECK(dladdr((void *)__sanitizer_report_error_summary, &info)); const char *dylib_name = StripModuleName(info.dli_fname); - uptr dylib_name_len = internal_strlen(dylib_name); - - bool lib_is_in_env = dyld_insert_libraries && - internal_strstr(dyld_insert_libraries, dylib_name); - if (DyldNeedsEnvVariable() && !lib_is_in_env) { - // DYLD_INSERT_LIBRARIES is not set or does not contain the runtime - // library. - InternalMmapVector<char> program_name(1024); - uint32_t buf_size = program_name.size(); - _NSGetExecutablePath(program_name.data(), &buf_size); - char *new_env = const_cast<char*>(info.dli_fname); - if (dyld_insert_libraries) { - // Append the runtime dylib name to the existing value of - // DYLD_INSERT_LIBRARIES. - new_env = (char*)allocator_for_env.Allocate(old_env_len + fname_len + 2); - internal_strncpy(new_env, dyld_insert_libraries, old_env_len); - new_env[old_env_len] = ':'; - // Copy fname_len and add a trailing zero. - internal_strncpy(new_env + old_env_len + 1, info.dli_fname, - fname_len + 1); - // Ok to use setenv() since the wrappers don't depend on the value of - // asan_inited. - setenv(kDyldInsertLibraries, new_env, /*overwrite*/1); - } else { - // Set DYLD_INSERT_LIBRARIES equal to the runtime dylib name. - setenv(kDyldInsertLibraries, info.dli_fname, /*overwrite*/0); - } - VReport(1, "exec()-ing the program with\n"); - VReport(1, "%s=%s\n", kDyldInsertLibraries, new_env); - VReport(1, "to enable wrappers.\n"); - execv(program_name.data(), *_NSGetArgv()); - - // We get here only if execv() failed. - Report("ERROR: The process is launched without DYLD_INSERT_LIBRARIES, " - "which is required for the sanitizer to work. 
We tried to set the " - "environment variable and re-execute itself, but execv() failed, " - "possibly because of sandbox restrictions. Make sure to launch the " - "executable with:\n%s=%s\n", kDyldInsertLibraries, new_env); - RAW_CHECK("execv failed" && 0); - } - - // Verify that interceptors really work. We'll use dlsym to locate - // "puts", if interceptors are working, it should really point to - // "wrap_puts" within our own dylib. - Dl_info info_puts; - void *dlopen_addr = dlsym(RTLD_DEFAULT, "puts"); - RAW_CHECK(dladdr(dlopen_addr, &info_puts)); - if (internal_strcmp(info.dli_fname, info_puts.dli_fname) != 0) { - Report( - "ERROR: Interceptors are not working. This may be because %s is " - "loaded too late (e.g. via dlopen). Please launch the executable " - "with:\n%s=%s\n", - SanitizerToolName, kDyldInsertLibraries, info.dli_fname); - RAW_CHECK("interceptors not installed" && 0); - } - + bool lib_is_in_env = internal_strstr(dyld_insert_libraries, dylib_name); if (!lib_is_in_env) return; - if (!common_flags()->strip_env) - return; - // DYLD_INSERT_LIBRARIES is set and contains the runtime library. Let's remove // the dylib from the environment variable, because interceptors are installed // and we don't want our children to inherit the variable. + uptr old_env_len = internal_strlen(dyld_insert_libraries); + uptr dylib_name_len = internal_strlen(dylib_name); uptr env_name_len = internal_strlen(kDyldInsertLibraries); // Allocate memory to hold the previous env var name, its value, the '=' // sign and the '\0' char. diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform.h index 7676e1ca264d..4d89ecaf1071 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform.h @@ -64,9 +64,6 @@ // - SANITIZER_DRIVERKIT #if defined(__APPLE__) # define SANITIZER_APPLE 1 -// SANITIZER_MAC will be deprecated/removed in the future -# define SANITIZER_MAC \ - error "SANITIZER_MAC will be removed, please use SANITIZER_APPLE" # include <TargetConditionals.h> # if TARGET_OS_OSX # define SANITIZER_OSX 1 @@ -100,8 +97,6 @@ # endif #else # define SANITIZER_APPLE 0 -# define SANITIZER_MAC \ - error "SANITIZER_MAC will be removed, please use SANITIZER_APPLE" # define SANITIZER_OSX 0 # define SANITIZER_IOS 0 # define SANITIZER_WATCHOS 0 @@ -177,7 +172,7 @@ #if defined(__mips__) # define SANITIZER_MIPS 1 -# if defined(__mips64) +# if defined(__mips64) && _MIPS_SIM == _ABI64 # define SANITIZER_MIPS32 0 # define SANITIZER_MIPS64 1 # else diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp index 4bd425435d56..3a94b260686f 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp @@ -73,7 +73,9 @@ #include <sys/vt.h> #include <linux/cdrom.h> #include <linux/fd.h> +#if SANITIZER_ANDROID #include <linux/fs.h> +#endif #include <linux/hdreg.h> #include <linux/input.h> #include <linux/ioctl.h> @@ -876,10 +878,10 @@ unsigned struct_ElfW_Phdr_sz = sizeof(Elf_Phdr); unsigned IOCTL_EVIOCGPROP = IOCTL_NOT_PRESENT; unsigned IOCTL_EVIOCSKEYCODE_V2 = IOCTL_NOT_PRESENT; #endif - unsigned IOCTL_FS_IOC_GETFLAGS = FS_IOC_GETFLAGS; - unsigned IOCTL_FS_IOC_GETVERSION = FS_IOC_GETVERSION; - unsigned IOCTL_FS_IOC_SETFLAGS = FS_IOC_SETFLAGS; - unsigned IOCTL_FS_IOC_SETVERSION = FS_IOC_SETVERSION; + unsigned 
IOCTL_FS_IOC_GETFLAGS = _IOR('f', 1, long); + unsigned IOCTL_FS_IOC_GETVERSION = _IOR('v', 1, long); + unsigned IOCTL_FS_IOC_SETFLAGS = _IOW('f', 2, long); + unsigned IOCTL_FS_IOC_SETVERSION = _IOW('v', 2, long); unsigned IOCTL_GIO_CMAP = GIO_CMAP; unsigned IOCTL_GIO_FONT = GIO_FONT; unsigned IOCTL_GIO_UNIMAP = GIO_UNIMAP; diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_mac.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_mac.cpp index 66f4935bb62d..4b0e67819761 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_mac.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_mac.cpp @@ -223,10 +223,10 @@ const mach_header *get_dyld_hdr() { if (GetMacosAlignedVersion() >= MacosVersion(13, 0)) { dyld_hdr = GetDyldImageHeaderViaSharedCache(); if (!dyld_hdr) { - Printf( - "Failed to lookup the dyld image header in the shared cache on " - "macOS 13+ (or no shared cache in use). Falling back to lookup via" - "vm_region_recurse_64().\n"); + VReport(1, + "Failed to lookup the dyld image header in the shared cache on " + "macOS 13+ (or no shared cache in use). Falling back to " + "lookup via vm_region_recurse_64().\n"); dyld_hdr = GetDyldImageHeaderViaVMRegion(); } } else { diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_vector.h b/compiler-rt/lib/sanitizer_common/sanitizer_vector.h index 31216f3ec3a6..79ff275660d3 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_vector.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_vector.h @@ -83,8 +83,8 @@ class Vector { } EnsureSize(size); if (old_size < size) { - for (uptr i = old_size; i < size; i++) - internal_memset(&begin_[i], 0, sizeof(begin_[i])); + internal_memset(&begin_[old_size], 0, + sizeof(begin_[old_size]) * (size - old_size)); } } diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_win.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_win.cpp index c997514cfed7..b4506e52efaa 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_win.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_win.cpp @@ -1094,10 +1094,6 @@ void InitializePlatformEarly() { // Do nothing. } -void MaybeReexec() { - // No need to re-exec on Windows. -} - void CheckASLR() { // Do nothing } diff --git a/compiler-rt/lib/scudo/standalone/fuchsia.cpp b/compiler-rt/lib/scudo/standalone/fuchsia.cpp index 8ab2b382a36a..4ce3b4ce6d18 100644 --- a/compiler-rt/lib/scudo/standalone/fuchsia.cpp +++ b/compiler-rt/lib/scudo/standalone/fuchsia.cpp @@ -57,8 +57,9 @@ void *map(void *Addr, uptr Size, const char *Name, uptr Flags, if (Flags & MAP_NOACCESS) return allocateVmar(Size, Data, AllowNoMem); - const zx_handle_t Vmar = Data ? Data->Vmar : _zx_vmar_root_self(); - CHECK_NE(Vmar, ZX_HANDLE_INVALID); + const zx_handle_t Vmar = (Data && Data->Vmar != ZX_HANDLE_INVALID) + ? Data->Vmar + : _zx_vmar_root_self(); zx_status_t Status; zx_handle_t Vmo; @@ -126,7 +127,9 @@ void unmap(void *Addr, uptr Size, uptr Flags, MapPlatformData *Data) { CHECK_EQ(_zx_vmar_destroy(Vmar), ZX_OK); CHECK_EQ(_zx_handle_close(Vmar), ZX_OK); } else { - const zx_handle_t Vmar = Data ? Data->Vmar : _zx_vmar_root_self(); + const zx_handle_t Vmar = (Data && Data->Vmar != ZX_HANDLE_INVALID) + ? 
Data->Vmar + : _zx_vmar_root_self(); const zx_status_t Status = _zx_vmar_unmap(Vmar, reinterpret_cast<uintptr_t>(Addr), Size); if (UNLIKELY(Status != ZX_OK)) diff --git a/compiler-rt/lib/scudo/standalone/include/scudo/interface.h b/compiler-rt/lib/scudo/standalone/include/scudo/interface.h index 9b9a84623c51..23bcfba3982a 100644 --- a/compiler-rt/lib/scudo/standalone/include/scudo/interface.h +++ b/compiler-rt/lib/scudo/standalone/include/scudo/interface.h @@ -14,7 +14,7 @@ extern "C" { -__attribute__((weak)) const char *__scudo_default_options(); +__attribute__((weak)) const char *__scudo_default_options(void); // Post-allocation & pre-deallocation hooks. // They must be thread-safe and not use heap related functions. @@ -101,14 +101,14 @@ struct scudo_error_info { struct scudo_error_report reports[3]; }; -const char *__scudo_get_stack_depot_addr(); -size_t __scudo_get_stack_depot_size(); +const char *__scudo_get_stack_depot_addr(void); +size_t __scudo_get_stack_depot_size(void); -const char *__scudo_get_region_info_addr(); -size_t __scudo_get_region_info_size(); +const char *__scudo_get_region_info_addr(void); +size_t __scudo_get_region_info_size(void); -const char *__scudo_get_ring_buffer_addr(); -size_t __scudo_get_ring_buffer_size(); +const char *__scudo_get_ring_buffer_addr(void); +size_t __scudo_get_ring_buffer_size(void); #ifndef M_DECAY_TIME #define M_DECAY_TIME -100 diff --git a/compiler-rt/lib/scudo/standalone/release.h b/compiler-rt/lib/scudo/standalone/release.h index 293a8bc27bab..49cc6ae618af 100644 --- a/compiler-rt/lib/scudo/standalone/release.h +++ b/compiler-rt/lib/scudo/standalone/release.h @@ -82,7 +82,7 @@ public: } else { Buffer = reinterpret_cast<uptr *>( map(nullptr, roundUpTo(BufferSize, getPageSizeCached()), - "scudo:counters", MAP_ALLOWNOMEM)); + "scudo:counters", MAP_ALLOWNOMEM, &MapData)); } } ~PackedCounterArray() { @@ -92,7 +92,7 @@ public: Mutex.unlock(); else unmap(reinterpret_cast<void *>(Buffer), - roundUpTo(BufferSize, getPageSizeCached())); + roundUpTo(BufferSize, getPageSizeCached()), 0, &MapData); } bool isAllocated() const { return !!Buffer; } @@ -138,6 +138,7 @@ private: uptr SizePerRegion; uptr BufferSize; uptr *Buffer; + [[no_unique_address]] MapPlatformData MapData = {}; static HybridMutex Mutex; static uptr StaticBuffer[StaticBufferCount]; diff --git a/compiler-rt/lib/scudo/standalone/vector.h b/compiler-rt/lib/scudo/standalone/vector.h index eae774b56e28..d43205a7111d 100644 --- a/compiler-rt/lib/scudo/standalone/vector.h +++ b/compiler-rt/lib/scudo/standalone/vector.h @@ -27,7 +27,7 @@ public: } void destroy() { if (Data != &LocalData[0]) - unmap(Data, CapacityBytes); + unmap(Data, CapacityBytes, 0, &MapData); } T &operator[](uptr I) { DCHECK_LT(I, Size); @@ -83,8 +83,8 @@ private: DCHECK_GT(NewCapacity, 0); DCHECK_LE(Size, NewCapacity); NewCapacity = roundUpTo(NewCapacity * sizeof(T), getPageSizeCached()); - T *NewData = - reinterpret_cast<T *>(map(nullptr, NewCapacity, "scudo:vector")); + T *NewData = reinterpret_cast<T *>( + map(nullptr, NewCapacity, "scudo:vector", 0, &MapData)); memcpy(NewData, Data, Size * sizeof(T)); destroy(); Data = NewData; @@ -95,6 +95,7 @@ private: T LocalData[256 / sizeof(T)] = {}; uptr CapacityBytes = 0; uptr Size = 0; + [[no_unique_address]] MapPlatformData MapData = {}; }; template <typename T> class Vector : public VectorNoCtor<T> { diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp b/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp index 3977d60c36e5..607f373871b4 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp +++ 
b/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp @@ -651,9 +651,6 @@ void Initialize(ThreadState *thr) { __tsan::InitializePlatformEarly(); #if !SANITIZER_GO - // Re-exec ourselves if we need to set additional env or command line args. - MaybeReexec(); - InitializeAllocator(); ReplaceSystemMalloc(); #endif diff --git a/libcxx/include/__algorithm/binary_search.h b/libcxx/include/__algorithm/binary_search.h index 121a741d070b..a44007237850 100644 --- a/libcxx/include/__algorithm/binary_search.h +++ b/libcxx/include/__algorithm/binary_search.h @@ -25,20 +25,20 @@ template <class _ForwardIterator, class _Tp, class _Compare> _LIBCPP_NODISCARD_EXT inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 bool -binary_search(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value_, _Compare __comp) +binary_search(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value, _Compare __comp) { using _Comp_ref = typename __comp_ref_type<_Compare>::type; - __first = std::lower_bound<_ForwardIterator, _Tp, _Comp_ref>(__first, __last, __value_, __comp); - return __first != __last && !__comp(__value_, *__first); + __first = std::lower_bound<_ForwardIterator, _Tp, _Comp_ref>(__first, __last, __value, __comp); + return __first != __last && !__comp(__value, *__first); } template <class _ForwardIterator, class _Tp> _LIBCPP_NODISCARD_EXT inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 bool -binary_search(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value_) +binary_search(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) { - return std::binary_search(__first, __last, __value_, + return std::binary_search(__first, __last, __value, __less<typename iterator_traits<_ForwardIterator>::value_type, _Tp>()); } diff --git a/libcxx/include/__algorithm/count.h b/libcxx/include/__algorithm/count.h index e18128cae8a8..5b546934038d 100644 --- a/libcxx/include/__algorithm/count.h +++ b/libcxx/include/__algorithm/count.h @@ -22,10 +22,10 @@ _LIBCPP_BEGIN_NAMESPACE_STD template <class _InputIterator, class _Tp> _LIBCPP_NODISCARD_EXT inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 typename iterator_traits<_InputIterator>::difference_type - count(_InputIterator __first, _InputIterator __last, const _Tp& __value_) { + count(_InputIterator __first, _InputIterator __last, const _Tp& __value) { typename iterator_traits<_InputIterator>::difference_type __r(0); for (; __first != __last; ++__first) - if (*__first == __value_) + if (*__first == __value) ++__r; return __r; } diff --git a/libcxx/include/__algorithm/equal_range.h b/libcxx/include/__algorithm/equal_range.h index 2a07364bb66f..f30f55be64fc 100644 --- a/libcxx/include/__algorithm/equal_range.h +++ b/libcxx/include/__algorithm/equal_range.h @@ -30,7 +30,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD template <class _Compare, class _ForwardIterator, class _Tp> _LIBCPP_CONSTEXPR_AFTER_CXX17 pair<_ForwardIterator, _ForwardIterator> -__equal_range(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value_, _Compare __comp) +__equal_range(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value, _Compare __comp) { typedef typename iterator_traits<_ForwardIterator>::difference_type difference_type; difference_type __len = _VSTD::distance(__first, __last); @@ -39,12 +39,12 @@ __equal_range(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __va difference_type __l2 = _VSTD::__half_positive(__len); _ForwardIterator __m = __first; _VSTD::advance(__m, __l2); - if 
(__comp(*__m, __value_)) + if (__comp(*__m, __value)) { __first = ++__m; __len -= __l2 + 1; } - else if (__comp(__value_, *__m)) + else if (__comp(__value, *__m)) { __last = __m; __len = __l2; @@ -55,8 +55,8 @@ __equal_range(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __va _ForwardIterator __mp1 = __m; return pair<_ForwardIterator, _ForwardIterator> ( - _VSTD::__lower_bound_impl<_StdIterOps>(__first, __m, __value_, __comp, __proj), - _VSTD::__upper_bound<_Compare>(++__mp1, __last, __value_, __comp) + _VSTD::__lower_bound_impl<_ClassicAlgPolicy>(__first, __m, __value, __comp, __proj), + _VSTD::__upper_bound<_Compare>(++__mp1, __last, __value, __comp) ); } } @@ -67,19 +67,19 @@ template <class _ForwardIterator, class _Tp, class _Compare> _LIBCPP_NODISCARD_EXT inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 pair<_ForwardIterator, _ForwardIterator> -equal_range(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value_, _Compare __comp) +equal_range(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value, _Compare __comp) { typedef typename __comp_ref_type<_Compare>::type _Comp_ref; - return _VSTD::__equal_range<_Comp_ref>(__first, __last, __value_, __comp); + return _VSTD::__equal_range<_Comp_ref>(__first, __last, __value, __comp); } template <class _ForwardIterator, class _Tp> _LIBCPP_NODISCARD_EXT inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 pair<_ForwardIterator, _ForwardIterator> -equal_range(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value_) +equal_range(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) { - return _VSTD::equal_range(__first, __last, __value_, + return _VSTD::equal_range(__first, __last, __value, __less<typename iterator_traits<_ForwardIterator>::value_type, _Tp>()); } diff --git a/libcxx/include/__algorithm/fill.h b/libcxx/include/__algorithm/fill.h index be5b4740a52a..ec9968fdb8b3 100644 --- a/libcxx/include/__algorithm/fill.h +++ b/libcxx/include/__algorithm/fill.h @@ -23,26 +23,26 @@ _LIBCPP_BEGIN_NAMESPACE_STD template <class _ForwardIterator, class _Tp> inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 void -__fill(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value_, forward_iterator_tag) +__fill(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value, forward_iterator_tag) { for (; __first != __last; ++__first) - *__first = __value_; + *__first = __value; } template <class _RandomAccessIterator, class _Tp> inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 void -__fill(_RandomAccessIterator __first, _RandomAccessIterator __last, const _Tp& __value_, random_access_iterator_tag) +__fill(_RandomAccessIterator __first, _RandomAccessIterator __last, const _Tp& __value, random_access_iterator_tag) { - _VSTD::fill_n(__first, __last - __first, __value_); + _VSTD::fill_n(__first, __last - __first, __value); } template <class _ForwardIterator, class _Tp> inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 void -fill(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value_) +fill(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) { - _VSTD::__fill(__first, __last, __value_, typename iterator_traits<_ForwardIterator>::iterator_category()); + _VSTD::__fill(__first, __last, __value, typename iterator_traits<_ForwardIterator>::iterator_category()); } _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__algorithm/fill_n.h b/libcxx/include/__algorithm/fill_n.h 
index 590c8f38f3fd..7482a4188dd5 100644 --- a/libcxx/include/__algorithm/fill_n.h +++ b/libcxx/include/__algorithm/fill_n.h @@ -22,19 +22,19 @@ _LIBCPP_BEGIN_NAMESPACE_STD template <class _OutputIterator, class _Size, class _Tp> inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 _OutputIterator -__fill_n(_OutputIterator __first, _Size __n, const _Tp& __value_) +__fill_n(_OutputIterator __first, _Size __n, const _Tp& __value) { for (; __n > 0; ++__first, (void) --__n) - *__first = __value_; + *__first = __value; return __first; } template <class _OutputIterator, class _Size, class _Tp> inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 _OutputIterator -fill_n(_OutputIterator __first, _Size __n, const _Tp& __value_) +fill_n(_OutputIterator __first, _Size __n, const _Tp& __value) { - return _VSTD::__fill_n(__first, _VSTD::__convert_to_integral(__n), __value_); + return _VSTD::__fill_n(__first, _VSTD::__convert_to_integral(__n), __value); } _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__algorithm/find.h b/libcxx/include/__algorithm/find.h index 641b85e2f645..ab37d81262f0 100644 --- a/libcxx/include/__algorithm/find.h +++ b/libcxx/include/__algorithm/find.h @@ -20,9 +20,9 @@ _LIBCPP_BEGIN_NAMESPACE_STD template <class _InputIterator, class _Tp> _LIBCPP_NODISCARD_EXT inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 _InputIterator -find(_InputIterator __first, _InputIterator __last, const _Tp& __value_) { +find(_InputIterator __first, _InputIterator __last, const _Tp& __value) { for (; __first != __last; ++__first) - if (*__first == __value_) + if (*__first == __value) break; return __first; } diff --git a/libcxx/include/__algorithm/find_end.h b/libcxx/include/__algorithm/find_end.h index 0220c0939711..65e9f29b1c1f 100644 --- a/libcxx/include/__algorithm/find_end.h +++ b/libcxx/include/__algorithm/find_end.h @@ -11,8 +11,16 @@ #define _LIBCPP___ALGORITHM_FIND_END_OF_H #include <__algorithm/comp.h> +#include <__algorithm/iterator_operations.h> +#include <__algorithm/search.h> #include <__config> +#include <__functional/identity.h> +#include <__iterator/advance.h> #include <__iterator/iterator_traits.h> +#include <__iterator/next.h> +#include <__iterator/reverse_iterator.h> +#include <__utility/pair.h> +#include <type_traits> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -20,35 +28,52 @@ _LIBCPP_BEGIN_NAMESPACE_STD -template <class _BinaryPredicate, class _ForwardIterator1, class _ForwardIterator2> -_LIBCPP_CONSTEXPR_AFTER_CXX17 _ForwardIterator1 __find_end(_ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _ForwardIterator2 __last2, - _BinaryPredicate __pred, forward_iterator_tag, - forward_iterator_tag) { +template < + class _AlgPolicy, + class _Iter1, + class _Sent1, + class _Iter2, + class _Sent2, + class _Pred, + class _Proj1, + class _Proj2> +_LIBCPP_HIDE_FROM_ABI inline _LIBCPP_CONSTEXPR_AFTER_CXX11 pair<_Iter1, _Iter1> __find_end_impl( + _Iter1 __first1, + _Sent1 __last1, + _Iter2 __first2, + _Sent2 __last2, + _Pred& __pred, + _Proj1& __proj1, + _Proj2& __proj2, + forward_iterator_tag, + forward_iterator_tag) { // modeled after search algorithm - _ForwardIterator1 __r = __last1; // __last1 is the "default" answer + _Iter1 __match_first = _IterOps<_AlgPolicy>::next(__first1, __last1); // __last1 is the "default" answer + _Iter1 __match_last = __match_first; if (__first2 == __last2) - return __r; + return pair<_Iter1, _Iter1>(__match_last, __match_last); while (true) { while 
(true) { - if (__first1 == __last1) // if source exhausted return last correct answer - return __r; // (or __last1 if never found) - if (__pred(*__first1, *__first2)) + if (__first1 == __last1) // if source exhausted return last correct answer (or __last1 if never found) + return pair<_Iter1, _Iter1>(__match_first, __match_last); + if (std::__invoke(__pred, std::__invoke(__proj1, *__first1), std::__invoke(__proj2, *__first2))) break; ++__first1; } // *__first1 matches *__first2, now match elements after here - _ForwardIterator1 __m1 = __first1; - _ForwardIterator2 __m2 = __first2; + _Iter1 __m1 = __first1; + _Iter2 __m2 = __first2; while (true) { if (++__m2 == __last2) { // Pattern exhaused, record answer and search for another one - __r = __first1; + __match_first = __first1; + __match_last = ++__m1; ++__first1; break; } if (++__m1 == __last1) // Source exhausted, return last answer - return __r; - if (!__pred(*__m1, *__m2)) // mismatch, restart with a new __first + return pair<_Iter1, _Iter1>(__match_first, __match_last); + // mismatch, restart with a new __first + if (!std::__invoke(__pred, std::__invoke(__proj1, *__m1), std::__invoke(__proj2, *__m2))) { ++__first1; break; @@ -57,33 +82,52 @@ _LIBCPP_CONSTEXPR_AFTER_CXX17 _ForwardIterator1 __find_end(_ForwardIterator1 __f } } -template <class _BinaryPredicate, class _BidirectionalIterator1, class _BidirectionalIterator2> -_LIBCPP_CONSTEXPR_AFTER_CXX17 _BidirectionalIterator1 __find_end( - _BidirectionalIterator1 __first1, _BidirectionalIterator1 __last1, _BidirectionalIterator2 __first2, - _BidirectionalIterator2 __last2, _BinaryPredicate __pred, bidirectional_iterator_tag, bidirectional_iterator_tag) { +template < + class _IterOps, + class _Pred, + class _Iter1, + class _Sent1, + class _Iter2, + class _Sent2, + class _Proj1, + class _Proj2> +_LIBCPP_CONSTEXPR_AFTER_CXX17 _Iter1 __find_end( + _Iter1 __first1, + _Sent1 __sent1, + _Iter2 __first2, + _Sent2 __sent2, + _Pred& __pred, + _Proj1& __proj1, + _Proj2& __proj2, + bidirectional_iterator_tag, + bidirectional_iterator_tag) { + auto __last1 = _IterOps::next(__first1, __sent1); + auto __last2 = _IterOps::next(__first2, __sent2); // modeled after search algorithm (in reverse) if (__first2 == __last2) return __last1; // Everything matches an empty sequence - _BidirectionalIterator1 __l1 = __last1; - _BidirectionalIterator2 __l2 = __last2; + _Iter1 __l1 = __last1; + _Iter2 __l2 = __last2; --__l2; while (true) { // Find last element in sequence 1 that matchs *(__last2-1), with a mininum of loop checks while (true) { if (__first1 == __l1) // return __last1 if no element matches *__first2 return __last1; - if (__pred(*--__l1, *__l2)) + if (std::__invoke(__pred, std::__invoke(__proj1, *--__l1), std::__invoke(__proj2, *__l2))) break; } // *__l1 matches *__l2, now match elements before here - _BidirectionalIterator1 __m1 = __l1; - _BidirectionalIterator2 __m2 = __l2; + _Iter1 __m1 = __l1; + _Iter2 __m2 = __l2; while (true) { if (__m2 == __first2) // If pattern exhausted, __m1 is the answer (works for 1 element pattern) return __m1; if (__m1 == __first1) // Otherwise if source exhaused, pattern not found return __last1; - if (!__pred(*--__m1, *--__m2)) // if there is a mismatch, restart with a new __l1 + + // if there is a mismatch, restart with a new __l1 + if (!std::__invoke(__pred, std::__invoke(__proj1, *--__m1), std::__invoke(__proj2, *--__m2))) { break; } // else there is a match, check next elements @@ -91,37 +135,53 @@ _LIBCPP_CONSTEXPR_AFTER_CXX17 _BidirectionalIterator1 __find_end( } } 
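The rewritten forward-iterator overload above is the core of this refactor: it threads an _AlgPolicy, sentinels, a predicate, and two projections through a single loop, and it returns a pair marking both ends of the last match, so the classic std::find_end can keep returning just the first iterator while the new ranges::find_end (added further down in this commit) can expose the whole match as a subrange. Below is a minimal standalone sketch of that shape, independent of libc++ internals; ClassicPolicy, find_end_impl, and the variable names are illustrative, not the library's:

#include <utility>

// Policy hook mirroring the _IterOps idea: for classic iterator pairs the
// "sentinel" already is the end iterator, so next(first, last) is just last.
struct ClassicPolicy {
  template <class It>
  static It next(It /*first*/, It last) { return last; }
};

// Search [f1, l1) for the *last* occurrence of [f2, l2); {l1, l1} means
// "not found", matching the convention in the hunk above.
template <class Policy, class It1, class It2, class Pred>
std::pair<It1, It1> find_end_impl(It1 f1, It1 l1, It2 f2, It2 l2, Pred pred) {
  It1 match_first = Policy::next(f1, l1);
  It1 match_last = match_first;
  if (f2 == l2)
    return {match_last, match_last};
  while (true) {
    while (true) {
      if (f1 == l1)                        // source exhausted: keep last match
        return {match_first, match_last};
      if (pred(*f1, *f2))
        break;
      ++f1;
    }
    It1 m1 = f1;
    It2 m2 = f2;
    while (true) {
      if (++m2 == l2) {                    // pattern exhausted: record match
        match_first = f1;
        match_last = ++m1;
        ++f1;
        break;
      }
      if (++m1 == l1)                      // source exhausted mid-candidate
        return {match_first, match_last};
      if (!pred(*m1, *m2)) {               // mismatch: retry from next start
        ++f1;
        break;
      }
    }
  }
}

Under these assumptions, a classic wrapper would return find_end_impl<ClassicPolicy>(f1, l1, f2, l2, std::equal_to<>()).first, while a ranges wrapper would wrap the pair in a subrange; that is exactly the split performed by __find_end_classic below and by ranges_find_end.h later in the patch.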
-template <class _BinaryPredicate, class _RandomAccessIterator1, class _RandomAccessIterator2> -_LIBCPP_CONSTEXPR_AFTER_CXX11 _RandomAccessIterator1 __find_end( - _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, - _RandomAccessIterator2 __last2, _BinaryPredicate __pred, random_access_iterator_tag, random_access_iterator_tag) { - typedef typename iterator_traits<_RandomAccessIterator1>::difference_type _D1; - typedef typename iterator_traits<_RandomAccessIterator2>::difference_type _D2; +template < + class _AlgPolicy, + class _Pred, + class _Iter1, + class _Sent1, + class _Iter2, + class _Sent2, + class _Proj1, + class _Proj2> +_LIBCPP_CONSTEXPR_AFTER_CXX11 _Iter1 __find_end( + _Iter1 __first1, + _Sent1 __sent1, + _Iter2 __first2, + _Sent2 __sent2, + _Pred& __pred, + _Proj1& __proj1, + _Proj2& __proj2, + random_access_iterator_tag, + random_access_iterator_tag) { + typedef typename iterator_traits<_Iter1>::difference_type _D1; + auto __last1 = _IterOps<_AlgPolicy>::next(__first1, __sent1); + auto __last2 = _IterOps<_AlgPolicy>::next(__first2, __sent2); // Take advantage of knowing source and pattern lengths. Stop short when source is smaller than pattern - _D2 __len2 = __last2 - __first2; + auto __len2 = __last2 - __first2; if (__len2 == 0) return __last1; - _D1 __len1 = __last1 - __first1; + auto __len1 = __last1 - __first1; if (__len1 < __len2) return __last1; - const _RandomAccessIterator1 __s = __first1 + _D1(__len2 - 1); // End of pattern match can't go before here - _RandomAccessIterator1 __l1 = __last1; - _RandomAccessIterator2 __l2 = __last2; + const _Iter1 __s = __first1 + _D1(__len2 - 1); // End of pattern match can't go before here + _Iter1 __l1 = __last1; + _Iter2 __l2 = __last2; --__l2; while (true) { while (true) { if (__s == __l1) return __last1; - if (__pred(*--__l1, *__l2)) + if (std::__invoke(__pred, std::__invoke(__proj1, *--__l1), std::__invoke(__proj2, *__l2))) break; } - _RandomAccessIterator1 __m1 = __l1; - _RandomAccessIterator2 __m2 = __l2; + _Iter1 __m1 = __l1; + _Iter2 __m2 = __l2; while (true) { if (__m2 == __first2) return __m1; // no need to check range on __m1 because __s guarantees we have enough source - if (!__pred(*--__m1, *--__m2)) { + if (!std::__invoke(__pred, std::__invoke(__proj1, *--__m1), std::__invoke(__proj2, *--__m2))) { break; } } @@ -129,20 +189,39 @@ _LIBCPP_CONSTEXPR_AFTER_CXX11 _RandomAccessIterator1 __find_end( } template <class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate> -_LIBCPP_NODISCARD_EXT inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 _ForwardIterator1 -find_end(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2, - _BinaryPredicate __pred) { - return _VSTD::__find_end<_BinaryPredicate&>( - __first1, __last1, __first2, __last2, __pred, typename iterator_traits<_ForwardIterator1>::iterator_category(), - typename iterator_traits<_ForwardIterator2>::iterator_category()); +_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 +_ForwardIterator1 __find_end_classic(_ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _ForwardIterator2 __last2, + _BinaryPredicate& __pred) { + auto __proj = __identity(); + return std::__find_end_impl<_ClassicAlgPolicy>( + __first1, + __last1, + __first2, + __last2, + __pred, + __proj, + __proj, + typename iterator_traits<_ForwardIterator1>::iterator_category(), + typename
iterator_traits<_ForwardIterator2>::iterator_category()) + .first; +} + +template <class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate> +_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 +_ForwardIterator1 find_end(_ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _ForwardIterator2 __last2, + _BinaryPredicate __pred) { + return std::__find_end_classic(__first1, __last1, __first2, __last2, __pred); } template <class _ForwardIterator1, class _ForwardIterator2> -_LIBCPP_NODISCARD_EXT inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 _ForwardIterator1 -find_end(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2) { - typedef typename iterator_traits<_ForwardIterator1>::value_type __v1; - typedef typename iterator_traits<_ForwardIterator2>::value_type __v2; - return _VSTD::find_end(__first1, __last1, __first2, __last2, __equal_to<__v1, __v2>()); +_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 +_ForwardIterator1 find_end(_ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _ForwardIterator2 __last2) { + using __v1 = typename iterator_traits<_ForwardIterator1>::value_type; + using __v2 = typename iterator_traits<_ForwardIterator2>::value_type; + return std::find_end(__first1, __last1, __first2, __last2, __equal_to<__v1, __v2>()); } _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__algorithm/iterator_operations.h b/libcxx/include/__algorithm/iterator_operations.h index c02f9bf649df..eb627e1ace7a 100644 --- a/libcxx/include/__algorithm/iterator_operations.h +++ b/libcxx/include/__algorithm/iterator_operations.h @@ -6,13 +6,20 @@ // //===----------------------------------------------------------------------===// -#ifndef _LIBCPP___ALGORIHTM_ITERATOR_OPERATIONS_H -#define _LIBCPP___ALGORIHTM_ITERATOR_OPERATIONS_H +#ifndef _LIBCPP___ALGORITHM_ITERATOR_OPERATIONS_H +#define _LIBCPP___ALGORITHM_ITERATOR_OPERATIONS_H +#include <__algorithm/iter_swap.h> #include <__config> #include <__iterator/advance.h> #include <__iterator/distance.h> +#include <__iterator/iter_move.h> +#include <__iterator/iter_swap.h> #include <__iterator/iterator_traits.h> +#include <__iterator/next.h> +#include <__utility/forward.h> +#include <__utility/move.h> +#include <type_traits> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -20,28 +27,69 @@ _LIBCPP_BEGIN_NAMESPACE_STD +template <class _AlgPolicy> struct _IterOps; + #if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) -struct _RangesIterOps { +struct _RangeAlgPolicy {}; + +template <> +struct _IterOps<_RangeAlgPolicy> { static constexpr auto advance = ranges::advance; static constexpr auto distance = ranges::distance; + static constexpr auto __iter_move = ranges::iter_move; + static constexpr auto iter_swap = ranges::iter_swap; + static constexpr auto next = ranges::next; + static constexpr auto __advance_to = ranges::advance; }; #endif -struct _StdIterOps { +struct _ClassicAlgPolicy {}; + +template <> +struct _IterOps<_ClassicAlgPolicy> { + + // advance + template <class _Iter, class _Distance> + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 + static void advance(_Iter& __iter, _Distance __count) { + std::advance(__iter, __count); + } + + // distance + template <class _Iter> + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 + static typename iterator_traits<_Iter>::difference_type 
distance(_Iter __first, _Iter __last) { + return std::distance(__first, __last); + } + + // iter_move + template <class _Iter> + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 + // Declaring the return type is necessary for the C++03 mode (which doesn't support placeholder return types). + static typename iterator_traits<__uncvref_t<_Iter> >::value_type&& __iter_move(_Iter&& __i) { + return std::move(*std::forward<_Iter>(__i)); + } - template <class _Iterator, class _Distance> - _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR_AFTER_CXX11 void advance(_Iterator& __iter, _Distance __count) { - return std::advance(__iter, __count); + // iter_swap + template <class _Iter1, class _Iter2> + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 + static void iter_swap(_Iter1&& __a, _Iter2&& __b) { + std::iter_swap(std::forward<_Iter1>(__a), std::forward<_Iter2>(__b)); } + // next template <class _Iterator> _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR_AFTER_CXX11 - typename iterator_traits<_Iterator>::difference_type distance(_Iterator __first, _Iterator __last) { - return std::distance(__first, __last); + _Iterator next(_Iterator, _Iterator __last) { + return __last; } + template <class _Iter> + _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR_AFTER_CXX11 void __advance_to(_Iter& __first, _Iter __last) { + __first = __last; + } }; _LIBCPP_END_NAMESPACE_STD -#endif // _LIBCPP___ALGORIHTM_ITERATOR_OPERATIONS_H +#endif // _LIBCPP___ALGORITHM_ITERATOR_OPERATIONS_H diff --git a/libcxx/include/__algorithm/lower_bound.h b/libcxx/include/__algorithm/lower_bound.h index fbcd5c7e908a..2c92f715265a 100644 --- a/libcxx/include/__algorithm/lower_bound.h +++ b/libcxx/include/__algorithm/lower_bound.h @@ -19,6 +19,7 @@ #include <__iterator/distance.h> #include <__iterator/iterator_traits.h> #include <__type_traits/is_callable.h> +#include <__type_traits/remove_reference.h> #include <type_traits> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -27,15 +28,15 @@ _LIBCPP_BEGIN_NAMESPACE_STD -template <class _IterOps, class _Iter, class _Sent, class _Type, class _Proj, class _Comp> +template <class _AlgPolicy, class _Iter, class _Sent, class _Type, class _Proj, class _Comp> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 _Iter __lower_bound_impl(_Iter __first, _Sent __last, const _Type& __value, _Comp& __comp, _Proj& __proj) { - auto __len = _IterOps::distance(__first, __last); + auto __len = _IterOps<_AlgPolicy>::distance(__first, __last); while (__len != 0) { auto __l2 = std::__half_positive(__len); _Iter __m = __first; - _IterOps::advance(__m, __l2); + _IterOps<_AlgPolicy>::advance(__m, __l2); if (std::__invoke(__comp, std::__invoke(__proj, *__m), __value)) { __first = ++__m; __len -= __l2 + 1; @@ -48,17 +49,17 @@ _Iter __lower_bound_impl(_Iter __first, _Sent __last, const _Type& __value, _Com template <class _ForwardIterator, class _Tp, class _Compare> _LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 -_ForwardIterator lower_bound(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value_, _Compare __comp) { +_ForwardIterator lower_bound(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value, _Compare __comp) { static_assert(__is_callable<_Compare, decltype(*__first), const _Tp&>::value, "The comparator has to be callable"); auto __proj = std::__identity(); - return std::__lower_bound_impl<_StdIterOps>(__first, __last, __value_, __comp, __proj); + return std::__lower_bound_impl<_ClassicAlgPolicy>(__first, __last, __value, __comp, __proj); } 
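This _IterOps<_ClassicAlgPolicy> specialization is the glue that lets one algorithm body serve two interfaces: the shared __*_impl functions touch iterators only through the policy, so the same loop runs over classic iterator pairs and over C++20 iterator/sentinel pairs. A self-contained sketch of the pattern in the spirit of __lower_bound_impl below; Classic and lower_bound_impl are illustrative names, and a plain len / 2 stands in for the library's __half_positive helper:

#include <cassert>
#include <functional>
#include <iterator>
#include <vector>

// Classic policy: delegate to the std:: iterator primitives.
struct Classic {
  template <class It>
  static typename std::iterator_traits<It>::difference_type distance(It f, It l) {
    return std::distance(f, l);
  }
  template <class It, class D>
  static void advance(It& i, D n) { std::advance(i, n); }
};

// One binary-search loop for any policy; with a ranges-style policy the same
// code would accept an iterator/sentinel pair instead of two iterators.
template <class Policy, class It, class Sent, class T, class Comp>
It lower_bound_impl(It first, Sent last, const T& value, Comp comp) {
  auto len = Policy::distance(first, last);
  while (len != 0) {
    auto half = len / 2;               // libc++ uses __half_positive here
    It mid = first;
    Policy::advance(mid, half);
    if (comp(*mid, value)) {           // value is in the upper half
      first = ++mid;
      len -= half + 1;
    } else {                           // value is at mid or in the lower half
      len = half;
    }
  }
  return first;
}

int main() {
  std::vector<int> v{1, 2, 4, 4, 7};
  assert(lower_bound_impl<Classic>(v.begin(), v.end(), 4, std::less<int>()) == v.begin() + 2);
}

Swapping in a policy whose distance and advance forward to ranges::distance and ranges::advance is all it takes to accept real sentinels, which is what the _RangeAlgPolicy specialization earlier in this hunk provides.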
template <class _ForwardIterator, class _Tp> _LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 -_ForwardIterator lower_bound(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value_) { - return std::lower_bound(__first, __last, __value_, +_ForwardIterator lower_bound(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) { + return std::lower_bound(__first, __last, __value, __less<typename iterator_traits<_ForwardIterator>::value_type, _Tp>()); } diff --git a/libcxx/include/__algorithm/make_heap.h b/libcxx/include/__algorithm/make_heap.h index acac0aabf1e4..bc39d82bf916 100644 --- a/libcxx/include/__algorithm/make_heap.h +++ b/libcxx/include/__algorithm/make_heap.h @@ -14,6 +14,7 @@ #include <__algorithm/sift_down.h> #include <__config> #include <__iterator/iterator_traits.h> +#include <__utility/move.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -22,36 +23,32 @@ _LIBCPP_BEGIN_NAMESPACE_STD template <class _Compare, class _RandomAccessIterator> -_LIBCPP_CONSTEXPR_AFTER_CXX11 void -__make_heap(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) -{ - typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type; - difference_type __n = __last - __first; - if (__n > 1) - { - // start from the first parent, there is no need to consider children - for (difference_type __start = (__n - 2) / 2; __start >= 0; --__start) - { - _VSTD::__sift_down<_Compare>(__first, __comp, __n, __first + __start); - } +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 +void __make_heap(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare& __comp) { + using _CompRef = typename __comp_ref_type<_Compare>::type; + _CompRef __comp_ref = __comp; + + using difference_type = typename iterator_traits<_RandomAccessIterator>::difference_type; + difference_type __n = __last - __first; + if (__n > 1) { + // start from the first parent, there is no need to consider children + for (difference_type __start = (__n - 2) / 2; __start >= 0; --__start) { + std::__sift_down<_CompRef>(__first, __comp_ref, __n, __first + __start); } + } } template <class _RandomAccessIterator, class _Compare> -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 -void -make_heap(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) -{ - typedef typename __comp_ref_type<_Compare>::type _Comp_ref; - _VSTD::__make_heap<_Comp_ref>(__first, __last, __comp); +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 +void make_heap(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) { + std::__make_heap(std::move(__first), std::move(__last), __comp); } template <class _RandomAccessIterator> -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 -void -make_heap(_RandomAccessIterator __first, _RandomAccessIterator __last) -{ - _VSTD::make_heap(__first, __last, __less<typename iterator_traits<_RandomAccessIterator>::value_type>()); +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 +void make_heap(_RandomAccessIterator __first, _RandomAccessIterator __last) { + std::make_heap(std::move(__first), std::move(__last), + __less<typename iterator_traits<_RandomAccessIterator>::value_type>()); } _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__algorithm/make_projected.h b/libcxx/include/__algorithm/make_projected.h index 8141c4ed176f..6d8ebfd3d90e 100644 --- a/libcxx/include/__algorithm/make_projected.h +++ 
b/libcxx/include/__algorithm/make_projected.h @@ -13,6 +13,8 @@ #include <__config> #include <__functional/identity.h> #include <__functional/invoke.h> +#include <__type_traits/decay.h> +#include <__type_traits/is_member_pointer.h> #include <__utility/forward.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -28,7 +30,7 @@ namespace ranges { template <class _Comp, class _Proj> _LIBCPP_HIDE_FROM_ABI constexpr static decltype(auto) __make_projected_comp(_Comp& __comp, _Proj& __proj) { - if constexpr (same_as<_Proj, identity>) { + if constexpr (same_as<decay_t<_Proj>, identity> && !is_member_pointer_v<decay_t<_Comp>>) { // Avoid creating the lambda and just use the pristine comparator -- for certain algorithms, this would enable // optimizations that rely on the type of the comparator. return __comp; @@ -42,6 +44,24 @@ decltype(auto) __make_projected_comp(_Comp& __comp, _Proj& __proj) { } } +template <class _Comp, class _Proj1, class _Proj2> +_LIBCPP_HIDE_FROM_ABI constexpr static +decltype(auto) __make_projected_comp(_Comp& __comp, _Proj1& __proj1, _Proj2& __proj2) { + if constexpr (same_as<decay_t<_Proj1>, identity> && same_as<decay_t<_Proj2>, identity> && + !is_member_pointer_v<decay_t<_Comp>>) { + // Avoid creating the lambda and just use the pristine comparator -- for certain algorithms, this would enable + // optimizations that rely on the type of the comparator. + return __comp; + + } else { + return [&](auto&& __lhs, auto&& __rhs) { + return std::invoke(__comp, + std::invoke(__proj1, std::forward<decltype(__lhs)>(__lhs)), + std::invoke(__proj2, std::forward<decltype(__rhs)>(__rhs))); + }; + } +} + } // namespace ranges _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__algorithm/minmax_element.h b/libcxx/include/__algorithm/minmax_element.h index fe5f20bf1c7f..cf67184e0b4c 100644 --- a/libcxx/include/__algorithm/minmax_element.h +++ b/libcxx/include/__algorithm/minmax_element.h @@ -24,17 +24,17 @@ _LIBCPP_BEGIN_NAMESPACE_STD template <class _Comp, class _Proj> class _MinmaxElementLessFunc { - _Comp& __comp; - _Proj& __proj; + _Comp& __comp_; + _Proj& __proj_; public: _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR - _MinmaxElementLessFunc(_Comp& __comp_, _Proj& __proj_) : __comp(__comp_), __proj(__proj_) {} + _MinmaxElementLessFunc(_Comp& __comp, _Proj& __proj) : __comp_(__comp), __proj_(__proj) {} template <class _Iter> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 bool operator()(_Iter& __it1, _Iter& __it2) { - return std::__invoke(__comp, std::__invoke(__proj, *__it1), std::__invoke(__proj, *__it2)); + return std::__invoke(__comp_, std::__invoke(__proj_, *__it1), std::__invoke(__proj_, *__it2)); } }; diff --git a/libcxx/include/__algorithm/nth_element.h b/libcxx/include/__algorithm/nth_element.h index 60b9280f75f0..c7cdef5be817 100644 --- a/libcxx/include/__algorithm/nth_element.h +++ b/libcxx/include/__algorithm/nth_element.h @@ -14,13 +14,11 @@ #include <__algorithm/sort.h> #include <__config> #include <__debug> +#include <__debug_utils/randomize_range.h> #include <__iterator/iterator_traits.h> +#include <__utility/move.h> #include <__utility/swap.h> -#if defined(_LIBCPP_DEBUG_RANDOMIZE_UNSPECIFIED_STABILITY) -# include <__algorithm/shuffle.h> -#endif - #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header #endif @@ -223,25 +221,35 @@ __nth_element(_RandomAccessIterator __first, _RandomAccessIterator __nth, _Rando } template <class _RandomAccessIterator, class _Compare> -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 -void 
-nth_element(_RandomAccessIterator __first, _RandomAccessIterator __nth, _RandomAccessIterator __last, _Compare __comp) -{ - _LIBCPP_DEBUG_RANDOMIZE_RANGE(__first, __last); - typedef typename __comp_ref_type<_Compare>::type _Comp_ref; - _VSTD::__nth_element<_Comp_ref>(__first, __nth, __last, __comp); - _LIBCPP_DEBUG_RANDOMIZE_RANGE(__first, __nth); +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 +void __nth_element_impl(_RandomAccessIterator __first, _RandomAccessIterator __nth, _RandomAccessIterator __last, + _Compare& __comp) { + if (__nth == __last) + return; + + std::__debug_randomize_range(__first, __last); + + using _Comp_ref = typename __comp_ref_type<_Compare>::type; + std::__nth_element<_Comp_ref>(__first, __nth, __last, __comp); + + std::__debug_randomize_range(__first, __nth); if (__nth != __last) { - _LIBCPP_DEBUG_RANDOMIZE_RANGE(++__nth, __last); + std::__debug_randomize_range(++__nth, __last); } } +template <class _RandomAccessIterator, class _Compare> +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 +void nth_element(_RandomAccessIterator __first, _RandomAccessIterator __nth, _RandomAccessIterator __last, + _Compare __comp) { + std::__nth_element_impl(std::move(__first), std::move(__nth), std::move(__last), __comp); +} + template <class _RandomAccessIterator> -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 -void -nth_element(_RandomAccessIterator __first, _RandomAccessIterator __nth, _RandomAccessIterator __last) -{ - _VSTD::nth_element(__first, __nth, __last, __less<typename iterator_traits<_RandomAccessIterator>::value_type>()); +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 +void nth_element(_RandomAccessIterator __first, _RandomAccessIterator __nth, _RandomAccessIterator __last) { + std::nth_element(std::move(__first), std::move(__nth), std::move(__last), __less<typename + iterator_traits<_RandomAccessIterator>::value_type>()); } _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__algorithm/partial_sort.h b/libcxx/include/__algorithm/partial_sort.h index 3870c0cc9335..e008c0c99679 100644 --- a/libcxx/include/__algorithm/partial_sort.h +++ b/libcxx/include/__algorithm/partial_sort.h @@ -16,13 +16,10 @@ #include <__algorithm/sort_heap.h> #include <__config> #include <__debug> +#include <__debug_utils/randomize_range.h> #include <__iterator/iterator_traits.h> #include <__utility/swap.h> -#if defined(_LIBCPP_DEBUG_RANDOMIZE_UNSPECIFIED_STABILITY) -# include <__algorithm/shuffle.h> -#endif - #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header #endif @@ -55,10 +52,10 @@ void partial_sort(_RandomAccessIterator __first, _RandomAccessIterator __middle, _RandomAccessIterator __last, _Compare __comp) { - _LIBCPP_DEBUG_RANDOMIZE_RANGE(__first, __last); + std::__debug_randomize_range(__first, __last); typedef typename __comp_ref_type<_Compare>::type _Comp_ref; _VSTD::__partial_sort<_Comp_ref>(__first, __middle, __last, __comp); - _LIBCPP_DEBUG_RANDOMIZE_RANGE(__middle, __last); + std::__debug_randomize_range(__middle, __last); } template <class _RandomAccessIterator> diff --git a/libcxx/include/__algorithm/pop_heap.h b/libcxx/include/__algorithm/pop_heap.h index 2932a5e31dbc..cadda81f6c88 100644 --- a/libcxx/include/__algorithm/pop_heap.h +++ b/libcxx/include/__algorithm/pop_heap.h @@ -13,6 +13,7 @@ #include <__algorithm/comp_ref_type.h> #include <__algorithm/push_heap.h> #include <__algorithm/sift_down.h> +#include <__assert> #include <__config> #include <__iterator/iterator_traits.h> #include 
<__utility/move.h> @@ -24,44 +25,43 @@ _LIBCPP_BEGIN_NAMESPACE_STD template <class _Compare, class _RandomAccessIterator> -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 -void -__pop_heap(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp, - typename iterator_traits<_RandomAccessIterator>::difference_type __len) -{ - using value_type = typename iterator_traits<_RandomAccessIterator>::value_type; +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 +void __pop_heap(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare& __comp, + typename iterator_traits<_RandomAccessIterator>::difference_type __len) { + _LIBCPP_ASSERT(__len > 0, "The heap given to pop_heap must be non-empty"); - if (__len > 1) - { - value_type __top = std::move(*__first); // create a hole at __first - _RandomAccessIterator __hole = std::__floyd_sift_down<_Compare>(__first, __comp, __len); - --__last; - if (__hole == __last) { - *__hole = std::move(__top); - } else { - *__hole = std::move(*__last); - ++__hole; - *__last = std::move(__top); - std::__sift_up<_Compare>(__first, __hole, __comp, __hole - __first); - } + using _CompRef = typename __comp_ref_type<_Compare>::type; + _CompRef __comp_ref = __comp; + + using value_type = typename iterator_traits<_RandomAccessIterator>::value_type; + if (__len > 1) { + value_type __top = std::move(*__first); // create a hole at __first + _RandomAccessIterator __hole = std::__floyd_sift_down<_CompRef>(__first, __comp_ref, __len); + --__last; + + if (__hole == __last) { + *__hole = std::move(__top); + } else { + *__hole = std::move(*__last); + ++__hole; + *__last = std::move(__top); + std::__sift_up<_CompRef>(__first, __hole, __comp_ref, __hole - __first); } + } } template <class _RandomAccessIterator, class _Compare> -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 -void -pop_heap(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) -{ - typedef typename __comp_ref_type<_Compare>::type _Comp_ref; - _VSTD::__pop_heap<_Comp_ref>(__first, __last, __comp, __last - __first); +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 +void pop_heap(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) { + typename iterator_traits<_RandomAccessIterator>::difference_type __len = __last - __first; + std::__pop_heap(std::move(__first), std::move(__last), __comp, __len); } template <class _RandomAccessIterator> -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 -void -pop_heap(_RandomAccessIterator __first, _RandomAccessIterator __last) -{ - _VSTD::pop_heap(__first, __last, __less<typename iterator_traits<_RandomAccessIterator>::value_type>()); +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 +void pop_heap(_RandomAccessIterator __first, _RandomAccessIterator __last) { + std::pop_heap(std::move(__first), std::move(__last), + __less<typename iterator_traits<_RandomAccessIterator>::value_type>()); } _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__algorithm/push_heap.h b/libcxx/include/__algorithm/push_heap.h index 66973e082f14..1e3eec373d4f 100644 --- a/libcxx/include/__algorithm/push_heap.h +++ b/libcxx/include/__algorithm/push_heap.h @@ -22,47 +22,50 @@ _LIBCPP_BEGIN_NAMESPACE_STD template <class _Compare, class _RandomAccessIterator> -_LIBCPP_CONSTEXPR_AFTER_CXX11 void -__sift_up(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp, - typename iterator_traits<_RandomAccessIterator>::difference_type __len) -{ - 
typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type; - if (__len > 1) - { - __len = (__len - 2) / 2; - _RandomAccessIterator __ptr = __first + __len; - if (__comp(*__ptr, *--__last)) - { - value_type __t(_VSTD::move(*__last)); - do - { - *__last = _VSTD::move(*__ptr); - __last = __ptr; - if (__len == 0) - break; - __len = (__len - 1) / 2; - __ptr = __first + __len; - } while (__comp(*__ptr, __t)); - *__last = _VSTD::move(__t); - } +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 +void __sift_up(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp, + typename iterator_traits<_RandomAccessIterator>::difference_type __len) { + using value_type = typename iterator_traits<_RandomAccessIterator>::value_type; + + if (__len > 1) { + __len = (__len - 2) / 2; + _RandomAccessIterator __ptr = __first + __len; + + if (__comp(*__ptr, *--__last)) { + value_type __t(std::move(*__last)); + do { + *__last = std::move(*__ptr); + __last = __ptr; + if (__len == 0) + break; + __len = (__len - 1) / 2; + __ptr = __first + __len; + } while (__comp(*__ptr, __t)); + + *__last = std::move(__t); } + } +} + +template <class _RandomAccessIterator, class _Compare> +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 +void __push_heap(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare& __comp) { + using _CompRef = typename __comp_ref_type<_Compare>::type; + typename iterator_traits<_RandomAccessIterator>::difference_type __len = __last - __first; + std::__sift_up<_CompRef>(std::move(__first), std::move(__last), __comp, __len); } template <class _RandomAccessIterator, class _Compare> -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 -void -push_heap(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) -{ - typedef typename __comp_ref_type<_Compare>::type _Comp_ref; - _VSTD::__sift_up<_Comp_ref>(__first, __last, __comp, __last - __first); +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 +void push_heap(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) { + std::__push_heap(std::move(__first), std::move(__last), __comp); } template <class _RandomAccessIterator> -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 -void -push_heap(_RandomAccessIterator __first, _RandomAccessIterator __last) -{ - _VSTD::push_heap(__first, __last, __less<typename iterator_traits<_RandomAccessIterator>::value_type>()); +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 +void push_heap(_RandomAccessIterator __first, _RandomAccessIterator __last) { + std::push_heap(std::move(__first), std::move(__last), + __less<typename iterator_traits<_RandomAccessIterator>::value_type>()); } _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__algorithm/ranges_binary_search.h b/libcxx/include/__algorithm/ranges_binary_search.h index 68359fb1388f..6da68834aa3b 100644 --- a/libcxx/include/__algorithm/ranges_binary_search.h +++ b/libcxx/include/__algorithm/ranges_binary_search.h @@ -35,7 +35,7 @@ struct __fn { indirect_strict_weak_order<const _Type*, projected<_Iter, _Proj>> _Comp = ranges::less> _LIBCPP_HIDE_FROM_ABI constexpr bool operator()(_Iter __first, _Sent __last, const _Type& __value, _Comp __comp = {}, _Proj __proj = {}) const { - auto __ret = std::__lower_bound_impl<_RangesIterOps>(__first, __last, __value, __comp, __proj); + auto __ret = std::__lower_bound_impl<_RangeAlgPolicy>(__first, __last, __value, __comp, __proj); return __ret != __last && !std::invoke(__comp, __value, 
std::invoke(__proj, *__first)); } @@ -45,7 +45,7 @@ struct __fn { bool operator()(_Range&& __r, const _Type& __value, _Comp __comp = {}, _Proj __proj = {}) const { auto __first = ranges::begin(__r); auto __last = ranges::end(__r); - auto __ret = std::__lower_bound_impl<_RangesIterOps>(__first, __last, __value, __comp, __proj); + auto __ret = std::__lower_bound_impl<_RangeAlgPolicy>(__first, __last, __value, __comp, __proj); return __ret != __last && !std::invoke(__comp, __value, std::invoke(__proj, *__first)); } }; diff --git a/libcxx/include/__algorithm/ranges_equal_range.h b/libcxx/include/__algorithm/ranges_equal_range.h new file mode 100644 index 000000000000..28d721530bda --- /dev/null +++ b/libcxx/include/__algorithm/ranges_equal_range.h @@ -0,0 +1,74 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ALGORITHM_RANGES_EQUAL_RANGE_H +#define _LIBCPP___ALGORITHM_RANGES_EQUAL_RANGE_H + +#include <__algorithm/equal_range.h> +#include <__algorithm/make_projected.h> +#include <__config> +#include <__functional/identity.h> +#include <__functional/invoke.h> +#include <__functional/ranges_operations.h> +#include <__iterator/concepts.h> +#include <__iterator/iterator_traits.h> +#include <__iterator/projected.h> +#include <__ranges/access.h> +#include <__ranges/concepts.h> +#include <__ranges/dangling.h> +#include <__ranges/subrange.h> +#include <__utility/forward.h> +#include <__utility/move.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +_LIBCPP_BEGIN_NAMESPACE_STD + +namespace ranges { +namespace __equal_range { + +struct __fn { + + template <forward_iterator _Iter, sentinel_for<_Iter> _Sent, class _Tp, class _Proj = identity, + indirect_strict_weak_order<const _Tp*, projected<_Iter, _Proj>> _Comp = ranges::less> + _LIBCPP_HIDE_FROM_ABI constexpr + subrange<_Iter> operator()(_Iter __first, _Sent __last, const _Tp& __value, _Comp __comp = {}, + _Proj __proj = {}) const { + // TODO: implement + (void)__first; (void)__last; (void)__value; (void)__comp; (void)__proj; + return {}; + } + + template <forward_range _Range, class _Tp, class _Proj = identity, + indirect_strict_weak_order<const _Tp*, projected<iterator_t<_Range>, _Proj>> _Comp = ranges::less> + _LIBCPP_HIDE_FROM_ABI constexpr + borrowed_subrange_t<_Range> operator()(_Range&& __range, const _Tp& __value, _Comp __comp = {}, + _Proj __proj = {}) const { + // TODO: implement + (void)__range; (void)__value; (void)__comp; (void)__proj; + return {}; + } + +}; + +} // namespace __equal_range + +inline namespace __cpo { + inline constexpr auto equal_range = __equal_range::__fn{}; +} // namespace __cpo +} // namespace ranges + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +#endif // _LIBCPP___ALGORITHM_RANGES_EQUAL_RANGE_H diff --git a/libcxx/include/__algorithm/ranges_find_end.h b/libcxx/include/__algorithm/ranges_find_end.h new file mode 100644 index 000000000000..fec709e79f5a --- /dev/null +++ b/libcxx/include/__algorithm/ranges_find_end.h @@ -0,0 +1,113 @@
+//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ALGORITHM_RANGES_FIND_END_H +#define _LIBCPP___ALGORITHM_RANGES_FIND_END_H + +#include <__algorithm/find_end.h> +#include <__algorithm/iterator_operations.h> +#include <__config> +#include <__functional/identity.h> +#include <__functional/ranges_operations.h> +#include <__iterator/concepts.h> +#include <__iterator/indirectly_comparable.h> +#include <__iterator/iterator_traits.h> +#include <__ranges/access.h> +#include <__ranges/concepts.h> +#include <__ranges/subrange.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +_LIBCPP_BEGIN_NAMESPACE_STD + +template <class _Iter> +consteval auto __get_iterator_concept() { + if constexpr (contiguous_iterator<_Iter>) + return contiguous_iterator_tag(); + else if constexpr (random_access_iterator<_Iter>) + return random_access_iterator_tag(); + else if constexpr (bidirectional_iterator<_Iter>) + return bidirectional_iterator_tag(); + else if constexpr (forward_iterator<_Iter>) + return forward_iterator_tag(); + else if constexpr (input_iterator<_Iter>) + return input_iterator_tag(); +} + +template <class _Iter> +using __iterator_concept = decltype(__get_iterator_concept<_Iter>()); + +namespace ranges { +namespace __find_end { +struct __fn { + template <forward_iterator _Iter1, sentinel_for<_Iter1> _Sent1, + forward_iterator _Iter2, sentinel_for<_Iter2> _Sent2, + class _Pred = ranges::equal_to, + class _Proj1 = identity, + class _Proj2 = identity> + requires indirectly_comparable<_Iter1, _Iter2, _Pred, _Proj1, _Proj2> + _LIBCPP_HIDE_FROM_ABI constexpr + subrange<_Iter1> operator()(_Iter1 __first1, _Sent1 __last1, + _Iter2 __first2, _Sent2 __last2, + _Pred __pred = {}, + _Proj1 __proj1 = {}, + _Proj2 __proj2 = {}) const { + auto __ret = std::__find_end_impl<_RangeAlgPolicy>( + __first1, + __last1, + __first2, + __last2, + __pred, + __proj1, + __proj2, + __iterator_concept<_Iter1>(), + __iterator_concept<_Iter2>()); + return {__ret.first, __ret.second}; + } + + template <forward_range _Range1, + forward_range _Range2, + class _Pred = ranges::equal_to, + class _Proj1 = identity, + class _Proj2 = identity> + requires indirectly_comparable<iterator_t<_Range1>, iterator_t<_Range2>, _Pred, _Proj1, _Proj2> + _LIBCPP_HIDE_FROM_ABI constexpr + borrowed_subrange_t<_Range1> operator()(_Range1&& __range1, + _Range2&& __range2, + _Pred __pred = {}, + _Proj1 __proj1 = {}, + _Proj2 __proj2 = {}) const { + auto __ret = std::__find_end_impl<_RangeAlgPolicy>( + ranges::begin(__range1), + ranges::end(__range1), + ranges::begin(__range2), + ranges::end(__range2), + __pred, + __proj1, + __proj2, + __iterator_concept<iterator_t<_Range1>>(), + __iterator_concept<iterator_t<_Range2>>()); + return {__ret.first, __ret.second}; + } +}; +} // namespace __find_end + +inline namespace __cpo { + inline constexpr auto find_end = __find_end::__fn{}; +} // namespace __cpo +} // namespace ranges + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +#endif // _LIBCPP___ALGORITHM_RANGES_FIND_END_H diff --git 
a/libcxx/include/__algorithm/ranges_generate.h b/libcxx/include/__algorithm/ranges_generate.h new file mode 100644 index 000000000000..c23645e6d906 --- /dev/null +++ b/libcxx/include/__algorithm/ranges_generate.h @@ -0,0 +1,73 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ALGORITHM_RANGES_GENERATE_H +#define _LIBCPP___ALGORITHM_RANGES_GENERATE_H + +#include <__algorithm/generate.h> +#include <__algorithm/make_projected.h> +#include <__concepts/constructible.h> +#include <__concepts/invocable.h> +#include <__config> +#include <__functional/identity.h> +#include <__functional/invoke.h> +#include <__functional/ranges_operations.h> +#include <__iterator/concepts.h> +#include <__iterator/iterator_traits.h> +#include <__iterator/projected.h> +#include <__ranges/access.h> +#include <__ranges/concepts.h> +#include <__ranges/dangling.h> +#include <__utility/forward.h> +#include <__utility/move.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +_LIBCPP_BEGIN_NAMESPACE_STD + +namespace ranges { +namespace __generate { + +struct __fn { + + template <input_or_output_iterator _OutIter, sentinel_for<_OutIter> _Sent, copy_constructible _Func> + requires invocable<_Func&> && indirectly_writable<_OutIter, invoke_result_t<_Func&>> + _LIBCPP_HIDE_FROM_ABI constexpr + _OutIter operator()(_OutIter __first, _Sent __last, _Func __gen) const { + // TODO: implement + (void)__first; (void)__last; (void)__gen; + return {}; + } + + template <class _Range, copy_constructible _Func> + requires invocable<_Func&> && output_range<_Range, invoke_result_t<_Func&>> + _LIBCPP_HIDE_FROM_ABI constexpr + borrowed_iterator_t<_Range> operator()(_Range&& __range, _Func __gen) const { + // TODO: implement + (void)__range; (void)__gen; + return {}; + } + +}; + +} // namespace __generate + +inline namespace __cpo { + inline constexpr auto generate = __generate::__fn{}; +} // namespace __cpo +} // namespace ranges + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +#endif // _LIBCPP___ALGORITHM_RANGES_GENERATE_H diff --git a/libcxx/include/__algorithm/ranges_generate_n.h b/libcxx/include/__algorithm/ranges_generate_n.h new file mode 100644 index 000000000000..bcf50e025ecc --- /dev/null +++ b/libcxx/include/__algorithm/ranges_generate_n.h @@ -0,0 +1,65 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
diff --git a/libcxx/include/__algorithm/ranges_generate_n.h b/libcxx/include/__algorithm/ranges_generate_n.h
new file mode 100644
index 000000000000..bcf50e025ecc
--- /dev/null
+++ b/libcxx/include/__algorithm/ranges_generate_n.h
@@ -0,0 +1,65 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___ALGORITHM_RANGES_GENERATE_N_H
+#define _LIBCPP___ALGORITHM_RANGES_GENERATE_N_H
+
+#include <__algorithm/generate_n.h>
+#include <__algorithm/make_projected.h>
+#include <__concepts/constructible.h>
+#include <__concepts/invocable.h>
+#include <__config>
+#include <__functional/identity.h>
+#include <__functional/invoke.h>
+#include <__functional/ranges_operations.h>
+#include <__iterator/concepts.h>
+#include <__iterator/incrementable_traits.h>
+#include <__iterator/iterator_traits.h>
+#include <__iterator/projected.h>
+#include <__ranges/access.h>
+#include <__ranges/concepts.h>
+#include <__ranges/dangling.h>
+#include <__utility/forward.h>
+#include <__utility/move.h>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+# pragma GCC system_header
+#endif
+
+#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES)
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+namespace ranges {
+namespace __generate_n {
+
+struct __fn {
+
+  template <input_or_output_iterator _OutIter, copy_constructible _Func>
+    requires invocable<_Func&> && indirectly_writable<_OutIter, invoke_result_t<_Func&>>
+  _LIBCPP_HIDE_FROM_ABI constexpr
+  _OutIter operator()(_OutIter __first, iter_difference_t<_OutIter> __n, _Func __gen) const {
+    // TODO: implement
+    (void)__first; (void)__n; (void)__gen;
+    return {};
+  }
+
+};
+
+} // namespace __generate_n
+
+inline namespace __cpo {
+  inline constexpr auto generate_n = __generate_n::__fn{};
+} // namespace __cpo
+} // namespace ranges
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES)
+
+#endif // _LIBCPP___ALGORITHM_RANGES_GENERATE_N_H
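The counted variant is stubbed the same way. Its eventual contract, sketched under the same assumptions (generate_n_sketch is an illustrative name, not the eventual libc++ code):

#include <iterator>

// Sketch: invoke gen exactly n times (not at all if n <= 0) and
// return the iterator one past the last element written.
template <class Out, class Func>
constexpr Out generate_n_sketch(Out first, std::iter_difference_t<Out> n, Func gen) {
  for (; n > 0; --n, ++first)
    *first = gen();
  return first;
}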
diff --git a/libcxx/include/__algorithm/ranges_includes.h b/libcxx/include/__algorithm/ranges_includes.h
new file mode 100644
index 000000000000..19c17870ed6f
--- /dev/null
+++ b/libcxx/include/__algorithm/ranges_includes.h
@@ -0,0 +1,75 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___ALGORITHM_RANGES_INCLUDES_H
+#define _LIBCPP___ALGORITHM_RANGES_INCLUDES_H
+
+#include <__algorithm/includes.h>
+#include <__algorithm/make_projected.h>
+#include <__config>
+#include <__functional/identity.h>
+#include <__functional/invoke.h>
+#include <__functional/ranges_operations.h>
+#include <__iterator/concepts.h>
+#include <__iterator/iterator_traits.h>
+#include <__iterator/projected.h>
+#include <__ranges/access.h>
+#include <__ranges/concepts.h>
+#include <__ranges/dangling.h>
+#include <__utility/forward.h>
+#include <__utility/move.h>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+# pragma GCC system_header
+#endif
+
+#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES)
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+namespace ranges {
+namespace __includes {
+
+struct __fn {
+
+  template <input_iterator _Iter1, sentinel_for<_Iter1> _Sent1, input_iterator _Iter2, sentinel_for<_Iter2> _Sent2,
+            class _Proj1 = identity, class _Proj2 = identity,
+            indirect_strict_weak_order<projected<_Iter1, _Proj1>, projected<_Iter2, _Proj2>> _Comp = ranges::less>
+  _LIBCPP_HIDE_FROM_ABI constexpr
+  bool operator()(_Iter1 __first1, _Sent1 __last1, _Iter2 __first2, _Sent2 __last2, _Comp __comp = {},
+                  _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) const {
+    // TODO: implement
+    (void)__first1; (void)__last1; (void)__first2; (void)__last2; (void)__comp; (void)__proj1; (void)__proj2;
+    return {};
+  }
+
+  template <input_range _Range1, input_range _Range2, class _Proj1 = identity, class _Proj2 = identity,
+            indirect_strict_weak_order<projected<iterator_t<_Range1>, _Proj1>,
+                                       projected<iterator_t<_Range2>, _Proj2>> _Comp = ranges::less>
+  _LIBCPP_HIDE_FROM_ABI constexpr
+  bool operator()(_Range1&& __range1, _Range2&& __range2, _Comp __comp = {},
+                  _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) const {
+    // TODO: implement
+    (void)__range1; (void)__range2; (void)__comp; (void)__proj1; (void)__proj2;
+    return {};
+  }
+
+};
+
+} // namespace __includes
+
+inline namespace __cpo {
+  inline constexpr auto includes = __includes::__fn{};
+} // namespace __cpo
+} // namespace ranges
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES)
+
+#endif // _LIBCPP___ALGORITHM_RANGES_INCLUDES_H
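The stub above will eventually front the classic includes algorithm (already pulled in via <__algorithm/includes.h>), which tests whether every element of one sorted range appears in another sorted range. A usage sketch against the classic API, which is already implemented; illustrative, not part of the diff:

#include <algorithm>
#include <cassert>
#include <vector>

int main() {
  // Both ranges must be sorted with the same ordering.
  std::vector<int> sorted{1, 2, 3, 4, 5, 6};
  std::vector<int> sub{2, 4, 6};
  std::vector<int> not_sub{2, 7};

  assert(std::includes(sorted.begin(), sorted.end(), sub.begin(), sub.end()));
  assert(!std::includes(sorted.begin(), sorted.end(), not_sub.begin(), not_sub.end()));
}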
diff --git a/libcxx/include/__algorithm/ranges_inplace_merge.h b/libcxx/include/__algorithm/ranges_inplace_merge.h
new file mode 100644
index 000000000000..a0867e486c3a
--- /dev/null
+++ b/libcxx/include/__algorithm/ranges_inplace_merge.h
@@ -0,0 +1,73 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___ALGORITHM_RANGES_INPLACE_MERGE_H
+#define _LIBCPP___ALGORITHM_RANGES_INPLACE_MERGE_H
+
+#include <__algorithm/inplace_merge.h>
+#include <__algorithm/make_projected.h>
+#include <__config>
+#include <__functional/identity.h>
+#include <__functional/invoke.h>
+#include <__functional/ranges_operations.h>
+#include <__iterator/concepts.h>
+#include <__iterator/iterator_traits.h>
+#include <__iterator/projected.h>
+#include <__iterator/sortable.h>
+#include <__ranges/access.h>
+#include <__ranges/concepts.h>
+#include <__ranges/dangling.h>
+#include <__utility/forward.h>
+#include <__utility/move.h>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+# pragma GCC system_header
+#endif
+
+#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES)
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+namespace ranges {
+namespace __inplace_merge {
+
+struct __fn {
+
+  template <bidirectional_iterator _Iter, sentinel_for<_Iter> _Sent, class _Comp = ranges::less, class _Proj = identity>
+    requires sortable<_Iter, _Comp, _Proj>
+  _LIBCPP_HIDE_FROM_ABI
+  _Iter operator()(_Iter __first, _Iter __middle, _Sent __last, _Comp __comp = {}, _Proj __proj = {}) const {
+    // TODO: implement
+    (void)__first; (void)__middle; (void)__last; (void)__comp; (void)__proj;
+    return {};
+  }
+
+  template <bidirectional_range _Range, class _Comp = ranges::less, class _Proj = identity>
+    requires sortable<iterator_t<_Range>, _Comp, _Proj>
+  _LIBCPP_HIDE_FROM_ABI
+  borrowed_iterator_t<_Range> operator()(_Range&& __range, iterator_t<_Range> __middle,
+                                         _Comp __comp = {}, _Proj __proj = {}) const {
+    // TODO: implement
+    (void)__range; (void)__middle; (void)__comp; (void)__proj;
+    return {};
+  }
+
+};
+
+} // namespace __inplace_merge
+
+inline namespace __cpo {
+  inline constexpr auto inplace_merge = __inplace_merge::__fn{};
+} // namespace __cpo
+} // namespace ranges
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES)
+
+#endif // _LIBCPP___ALGORITHM_RANGES_INPLACE_MERGE_H
diff --git a/libcxx/include/__algorithm/ranges_is_heap.h b/libcxx/include/__algorithm/ranges_is_heap.h
new file mode 100644
index 000000000000..0f10fa4dcec9
--- /dev/null
+++ b/libcxx/include/__algorithm/ranges_is_heap.h
@@ -0,0 +1,70 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ALGORITHM_RANGES_IS_HEAP_H +#define _LIBCPP___ALGORITHM_RANGES_IS_HEAP_H + +#include <__algorithm/is_heap.h> +#include <__algorithm/make_projected.h> +#include <__config> +#include <__functional/identity.h> +#include <__functional/invoke.h> +#include <__functional/ranges_operations.h> +#include <__iterator/concepts.h> +#include <__iterator/iterator_traits.h> +#include <__iterator/projected.h> +#include <__ranges/access.h> +#include <__ranges/concepts.h> +#include <__ranges/dangling.h> +#include <__utility/forward.h> +#include <__utility/move.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +_LIBCPP_BEGIN_NAMESPACE_STD + +namespace ranges { +namespace __is_heap { + +struct __fn { + + template <random_access_iterator _Iter, sentinel_for<_Iter> _Sent, class _Proj = identity, + indirect_strict_weak_order<projected<_Iter, _Proj>> _Comp = ranges::less> + _LIBCPP_HIDE_FROM_ABI constexpr + bool operator()(_Iter __first, _Sent __last, _Comp __comp = {}, _Proj __proj = {}) const { + // TODO: implement + (void)__first; (void)__last; (void)__comp; (void)__proj; + return {}; + } + + template <random_access_range _Range, class _Proj = identity, + indirect_strict_weak_order<projected<iterator_t<_Range>, _Proj>> _Comp = ranges::less> + _LIBCPP_HIDE_FROM_ABI constexpr + bool operator()(_Range&& __range, _Comp __comp = {}, _Proj __proj = {}) const { + // TODO: implement + (void)__range; (void)__comp; (void)__proj; + return {}; + } +}; + +} // namespace __is_heap + +inline namespace __cpo { + inline constexpr auto is_heap = __is_heap::__fn{}; +} // namespace __cpo +} // namespace ranges + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +#endif // _LIBCPP___ALGORITHM_RANGES_IS_HEAP_H diff --git a/libcxx/include/__algorithm/ranges_is_heap_until.h b/libcxx/include/__algorithm/ranges_is_heap_until.h new file mode 100644 index 000000000000..ad021d6f2525 --- /dev/null +++ b/libcxx/include/__algorithm/ranges_is_heap_until.h @@ -0,0 +1,71 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ALGORITHM_RANGES_IS_HEAP_UNTIL_H +#define _LIBCPP___ALGORITHM_RANGES_IS_HEAP_UNTIL_H + +#include <__algorithm/is_heap_until.h> +#include <__algorithm/make_projected.h> +#include <__config> +#include <__functional/identity.h> +#include <__functional/invoke.h> +#include <__functional/ranges_operations.h> +#include <__iterator/concepts.h> +#include <__iterator/iterator_traits.h> +#include <__iterator/projected.h> +#include <__ranges/access.h> +#include <__ranges/concepts.h> +#include <__ranges/dangling.h> +#include <__utility/forward.h> +#include <__utility/move.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +_LIBCPP_BEGIN_NAMESPACE_STD + +namespace ranges { +namespace __is_heap_until { + +struct __fn { + + template <random_access_iterator _Iter, sentinel_for<_Iter> _Sent, class _Proj = identity, + indirect_strict_weak_order<projected<_Iter, _Proj>> _Comp = ranges::less> + _LIBCPP_HIDE_FROM_ABI constexpr + _Iter operator()(_Iter __first, _Sent __last, _Comp __comp = {}, _Proj __proj = {}) const { + // TODO: implement + (void)__first; (void)__last; (void)__comp; (void)__proj; + return {}; + } + + template <random_access_range _Range, class _Proj = identity, + indirect_strict_weak_order<projected<iterator_t<_Range>, _Proj>> _Comp = ranges::less> + _LIBCPP_HIDE_FROM_ABI constexpr + borrowed_iterator_t<_Range> operator()(_Range&& __range, _Comp __comp = {}, _Proj __proj = {}) const { + // TODO: implement + (void)__range; (void)__comp; (void)__proj; + return {}; + } + +}; + +} // namespace __is_heap_until + +inline namespace __cpo { + inline constexpr auto is_heap_until = __is_heap_until::__fn{}; +} // namespace __cpo +} // namespace ranges + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +#endif // _LIBCPP___ALGORITHM_RANGES_IS_HEAP_UNTIL_H diff --git a/libcxx/include/__algorithm/ranges_lower_bound.h b/libcxx/include/__algorithm/ranges_lower_bound.h index a73470465cfd..1a9ae204a1ee 100644 --- a/libcxx/include/__algorithm/ranges_lower_bound.h +++ b/libcxx/include/__algorithm/ranges_lower_bound.h @@ -39,7 +39,7 @@ struct __fn { indirect_strict_weak_order<const _Type*, projected<_Iter, _Proj>> _Comp = ranges::less> _LIBCPP_HIDE_FROM_ABI constexpr _Iter operator()(_Iter __first, _Sent __last, const _Type& __value, _Comp __comp = {}, _Proj __proj = {}) const { - return std::__lower_bound_impl<_RangesIterOps>(__first, __last, __value, __comp, __proj); + return std::__lower_bound_impl<_RangeAlgPolicy>(__first, __last, __value, __comp, __proj); } template <forward_range _Range, class _Type, class _Proj = identity, @@ -49,7 +49,7 @@ struct __fn { const _Type& __value, _Comp __comp = {}, _Proj __proj = {}) const { - return std::__lower_bound_impl<_RangesIterOps>(ranges::begin(__r), ranges::end(__r), __value, __comp, __proj); + return std::__lower_bound_impl<_RangeAlgPolicy>(ranges::begin(__r), ranges::end(__r), __value, __comp, __proj); } }; } // namespace __lower_bound diff --git a/libcxx/include/__algorithm/ranges_make_heap.h b/libcxx/include/__algorithm/ranges_make_heap.h new file mode 100644 index 000000000000..fd488dc11a4b --- /dev/null +++ b/libcxx/include/__algorithm/ranges_make_heap.h @@ -0,0 +1,79 @@ 
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___ALGORITHM_RANGES_MAKE_HEAP_H
+#define _LIBCPP___ALGORITHM_RANGES_MAKE_HEAP_H
+
+#include <__algorithm/make_heap.h>
+#include <__algorithm/make_projected.h>
+#include <__concepts/same_as.h>
+#include <__config>
+#include <__functional/identity.h>
+#include <__functional/invoke.h>
+#include <__functional/ranges_operations.h>
+#include <__iterator/concepts.h>
+#include <__iterator/iterator_traits.h>
+#include <__iterator/next.h>
+#include <__iterator/projected.h>
+#include <__iterator/sortable.h>
+#include <__ranges/access.h>
+#include <__ranges/concepts.h>
+#include <__ranges/dangling.h>
+#include <__utility/forward.h>
+#include <__utility/move.h>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+# pragma GCC system_header
+#endif
+
+#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES)
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+namespace ranges {
+namespace __make_heap {
+
+struct __fn {
+  template <class _Iter, class _Sent, class _Comp, class _Proj>
+  _LIBCPP_HIDE_FROM_ABI constexpr static
+  _Iter __make_heap_fn_impl(_Iter __first, _Sent __last, _Comp& __comp, _Proj& __proj) {
+    auto __last_iter = ranges::next(__first, __last);
+
+    auto&& __projected_comp = ranges::__make_projected_comp(__comp, __proj);
+    std::__make_heap(std::move(__first), __last_iter, __projected_comp);
+
+    return __last_iter;
+  }
+
+  template <random_access_iterator _Iter, sentinel_for<_Iter> _Sent, class _Comp = ranges::less, class _Proj = identity>
+    requires sortable<_Iter, _Comp, _Proj>
+  _LIBCPP_HIDE_FROM_ABI constexpr
+  _Iter operator()(_Iter __first, _Sent __last, _Comp __comp = {}, _Proj __proj = {}) const {
+    return __make_heap_fn_impl(std::move(__first), std::move(__last), __comp, __proj);
+  }
+
+  template <random_access_range _Range, class _Comp = ranges::less, class _Proj = identity>
+    requires sortable<iterator_t<_Range>, _Comp, _Proj>
+  _LIBCPP_HIDE_FROM_ABI constexpr
+  borrowed_iterator_t<_Range> operator()(_Range&& __r, _Comp __comp = {}, _Proj __proj = {}) const {
+    return __make_heap_fn_impl(ranges::begin(__r), ranges::end(__r), __comp, __proj);
+  }
+};
+
+} // namespace __make_heap
+
+inline namespace __cpo {
+  inline constexpr auto make_heap = __make_heap::__fn{};
+} // namespace __cpo
+} // namespace ranges
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES)
+
+#endif // _LIBCPP___ALGORITHM_RANGES_MAKE_HEAP_H
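Unlike the stubbed files in this batch, ranges::make_heap above is complete: it materializes the sentinel with ranges::next, folds the comparator and projection into a single comparator via __make_projected_comp, and reuses the classic std::__make_heap. A usage sketch, illustrative and not part of the diff (std::is_heap is used for the check because ranges::is_heap is still a stub at this point in the commit):

#include <algorithm>
#include <cassert>
#include <vector>

int main() {
  std::vector<int> v{3, 1, 4, 1, 5, 9, 2, 6};

  std::ranges::make_heap(v);                 // max-heap under ranges::less
  assert(std::is_heap(v.begin(), v.end()));
  assert(v.front() == 9);                    // largest element at the root

  std::ranges::pop_heap(v);                  // ranges::pop_heap is also implemented below
  assert(v.back() == 9);
}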
diff --git a/libcxx/include/__algorithm/ranges_merge.h b/libcxx/include/__algorithm/ranges_merge.h
new file mode 100644
index 000000000000..c73e09e94ccc
--- /dev/null
+++ b/libcxx/include/__algorithm/ranges_merge.h
@@ -0,0 +1,142 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___ALGORITHM_RANGES_MERGE_H
+#define _LIBCPP___ALGORITHM_RANGES_MERGE_H
+
+#include <__algorithm/in_in_out_result.h>
+#include <__algorithm/ranges_copy.h>
+#include <__config>
+#include <__functional/identity.h>
+#include <__functional/invoke.h>
+#include <__functional/ranges_operations.h>
+#include <__iterator/concepts.h>
+#include <__iterator/mergeable.h>
+#include <__ranges/access.h>
+#include <__ranges/concepts.h>
+#include <__ranges/dangling.h>
+#include <__utility/move.h>
+#include <type_traits>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+# pragma GCC system_header
+#endif
+
+#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES)
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+namespace ranges {
+
+template <class _InIter1, class _InIter2, class _OutIter>
+using merge_result = in_in_out_result<_InIter1, _InIter2, _OutIter>;
+
+namespace __merge {
+
+template <
+    class _InIter1,
+    class _Sent1,
+    class _InIter2,
+    class _Sent2,
+    class _OutIter,
+    class _Comp,
+    class _Proj1,
+    class _Proj2>
+_LIBCPP_HIDE_FROM_ABI constexpr merge_result<__uncvref_t<_InIter1>, __uncvref_t<_InIter2>, __uncvref_t<_OutIter>>
+__merge_impl(
+    _InIter1&& __first1,
+    _Sent1&& __last1,
+    _InIter2&& __first2,
+    _Sent2&& __last2,
+    _OutIter&& __result,
+    _Comp&& __comp,
+    _Proj1&& __proj1,
+    _Proj2&& __proj2) {
+  for (; __first1 != __last1 && __first2 != __last2; ++__result) {
+    if (std::invoke(__comp, std::invoke(__proj2, *__first2), std::invoke(__proj1, *__first1))) {
+      *__result = *__first2;
+      ++__first2;
+    } else {
+      *__result = *__first1;
+      ++__first1;
+    }
+  }
+  auto __ret1 = ranges::copy(std::move(__first1), std::move(__last1), std::move(__result));
+  auto __ret2 = ranges::copy(std::move(__first2), std::move(__last2), std::move(__ret1.out));
+  return {std::move(__ret1.in), std::move(__ret2.in), std::move(__ret2.out)};
+}
+
+struct __fn {
+  template <
+      input_iterator _InIter1,
+      sentinel_for<_InIter1> _Sent1,
+      input_iterator _InIter2,
+      sentinel_for<_InIter2> _Sent2,
+      weakly_incrementable _OutIter,
+      class _Comp = less,
+      class _Proj1 = identity,
+      class _Proj2 = identity>
+    requires mergeable<_InIter1, _InIter2, _OutIter, _Comp, _Proj1, _Proj2>
+  _LIBCPP_HIDE_FROM_ABI constexpr merge_result<_InIter1, _InIter2, _OutIter> operator()(
+      _InIter1 __first1,
+      _Sent1 __last1,
+      _InIter2 __first2,
+      _Sent2 __last2,
+      _OutIter __result,
+      _Comp __comp = {},
+      _Proj1 __proj1 = {},
+      _Proj2 __proj2 = {}) const {
+    return __merge::__merge_impl(__first1, __last1, __first2, __last2, __result, __comp, __proj1, __proj2);
+  }
+
+  template <
+      input_range _Range1,
+      input_range _Range2,
+      weakly_incrementable _OutIter,
+      class _Comp = less,
+      class _Proj1 = identity,
+      class _Proj2 = identity>
+    requires mergeable<
+        iterator_t<_Range1>,
+        iterator_t<_Range2>,
+        _OutIter,
+        _Comp,
+        _Proj1,
+        _Proj2>
+  _LIBCPP_HIDE_FROM_ABI constexpr merge_result<borrowed_iterator_t<_Range1>, borrowed_iterator_t<_Range2>, _OutIter>
+  operator()(
+      _Range1&& __range1,
+      _Range2&& __range2,
+      _OutIter __result,
+      _Comp __comp = {},
+      _Proj1 __proj1 = {},
+      _Proj2 __proj2 = {}) const {
+    return __merge::__merge_impl(
+        ranges::begin(__range1),
+        ranges::end(__range1),
+        ranges::begin(__range2),
+        ranges::end(__range2),
+        __result,
+        __comp,
+        __proj1,
+        __proj2);
+  }
+};
+
+} // namespace __merge
+
+inline namespace __cpo {
+  inline constexpr auto merge = __merge::__fn{};
+} // namespace __cpo
+} // namespace ranges
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES)
+
+#endif // _LIBCPP___ALGORITHM_RANGES_MERGE_H
diff --git a/libcxx/include/__algorithm/ranges_nth_element.h b/libcxx/include/__algorithm/ranges_nth_element.h
new file mode 100644
index 000000000000..2a929eacb89d
--- /dev/null
+++ b/libcxx/include/__algorithm/ranges_nth_element.h
@@ -0,0 +1,79 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___ALGORITHM_RANGES_NTH_ELEMENT_H
+#define _LIBCPP___ALGORITHM_RANGES_NTH_ELEMENT_H
+
+#include <__algorithm/make_projected.h>
+#include <__algorithm/nth_element.h>
+#include <__config>
+#include <__functional/identity.h>
+#include <__functional/invoke.h>
+#include <__functional/ranges_operations.h>
+#include <__iterator/concepts.h>
+#include <__iterator/iterator_traits.h>
+#include <__iterator/next.h>
+#include <__iterator/projected.h>
+#include <__iterator/sortable.h>
+#include <__ranges/access.h>
+#include <__ranges/concepts.h>
+#include <__ranges/dangling.h>
+#include <__utility/forward.h>
+#include <__utility/move.h>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+# pragma GCC system_header
+#endif
+
+#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES)
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+namespace ranges {
+namespace __nth_element {
+
+struct __fn {
+  template <class _Iter, class _Sent, class _Comp, class _Proj>
+  _LIBCPP_HIDE_FROM_ABI constexpr static
+  _Iter __nth_element_fn_impl(_Iter __first, _Iter __nth, _Sent __last, _Comp& __comp, _Proj& __proj) {
+    auto __last_iter = ranges::next(__first, __last);
+
+    auto&& __projected_comp = ranges::__make_projected_comp(__comp, __proj);
+    std::__nth_element_impl(std::move(__first), std::move(__nth), __last_iter, __projected_comp);
+
+    return __last_iter;
+  }
+
+  template <random_access_iterator _Iter, sentinel_for<_Iter> _Sent, class _Comp = ranges::less, class _Proj = identity>
+    requires sortable<_Iter, _Comp, _Proj>
+  _LIBCPP_HIDE_FROM_ABI constexpr
+  _Iter operator()(_Iter __first, _Iter __nth, _Sent __last, _Comp __comp = {}, _Proj __proj = {}) const {
+    return __nth_element_fn_impl(std::move(__first), std::move(__nth), std::move(__last), __comp, __proj);
+  }
+
+  template <random_access_range _Range, class _Comp = ranges::less, class _Proj = identity>
+    requires sortable<iterator_t<_Range>, _Comp, _Proj>
+  _LIBCPP_HIDE_FROM_ABI constexpr
+  borrowed_iterator_t<_Range> operator()(_Range&& __r, iterator_t<_Range> __nth, _Comp __comp = {},
+                                         _Proj __proj = {}) const {
+    return __nth_element_fn_impl(ranges::begin(__r), std::move(__nth), ranges::end(__r), __comp, __proj);
+  }
+};
+
+} // namespace __nth_element
+
+inline namespace __cpo {
+  inline constexpr auto nth_element = __nth_element::__fn{};
+} // namespace __cpo
+} // namespace ranges
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES)
+
+#endif // _LIBCPP___ALGORITHM_RANGES_NTH_ELEMENT_H
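ranges::nth_element above follows the same wrapper pattern as make_heap: materialize the sentinel, project the comparator, and delegate to std::__nth_element_impl. A usage sketch of the selection semantics, illustrative and not part of the diff:

#include <algorithm>
#include <cassert>
#include <vector>

int main() {
  std::vector<int> v{9, 1, 8, 2, 7, 3, 6, 4, 5};
  auto mid = v.begin() + v.size() / 2;

  // Places at mid the element that would land there in a full sort; everything
  // before mid compares <= *mid and everything after compares >= *mid.
  std::ranges::nth_element(v, mid);
  assert(*mid == 5);
}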
diff --git a/libcxx/include/__algorithm/ranges_partial_sort_copy.h b/libcxx/include/__algorithm/ranges_partial_sort_copy.h
new file mode 100644
index 000000000000..55ad2ca4e686
--- /dev/null
+++ b/libcxx/include/__algorithm/ranges_partial_sort_copy.h
@@ -0,0 +1,88 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___ALGORITHM_RANGES_PARTIAL_SORT_COPY_H
+#define _LIBCPP___ALGORITHM_RANGES_PARTIAL_SORT_COPY_H
+
+#include <__algorithm/in_out_result.h>
+#include <__algorithm/make_projected.h>
+#include <__algorithm/partial_sort_copy.h>
+#include <__config>
+#include <__functional/identity.h>
+#include <__functional/invoke.h>
+#include <__functional/ranges_operations.h>
+#include <__iterator/concepts.h>
+#include <__iterator/iterator_traits.h>
+#include <__iterator/projected.h>
+#include <__iterator/sortable.h>
+#include <__ranges/access.h>
+#include <__ranges/concepts.h>
+#include <__ranges/dangling.h>
+#include <__utility/forward.h>
+#include <__utility/move.h>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+# pragma GCC system_header
+#endif
+
+#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES)
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+namespace ranges {
+
+template <class _InIter, class _OutIter>
+using partial_sort_copy_result = in_out_result<_InIter, _OutIter>;
+
+namespace __partial_sort_copy {
+
+struct __fn {
+
+  template <input_iterator _Iter1, sentinel_for<_Iter1> _Sent1,
+            random_access_iterator _Iter2, sentinel_for<_Iter2> _Sent2,
+            class _Comp = ranges::less, class _Proj1 = identity, class _Proj2 = identity>
+    requires indirectly_copyable<_Iter1, _Iter2> && sortable<_Iter2, _Comp, _Proj2> &&
+             indirect_strict_weak_order<_Comp, projected<_Iter1, _Proj1>, projected<_Iter2, _Proj2>>
+  _LIBCPP_HIDE_FROM_ABI constexpr
+  partial_sort_copy_result<_Iter1, _Iter2>
+  operator()(_Iter1 __first, _Sent1 __last, _Iter2 __result_first, _Sent2 __result_last,
+             _Comp __comp = {}, _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) const {
+    // TODO: implement
+    (void)__first; (void)__last; (void)__result_first; (void)__result_last; (void)__comp; (void)__proj1; (void)__proj2;
+    return {};
+  }
+
+  template <input_range _Range1, random_access_range _Range2, class _Comp = ranges::less,
+            class _Proj1 = identity, class _Proj2 = identity>
+    requires indirectly_copyable<iterator_t<_Range1>, iterator_t<_Range2>> &&
+             sortable<iterator_t<_Range2>, _Comp, _Proj2> &&
+             indirect_strict_weak_order<_Comp, projected<iterator_t<_Range1>, _Proj1>,
+                                        projected<iterator_t<_Range2>, _Proj2>>
+  _LIBCPP_HIDE_FROM_ABI constexpr
+  partial_sort_copy_result<borrowed_iterator_t<_Range1>, borrowed_iterator_t<_Range2>>
+  operator()(_Range1&& __range, _Range2&& __result_range, _Comp __comp = {},
+             _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) const {
+    // TODO: implement
+    (void)__range; (void)__result_range; (void)__comp; (void)__proj1; (void)__proj2;
+    return {};
+  }
+
+};
+
+} // namespace __partial_sort_copy
+
+inline namespace __cpo {
+  inline constexpr auto partial_sort_copy = __partial_sort_copy::__fn{};
+} // namespace __cpo
+} // namespace ranges
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES)
+
+#endif // _LIBCPP___ALGORITHM_RANGES_PARTIAL_SORT_COPY_H
diff --git a/libcxx/include/__algorithm/ranges_partition.h b/libcxx/include/__algorithm/ranges_partition.h new
file mode 100644 index 000000000000..c145e7bdb4a2 --- /dev/null +++ b/libcxx/include/__algorithm/ranges_partition.h @@ -0,0 +1,74 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ALGORITHM_RANGES_PARTITION_H +#define _LIBCPP___ALGORITHM_RANGES_PARTITION_H + +#include <__algorithm/make_projected.h> +#include <__algorithm/partition.h> +#include <__config> +#include <__functional/identity.h> +#include <__functional/invoke.h> +#include <__functional/ranges_operations.h> +#include <__iterator/concepts.h> +#include <__iterator/iterator_traits.h> +#include <__iterator/permutable.h> +#include <__iterator/projected.h> +#include <__ranges/access.h> +#include <__ranges/concepts.h> +#include <__ranges/dangling.h> +#include <__ranges/subrange.h> +#include <__utility/forward.h> +#include <__utility/move.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +_LIBCPP_BEGIN_NAMESPACE_STD + +namespace ranges { +namespace __partition { + +struct __fn { + + template <permutable _Iter, sentinel_for<_Iter> _Sent, class _Proj = identity, + indirect_unary_predicate<projected<_Iter, _Proj>> _Pred> + _LIBCPP_HIDE_FROM_ABI constexpr + subrange<_Iter> operator()(_Iter __first, _Sent __last, _Pred __pred, _Proj __proj = {}) const { + // TODO: implement + (void)__first; (void)__last; (void)__pred; (void)__proj; + return {}; + } + + template <forward_range _Range, class _Proj = identity, + indirect_unary_predicate<projected<iterator_t<_Range>, _Proj>> _Pred> + requires permutable<iterator_t<_Range>> + _LIBCPP_HIDE_FROM_ABI constexpr + borrowed_subrange_t<_Range> operator()(_Range&& __range, _Pred __pred, _Proj __proj = {}) const { + // TODO: implement + (void)__range; (void)__pred; (void)__proj; + return {}; + } + +}; + +} // namespace __partition + +inline namespace __cpo { + inline constexpr auto partition = __partition::__fn{}; +} // namespace __cpo +} // namespace ranges + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +#endif // _LIBCPP___ALGORITHM_RANGES_PARTITION_H diff --git a/libcxx/include/__algorithm/ranges_partition_copy.h b/libcxx/include/__algorithm/ranges_partition_copy.h new file mode 100644 index 000000000000..f55089b94ea5 --- /dev/null +++ b/libcxx/include/__algorithm/ranges_partition_copy.h @@ -0,0 +1,82 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ALGORITHM_RANGES_PARTITION_COPY_H +#define _LIBCPP___ALGORITHM_RANGES_PARTITION_COPY_H + +#include <__algorithm/in_out_out_result.h> +#include <__algorithm/make_projected.h> +#include <__algorithm/partition_copy.h> +#include <__config> +#include <__functional/identity.h> +#include <__functional/invoke.h> +#include <__functional/ranges_operations.h> +#include <__iterator/concepts.h> +#include <__iterator/iterator_traits.h> +#include <__iterator/projected.h> +#include <__ranges/access.h> +#include <__ranges/concepts.h> +#include <__ranges/dangling.h> +#include <__utility/forward.h> +#include <__utility/move.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +_LIBCPP_BEGIN_NAMESPACE_STD + +namespace ranges { + +template <class _InIter, class _OutIter1, class _OutIter2> +using partition_copy_result = in_out_out_result<_InIter, _OutIter1, _OutIter2>; + +namespace __partition_copy { + +struct __fn { + + template <input_iterator _InIter, sentinel_for<_InIter> _Sent, + weakly_incrementable _OutIter1, weakly_incrementable _OutIter2, + class _Proj = identity, indirect_unary_predicate<projected<_InIter, _Proj>> _Pred> + requires indirectly_copyable<_InIter, _OutIter1> && indirectly_copyable<_InIter, _OutIter2> + _LIBCPP_HIDE_FROM_ABI constexpr + partition_copy_result<_InIter, _OutIter1, _OutIter2> + operator()(_InIter __first, _Sent __last, _OutIter1 __out_true, _OutIter2 __out_false, + _Pred __pred, _Proj __proj = {}) const { + // TODO: implement + (void)__first; (void)__last; (void)__out_true; (void)__out_false; (void)__pred; (void)__proj; + return {}; + } + + template <input_range _Range, weakly_incrementable _OutIter1, weakly_incrementable _OutIter2, + class _Proj = identity, indirect_unary_predicate<projected<iterator_t<_Range>, _Proj>> _Pred> + requires indirectly_copyable<iterator_t<_Range>, _OutIter1> && indirectly_copyable<iterator_t<_Range>, _OutIter2> + _LIBCPP_HIDE_FROM_ABI constexpr + partition_copy_result<borrowed_iterator_t<_Range>, _OutIter1, _OutIter2> + operator()(_Range&& __range, _OutIter1 __out_true, _OutIter2 __out_false, _Pred __pred, _Proj __proj = {}) const { + // TODO: implement + (void)__range; (void)__out_true; (void)__out_false; (void)__pred; (void)__proj; + return {}; + } + +}; + +} // namespace __partition_copy + +inline namespace __cpo { + inline constexpr auto partition_copy = __partition_copy::__fn{}; +} // namespace __cpo +} // namespace ranges + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +#endif // _LIBCPP___ALGORITHM_RANGES_PARTITION_COPY_H diff --git a/libcxx/include/__algorithm/ranges_partition_point.h b/libcxx/include/__algorithm/ranges_partition_point.h new file mode 100644 index 000000000000..336b29f63284 --- /dev/null +++ b/libcxx/include/__algorithm/ranges_partition_point.h @@ -0,0 +1,71 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ALGORITHM_RANGES_PARTITION_POINT_H +#define _LIBCPP___ALGORITHM_RANGES_PARTITION_POINT_H + +#include <__algorithm/make_projected.h> +#include <__algorithm/partition_point.h> +#include <__config> +#include <__functional/identity.h> +#include <__functional/invoke.h> +#include <__functional/ranges_operations.h> +#include <__iterator/concepts.h> +#include <__iterator/iterator_traits.h> +#include <__iterator/projected.h> +#include <__ranges/access.h> +#include <__ranges/concepts.h> +#include <__ranges/dangling.h> +#include <__utility/forward.h> +#include <__utility/move.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +_LIBCPP_BEGIN_NAMESPACE_STD + +namespace ranges { +namespace __partition_point { + +struct __fn { + + template <forward_iterator _Iter, sentinel_for<_Iter> _Sent, class _Proj = identity, + indirect_unary_predicate<projected<_Iter, _Proj>> _Pred> + _LIBCPP_HIDE_FROM_ABI constexpr + _Iter operator()(_Iter __first, _Sent __last, _Pred __pred, _Proj __proj = {}) const { + // TODO: implement + (void)__first; (void)__last; (void)__pred; (void)__proj; + return {}; + } + + template <forward_range _Range, class _Proj = identity, + indirect_unary_predicate<projected<iterator_t<_Range>, _Proj>> _Pred> + _LIBCPP_HIDE_FROM_ABI constexpr + borrowed_iterator_t<_Range> operator()(_Range&& __range, _Pred __pred, _Proj __proj = {}) const { + // TODO: implement + (void)__range; (void)__pred; (void)__proj; + return {}; + } + +}; + +} // namespace __partition_point + +inline namespace __cpo { + inline constexpr auto partition_point = __partition_point::__fn{}; +} // namespace __cpo +} // namespace ranges + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +#endif // _LIBCPP___ALGORITHM_RANGES_PARTITION_POINT_H diff --git a/libcxx/include/__algorithm/ranges_pop_heap.h b/libcxx/include/__algorithm/ranges_pop_heap.h new file mode 100644 index 000000000000..d0b8314e5b0a --- /dev/null +++ b/libcxx/include/__algorithm/ranges_pop_heap.h @@ -0,0 +1,80 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ALGORITHM_RANGES_POP_HEAP_H +#define _LIBCPP___ALGORITHM_RANGES_POP_HEAP_H + +#include <__algorithm/make_projected.h> +#include <__algorithm/pop_heap.h> +#include <__concepts/same_as.h> +#include <__config> +#include <__functional/identity.h> +#include <__functional/invoke.h> +#include <__functional/ranges_operations.h> +#include <__iterator/concepts.h> +#include <__iterator/iterator_traits.h> +#include <__iterator/next.h> +#include <__iterator/projected.h> +#include <__iterator/sortable.h> +#include <__ranges/access.h> +#include <__ranges/concepts.h> +#include <__ranges/dangling.h> +#include <__utility/forward.h> +#include <__utility/move.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +_LIBCPP_BEGIN_NAMESPACE_STD + +namespace ranges { +namespace __pop_heap { + +struct __fn { + template <class _Iter, class _Sent, class _Comp, class _Proj> + _LIBCPP_HIDE_FROM_ABI constexpr static + _Iter __pop_heap_fn_impl(_Iter __first, _Sent __last, _Comp& __comp, _Proj& __proj) { + auto __last_iter = ranges::next(__first, __last); + auto __len = __last_iter - __first; + + auto&& __projected_comp = ranges::__make_projected_comp(__comp, __proj); + std::__pop_heap(std::move(__first), __last_iter, __projected_comp, __len); + + return __last_iter; + } + + template <random_access_iterator _Iter, sentinel_for<_Iter> _Sent, class _Comp = ranges::less, class _Proj = identity> + requires sortable<_Iter, _Comp, _Proj> + _LIBCPP_HIDE_FROM_ABI constexpr + _Iter operator()(_Iter __first, _Sent __last, _Comp __comp = {}, _Proj __proj = {}) const { + return __pop_heap_fn_impl(std::move(__first), std::move(__last), __comp, __proj); + } + + template <random_access_range _Range, class _Comp = ranges::less, class _Proj = identity> + requires sortable<iterator_t<_Range>, _Comp, _Proj> + _LIBCPP_HIDE_FROM_ABI constexpr + borrowed_iterator_t<_Range> operator()(_Range&& __r, _Comp __comp = {}, _Proj __proj = {}) const { + return __pop_heap_fn_impl(ranges::begin(__r), ranges::end(__r), __comp, __proj); + } +}; + +} // namespace __pop_heap + +inline namespace __cpo { + inline constexpr auto pop_heap = __pop_heap::__fn{}; +} // namespace __cpo +} // namespace ranges + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +#endif // _LIBCPP___ALGORITHM_RANGES_POP_HEAP_H diff --git a/libcxx/include/__algorithm/ranges_push_heap.h b/libcxx/include/__algorithm/ranges_push_heap.h new file mode 100644 index 000000000000..e46ad19cfed7 --- /dev/null +++ b/libcxx/include/__algorithm/ranges_push_heap.h @@ -0,0 +1,79 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ALGORITHM_RANGES_PUSH_HEAP_H +#define _LIBCPP___ALGORITHM_RANGES_PUSH_HEAP_H + +#include <__algorithm/make_projected.h> +#include <__algorithm/push_heap.h> +#include <__concepts/same_as.h> +#include <__config> +#include <__functional/identity.h> +#include <__functional/invoke.h> +#include <__functional/ranges_operations.h> +#include <__iterator/concepts.h> +#include <__iterator/iterator_traits.h> +#include <__iterator/next.h> +#include <__iterator/projected.h> +#include <__iterator/sortable.h> +#include <__ranges/access.h> +#include <__ranges/concepts.h> +#include <__ranges/dangling.h> +#include <__utility/forward.h> +#include <__utility/move.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +_LIBCPP_BEGIN_NAMESPACE_STD + +namespace ranges { +namespace __push_heap { + +struct __fn { + template <class _Iter, class _Sent, class _Comp, class _Proj> + _LIBCPP_HIDE_FROM_ABI constexpr static + _Iter __push_heap_fn_impl(_Iter __first, _Sent __last, _Comp& __comp, _Proj& __proj) { + auto __last_iter = ranges::next(__first, __last); + + auto&& __projected_comp = ranges::__make_projected_comp(__comp, __proj); + std::__push_heap(std::move(__first), __last_iter, __projected_comp); + + return __last_iter; + } + + template <random_access_iterator _Iter, sentinel_for<_Iter> _Sent, class _Comp = ranges::less, class _Proj = identity> + requires sortable<_Iter, _Comp, _Proj> + _LIBCPP_HIDE_FROM_ABI constexpr + _Iter operator()(_Iter __first, _Sent __last, _Comp __comp = {}, _Proj __proj = {}) const { + return __push_heap_fn_impl(std::move(__first), std::move(__last), __comp, __proj); + } + + template <random_access_range _Range, class _Comp = ranges::less, class _Proj = identity> + requires sortable<iterator_t<_Range>, _Comp, _Proj> + _LIBCPP_HIDE_FROM_ABI constexpr + borrowed_iterator_t<_Range> operator()(_Range&& __r, _Comp __comp = {}, _Proj __proj = {}) const { + return __push_heap_fn_impl(ranges::begin(__r), ranges::end(__r), __comp, __proj); + } +}; + +} // namespace __push_heap + +inline namespace __cpo { + inline constexpr auto push_heap = __push_heap::__fn{}; +} // namespace __cpo +} // namespace ranges + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +#endif // _LIBCPP___ALGORITHM_RANGES_PUSH_HEAP_H diff --git a/libcxx/include/__algorithm/ranges_remove.h b/libcxx/include/__algorithm/ranges_remove.h new file mode 100644 index 000000000000..a6a1200763d2 --- /dev/null +++ b/libcxx/include/__algorithm/ranges_remove.h @@ -0,0 +1,64 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___ALGORITHM_RANGES_REMOVE_H
+#define _LIBCPP___ALGORITHM_RANGES_REMOVE_H
+#include <__config>
+
+#include <__algorithm/ranges_remove_if.h>
+#include <__functional/identity.h>
+#include <__functional/ranges_operations.h>
+#include <__iterator/concepts.h>
+#include <__iterator/permutable.h>
+#include <__iterator/projected.h>
+#include <__ranges/access.h>
+#include <__ranges/concepts.h>
+#include <__ranges/subrange.h>
+#include <__utility/move.h>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+# pragma GCC system_header
+#endif
+
+#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES)
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+namespace ranges {
+namespace __remove {
+struct __fn {
+
+  template <permutable _Iter, sentinel_for<_Iter> _Sent, class _Type, class _Proj = identity>
+    requires indirect_binary_predicate<ranges::equal_to, projected<_Iter, _Proj>, const _Type*>
+  _LIBCPP_HIDE_FROM_ABI constexpr
+  subrange<_Iter> operator()(_Iter __first, _Sent __last, const _Type& __value, _Proj __proj = {}) const {
+    auto __pred = [&](auto&& __other) { return __value == __other; };
+    return ranges::__remove_if_impl(std::move(__first), std::move(__last), __pred, __proj);
+  }
+
+  template <forward_range _Range, class _Type, class _Proj = identity>
+    requires permutable<iterator_t<_Range>>
+          && indirect_binary_predicate<ranges::equal_to, projected<iterator_t<_Range>, _Proj>, const _Type*>
+  _LIBCPP_HIDE_FROM_ABI constexpr
+  borrowed_subrange_t<_Range> operator()(_Range&& __range, const _Type& __value, _Proj __proj = {}) const {
+    auto __pred = [&](auto&& __other) { return __value == __other; };
+    return ranges::__remove_if_impl(ranges::begin(__range), ranges::end(__range), __pred, __proj);
+  }
+};
+} // namespace __remove
+
+inline namespace __cpo {
+  inline constexpr auto remove = __remove::__fn{};
+} // namespace __cpo
+} // namespace ranges
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES)
+
+#endif // _LIBCPP___ALGORITHM_RANGES_REMOVE_H
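ranges::remove above reuses __remove_if_impl with an equality lambda, so it only needs indirect_binary_predicate against ranges::equal_to rather than a full predicate parameter. Note that it returns the subrange of leftover slots, which makes the erase-remove idiom read as follows (illustrative sketch, not part of the diff):

#include <algorithm>
#include <cassert>
#include <vector>

int main() {
  std::vector<int> v{1, 2, 3, 2, 4, 2};

  // remove() compacts the kept elements to the front and returns the
  // subrange [new_end, old_end) of moved-from leftovers; erase it to shrink.
  auto leftovers = std::ranges::remove(v, 2);
  v.erase(leftovers.begin(), leftovers.end());
  assert((v == std::vector<int>{1, 3, 4}));
}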
diff --git a/libcxx/include/__algorithm/ranges_remove_copy.h b/libcxx/include/__algorithm/ranges_remove_copy.h
new file mode 100644
index 000000000000..16e9009e7ef0
--- /dev/null
+++ b/libcxx/include/__algorithm/ranges_remove_copy.h
@@ -0,0 +1,81 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___ALGORITHM_RANGES_REMOVE_COPY_H
+#define _LIBCPP___ALGORITHM_RANGES_REMOVE_COPY_H
+
+#include <__algorithm/in_out_result.h>
+#include <__algorithm/make_projected.h>
+#include <__algorithm/remove_copy.h>
+#include <__config>
+#include <__functional/identity.h>
+#include <__functional/invoke.h>
+#include <__functional/ranges_operations.h>
+#include <__iterator/concepts.h>
+#include <__iterator/iterator_traits.h>
+#include <__iterator/projected.h>
+#include <__ranges/access.h>
+#include <__ranges/concepts.h>
+#include <__ranges/dangling.h>
+#include <__utility/forward.h>
+#include <__utility/move.h>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+# pragma GCC system_header
+#endif
+
+#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES)
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+namespace ranges {
+
+template <class _InIter, class _OutIter>
+using remove_copy_result = in_out_result<_InIter, _OutIter>;
+
+namespace __remove_copy {
+
+struct __fn {
+
+  template <input_iterator _InIter, sentinel_for<_InIter> _Sent, weakly_incrementable _OutIter, class _Type,
+            class _Proj = identity>
+    requires indirectly_copyable<_InIter, _OutIter> &&
+             indirect_binary_predicate<ranges::equal_to, projected<_InIter, _Proj>, const _Type*>
+  _LIBCPP_HIDE_FROM_ABI constexpr
+  remove_copy_result<_InIter, _OutIter>
+  operator()(_InIter __first, _Sent __last, _OutIter __result, const _Type& __value, _Proj __proj = {}) const {
+    // TODO: implement
+    (void)__first; (void)__last; (void)__result; (void)__value; (void)__proj;
+    return {};
+  }
+
+  template <input_range _Range, weakly_incrementable _OutIter, class _Type, class _Proj = identity>
+    requires indirectly_copyable<iterator_t<_Range>, _OutIter> &&
+             indirect_binary_predicate<ranges::equal_to, projected<iterator_t<_Range>, _Proj>, const _Type*>
+  _LIBCPP_HIDE_FROM_ABI constexpr
+  remove_copy_result<borrowed_iterator_t<_Range>, _OutIter>
+  operator()(_Range&& __range, _OutIter __result, const _Type& __value, _Proj __proj = {}) const {
+    // TODO: implement
+    (void)__range; (void)__result; (void)__value; (void)__proj;
+    return {};
+  }
+
+};
+
+} // namespace __remove_copy
+
+inline namespace __cpo {
+  inline constexpr auto remove_copy = __remove_copy::__fn{};
+} // namespace __cpo
+} // namespace ranges
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES)
+
+#endif // _LIBCPP___ALGORITHM_RANGES_REMOVE_COPY_H
diff --git a/libcxx/include/__algorithm/ranges_remove_copy_if.h b/libcxx/include/__algorithm/ranges_remove_copy_if.h
new file mode 100644
index 000000000000..4eafe425b8e3
--- /dev/null
+++ b/libcxx/include/__algorithm/ranges_remove_copy_if.h
@@ -0,0 +1,80 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ALGORITHM_RANGES_REMOVE_COPY_IF_H +#define _LIBCPP___ALGORITHM_RANGES_REMOVE_COPY_IF_H + +#include <__algorithm/in_out_result.h> +#include <__algorithm/make_projected.h> +#include <__algorithm/remove_copy_if.h> +#include <__config> +#include <__functional/identity.h> +#include <__functional/invoke.h> +#include <__functional/ranges_operations.h> +#include <__iterator/concepts.h> +#include <__iterator/iterator_traits.h> +#include <__iterator/projected.h> +#include <__ranges/access.h> +#include <__ranges/concepts.h> +#include <__ranges/dangling.h> +#include <__utility/forward.h> +#include <__utility/move.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +_LIBCPP_BEGIN_NAMESPACE_STD + +namespace ranges { + +template <class _InIter, class _OutIter> +using remove_copy_if_result = in_out_result<_InIter, _OutIter>; + +namespace __remove_copy_if { + +struct __fn { + + template <input_iterator _InIter, sentinel_for<_InIter> _Sent, weakly_incrementable _OutIter, + class _Proj = identity, indirect_unary_predicate<projected<_InIter, _Proj>> _Pred> + requires indirectly_copyable<_InIter, _OutIter> + _LIBCPP_HIDE_FROM_ABI constexpr + remove_copy_if_result<_InIter, _OutIter> + operator()(_InIter __first, _Sent __last, _OutIter __result, _Pred __pred, _Proj __proj = {}) const { + // TODO: implement + (void)__first; (void)__last; (void)__result; (void)__pred; (void)__proj; + return {}; + } + + template <input_range _Range, weakly_incrementable _OutIter, class _Proj = identity, + indirect_unary_predicate<projected<iterator_t<_Range>, _Proj>> _Pred> + requires indirectly_copyable<iterator_t<_Range>, _OutIter> + _LIBCPP_HIDE_FROM_ABI constexpr + remove_copy_if_result<borrowed_iterator_t<_Range>, _OutIter> + operator()(_Range&& __range, _OutIter __result, _Pred __pred, _Proj __proj = {}) const { + // TODO: implement + (void)__range; (void)__result; (void)__pred; (void)__proj; + return {}; + } + +}; + +} // namespace __remove_copy_if + +inline namespace __cpo { + inline constexpr auto remove_copy_if = __remove_copy_if::__fn{}; +} // namespace __cpo +} // namespace ranges + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +#endif // _LIBCPP___ALGORITHM_RANGES_REMOVE_COPY_IF_H diff --git a/libcxx/include/__algorithm/ranges_remove_if.h b/libcxx/include/__algorithm/ranges_remove_if.h new file mode 100644 index 000000000000..d4e382e551c6 --- /dev/null +++ b/libcxx/include/__algorithm/ranges_remove_if.h @@ -0,0 +1,85 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___ALGORITHM_RANGES_REMOVE_IF_H
+#define _LIBCPP___ALGORITHM_RANGES_REMOVE_IF_H
+#include <__config>
+
+#include <__algorithm/ranges_find_if.h>
+#include <__functional/identity.h>
+#include <__functional/invoke.h>
+#include <__functional/ranges_operations.h>
+#include <__iterator/concepts.h>
+#include <__iterator/iter_move.h>
+#include <__iterator/permutable.h>
+#include <__iterator/projected.h>
+#include <__ranges/access.h>
+#include <__ranges/concepts.h>
+#include <__ranges/subrange.h>
+#include <__utility/move.h>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+# pragma GCC system_header
+#endif
+
+#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES)
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+namespace ranges {
+
+template <class _Iter, class _Sent, class _Proj, class _Pred>
+_LIBCPP_HIDE_FROM_ABI constexpr
+subrange<_Iter> __remove_if_impl(_Iter __first, _Sent __last, _Pred& __pred, _Proj& __proj) {
+  auto __new_end = ranges::__find_if_impl(__first, __last, __pred, __proj);
+  if (__new_end == __last)
+    return {__new_end, __new_end};
+
+  _Iter __i = __new_end;
+  while (++__i != __last) {
+    if (!std::invoke(__pred, std::invoke(__proj, *__i))) {
+      *__new_end = ranges::iter_move(__i);
+      ++__new_end;
+    }
+  }
+  return {__new_end, __i};
+}
+
+namespace __remove_if {
+struct __fn {
+
+  template <permutable _Iter, sentinel_for<_Iter> _Sent,
+            class _Proj = identity,
+            indirect_unary_predicate<projected<_Iter, _Proj>> _Pred>
+  _LIBCPP_HIDE_FROM_ABI constexpr
+  subrange<_Iter> operator()(_Iter __first, _Sent __last, _Pred __pred, _Proj __proj = {}) const {
+    return ranges::__remove_if_impl(std::move(__first), std::move(__last), __pred, __proj);
+  }
+
+  template <forward_range _Range,
+            class _Proj = identity,
+            indirect_unary_predicate<projected<iterator_t<_Range>, _Proj>> _Pred>
+    requires permutable<iterator_t<_Range>>
+  _LIBCPP_HIDE_FROM_ABI constexpr
+  borrowed_subrange_t<_Range> operator()(_Range&& __range, _Pred __pred, _Proj __proj = {}) const {
+    return ranges::__remove_if_impl(ranges::begin(__range), ranges::end(__range), __pred, __proj);
+  }
+
+};
+} // namespace __remove_if
+
+inline namespace __cpo {
+  inline constexpr auto remove_if = __remove_if::__fn{};
+} // namespace __cpo
+} // namespace ranges
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES)
+
+#endif // _LIBCPP___ALGORITHM_RANGES_REMOVE_IF_H
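__remove_if_impl above is the workhorse for both remove and remove_if: it finds the first element to drop with __find_if_impl, then walks the rest of the range once, shifting every kept element down with ranges::iter_move, so kept elements stay in order and the tail is left in a moved-from state. A behavioral sketch, illustrative and not part of the diff:

#include <algorithm>
#include <cassert>
#include <vector>

int main() {
  std::vector<int> v{1, 2, 3, 4, 5, 6};
  auto is_even = [](int x) { return x % 2 == 0; };

  // Kept elements are compacted to the front in their original order.
  auto leftovers = std::ranges::remove_if(v, is_even);
  assert((std::vector<int>(v.begin(), leftovers.begin()) == std::vector<int>{1, 3, 5}));

  v.erase(leftovers.begin(), leftovers.end());
  assert((v == std::vector<int>{1, 3, 5}));
}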
diff --git a/libcxx/include/__algorithm/ranges_replace_copy.h b/libcxx/include/__algorithm/ranges_replace_copy.h
new file mode 100644
index 000000000000..19ef635d6f15
--- /dev/null
+++ b/libcxx/include/__algorithm/ranges_replace_copy.h
@@ -0,0 +1,84 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___ALGORITHM_RANGES_REPLACE_COPY_H
+#define _LIBCPP___ALGORITHM_RANGES_REPLACE_COPY_H
+
+#include <__algorithm/in_out_result.h>
+#include <__algorithm/make_projected.h>
+#include <__algorithm/replace_copy.h>
+#include <__config>
+#include <__functional/identity.h>
+#include <__functional/invoke.h>
+#include <__functional/ranges_operations.h>
+#include <__iterator/concepts.h>
+#include <__iterator/iterator_traits.h>
+#include <__iterator/projected.h>
+#include <__ranges/access.h>
+#include <__ranges/concepts.h>
+#include <__ranges/dangling.h>
+#include <__utility/forward.h>
+#include <__utility/move.h>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+# pragma GCC system_header
+#endif
+
+#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES)
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+namespace ranges {
+
+template <class _InIter, class _OutIter>
+using replace_copy_result = in_out_result<_InIter, _OutIter>;
+
+namespace __replace_copy {
+
+struct __fn {
+
+  template <input_iterator _InIter, sentinel_for<_InIter> _Sent, class _Type1, class _Type2,
+            output_iterator<const _Type2&> _OutIter, class _Proj = identity>
+    requires indirectly_copyable<_InIter, _OutIter> &&
+             indirect_binary_predicate<ranges::equal_to, projected<_InIter, _Proj>, const _Type1*>
+  _LIBCPP_HIDE_FROM_ABI constexpr
+  replace_copy_result<_InIter, _OutIter>
+  operator()(_InIter __first, _Sent __last, _OutIter __result, const _Type1& __old_value, const _Type2& __new_value,
+             _Proj __proj = {}) const {
+    // TODO: implement
+    (void)__first; (void)__last; (void)__result; (void)__old_value; (void)__new_value; (void)__proj;
+    return {};
+  }
+
+  template <input_range _Range, class _Type1, class _Type2, output_iterator<const _Type2&> _OutIter,
+            class _Proj = identity>
+    requires indirectly_copyable<iterator_t<_Range>, _OutIter> &&
+             indirect_binary_predicate<ranges::equal_to, projected<iterator_t<_Range>, _Proj>, const _Type1*>
+  _LIBCPP_HIDE_FROM_ABI constexpr
+  replace_copy_result<borrowed_iterator_t<_Range>, _OutIter>
+  operator()(_Range&& __range, _OutIter __result, const _Type1& __old_value, const _Type2& __new_value,
+             _Proj __proj = {}) const {
+    // TODO: implement
+    (void)__range; (void)__result; (void)__old_value; (void)__new_value; (void)__proj;
+    return {};
+  }
+
+};
+
+} // namespace __replace_copy
+
+inline namespace __cpo {
+  inline constexpr auto replace_copy = __replace_copy::__fn{};
+} // namespace __cpo
+} // namespace ranges
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES)
+
+#endif // _LIBCPP___ALGORITHM_RANGES_REPLACE_COPY_H
diff --git a/libcxx/include/__algorithm/ranges_replace_copy_if.h b/libcxx/include/__algorithm/ranges_replace_copy_if.h
new file mode 100644
index 000000000000..2a908e2057af
--- /dev/null
+++ b/libcxx/include/__algorithm/ranges_replace_copy_if.h
@@ -0,0 +1,81 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ALGORITHM_RANGES_REPLACE_COPY_IF_H +#define _LIBCPP___ALGORITHM_RANGES_REPLACE_COPY_IF_H + +#include <__algorithm/in_out_result.h> +#include <__algorithm/make_projected.h> +#include <__algorithm/replace_copy_if.h> +#include <__config> +#include <__functional/identity.h> +#include <__functional/invoke.h> +#include <__functional/ranges_operations.h> +#include <__iterator/concepts.h> +#include <__iterator/iterator_traits.h> +#include <__iterator/projected.h> +#include <__ranges/access.h> +#include <__ranges/concepts.h> +#include <__ranges/dangling.h> +#include <__utility/forward.h> +#include <__utility/move.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +_LIBCPP_BEGIN_NAMESPACE_STD + +namespace ranges { + +template <class _InIter, class _OutIter> +using replace_copy_if_result = in_out_result<_InIter, _OutIter>; + +namespace __replace_copy_if { + +struct __fn { + + template <input_iterator _InIter, sentinel_for<_InIter> _Sent, class _Type, output_iterator<const _Type&> _OutIter, + class _Proj = identity, indirect_unary_predicate<projected<_InIter, _Proj>> _Pred> + requires indirectly_copyable<_InIter, _OutIter> + _LIBCPP_HIDE_FROM_ABI constexpr + replace_copy_if_result<_InIter, _OutIter> + operator()(_InIter __first, _Sent __last, _OutIter __result, _Pred __pred, const _Type& __new_value, + _Proj __proj = {}) const { + // TODO: implement + (void)__first; (void)__last; (void)__result; (void)__pred; (void)__new_value; (void)__proj; + return {}; + } + + template <input_range _Range, class _Type, output_iterator<const _Type&> _OutIter, class _Proj = identity, + indirect_unary_predicate<projected<iterator_t<_Range>, _Proj>> _Pred> + requires indirectly_copyable<iterator_t<_Range>, _OutIter> + _LIBCPP_HIDE_FROM_ABI constexpr + replace_copy_if_result<borrowed_iterator_t<_Range>, _OutIter> + operator()(_Range&& __range, _OutIter __result, _Pred __pred, const _Type& __new_value, _Proj __proj = {}) const { + // TODO: implement + (void)__range; (void)__result; (void)__pred; (void)__new_value; (void)__proj; + return {}; + } + +}; + +} // namespace __replace_copy_if + +inline namespace __cpo { + inline constexpr auto replace_copy_if = __replace_copy_if::__fn{}; +} // namespace __cpo +} // namespace ranges + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +#endif // _LIBCPP___ALGORITHM_RANGES_REPLACE_COPY_IF_H diff --git a/libcxx/include/__algorithm/ranges_reverse_copy.h b/libcxx/include/__algorithm/ranges_reverse_copy.h new file mode 100644 index 000000000000..e2da9b484aaf --- /dev/null +++ b/libcxx/include/__algorithm/ranges_reverse_copy.h @@ -0,0 +1,67 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
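Both replace_copy headers above land with TODO placeholder bodies, so only the constraints are real in this snapshot. The sketch below shows the standard-specified semantics the stubs are expected to implement once a later commit fills them in, not what this snapshot executes:

#include <algorithm>
#include <vector>

int main() {
  std::vector<int> in{1, 2, 1, 3};
  std::vector<int> out(in.size());
  // Copy `in` into `out`, rewriting every 1 to 0.
  std::ranges::replace_copy(in, out.begin(), 1, 0);             // out == {0, 2, 0, 3}
  // Same idea, but a predicate selects the elements to rewrite.
  std::ranges::replace_copy_if(in, out.begin(),
                               [](int x) { return x > 2; }, 9); // out == {1, 2, 1, 9}
}
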
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ALGORITHM_RANGES_REVERSE_COPY_H +#define _LIBCPP___ALGORITHM_RANGES_REVERSE_COPY_H + +#include <__algorithm/in_out_result.h> +#include <__algorithm/ranges_copy.h> +#include <__config> +#include <__iterator/concepts.h> +#include <__iterator/next.h> +#include <__iterator/reverse_iterator.h> +#include <__ranges/access.h> +#include <__ranges/concepts.h> +#include <__ranges/dangling.h> +#include <__ranges/subrange.h> +#include <__utility/move.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +_LIBCPP_BEGIN_NAMESPACE_STD + +namespace ranges { + +template <class _InIter, class _OutIter> +using reverse_copy_result = in_out_result<_InIter, _OutIter>; + +namespace __reverse_copy { +struct __fn { + + template <bidirectional_iterator _InIter, sentinel_for<_InIter> _Sent, weakly_incrementable _OutIter> + requires indirectly_copyable<_InIter, _OutIter> + _LIBCPP_HIDE_FROM_ABI constexpr + reverse_copy_result<_InIter, _OutIter> operator()(_InIter __first, _Sent __last, _OutIter __result) const { + return (*this)(subrange(std::move(__first), std::move(__last)), std::move(__result)); + } + + template <bidirectional_range _Range, weakly_incrementable _OutIter> + requires indirectly_copyable<iterator_t<_Range>, _OutIter> + _LIBCPP_HIDE_FROM_ABI constexpr + reverse_copy_result<borrowed_iterator_t<_Range>, _OutIter> operator()(_Range&& __range, _OutIter __result) const { + auto __ret = ranges::copy(std::__reverse_range(__range), std::move(__result)); + return {ranges::next(ranges::begin(__range), ranges::end(__range)), std::move(__ret.out)}; + } + +}; +} // namespace __reverse_copy + +inline namespace __cpo { + inline constexpr auto reverse_copy = __reverse_copy::__fn{}; +} // namespace __cpo +} // namespace ranges + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +#endif // _LIBCPP___ALGORITHM_RANGES_REVERSE_COPY_H diff --git a/libcxx/include/__algorithm/ranges_rotate_copy.h b/libcxx/include/__algorithm/ranges_rotate_copy.h new file mode 100644 index 000000000000..d7a282c86750 --- /dev/null +++ b/libcxx/include/__algorithm/ranges_rotate_copy.h @@ -0,0 +1,68 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
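The ranges::reverse_copy above is implemented by running ranges::copy over std::__reverse_range(__range) and then reporting ranges::next(begin, end) as the .in iterator. A small sketch of the resulting behavior (standard C++20, not part of the header):

#include <algorithm>
#include <string>

int main() {
  std::string src = "abc";
  std::string dst(3, ' ');
  auto [in, out] = std::ranges::reverse_copy(src, dst.begin());
  // dst == "cba"; `in` is src.end(), matching the ranges::next(...) in the code above.
}
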
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ALGORITHM_RANGES_ROTATE_COPY_H +#define _LIBCPP___ALGORITHM_RANGES_ROTATE_COPY_H + +#include <__algorithm/in_out_result.h> +#include <__algorithm/ranges_copy.h> +#include <__config> +#include <__iterator/concepts.h> +#include <__iterator/reverse_iterator.h> +#include <__ranges/access.h> +#include <__ranges/concepts.h> +#include <__ranges/dangling.h> +#include <__utility/move.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +_LIBCPP_BEGIN_NAMESPACE_STD + +namespace ranges { + +template <class _InIter, class _OutIter> +using rotate_copy_result = in_out_result<_InIter, _OutIter>; + +namespace __rotate_copy { +struct __fn { + + template <bidirectional_iterator _InIter, sentinel_for<_InIter> _Sent, weakly_incrementable _OutIter> + requires indirectly_copyable<_InIter, _OutIter> + _LIBCPP_HIDE_FROM_ABI constexpr + rotate_copy_result<_InIter, _OutIter> + operator()(_InIter __first, _InIter __middle, _Sent __last, _OutIter __result) const { + auto __res1 = ranges::copy(__middle, __last, std::move(__result)); + auto __res2 = ranges::copy(__first, __middle, std::move(__res1.out)); + return {std::move(__res1.in), std::move(__res2.out)}; + } + + template <bidirectional_range _Range, weakly_incrementable _OutIter> + requires indirectly_copyable<iterator_t<_Range>, _OutIter> + _LIBCPP_HIDE_FROM_ABI constexpr + rotate_copy_result<borrowed_iterator_t<_Range>, _OutIter> + operator()(_Range&& __range, iterator_t<_Range> __middle, _OutIter __result) const { + return (*this)(ranges::begin(__range), std::move(__middle), ranges::end(__range), std::move(__result)); + } + +}; +} // namespace __rotate_copy + +inline namespace __cpo { + inline constexpr auto rotate_copy = __rotate_copy::__fn{}; +} // namespace __cpo +} // namespace ranges + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +#endif // _LIBCPP___ALGORITHM_RANGES_ROTATE_COPY_H diff --git a/libcxx/include/__algorithm/ranges_search.h b/libcxx/include/__algorithm/ranges_search.h new file mode 100644 index 000000000000..0564bbe1f8b3 --- /dev/null +++ b/libcxx/include/__algorithm/ranges_search.h @@ -0,0 +1,134 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
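ranges::rotate_copy above is literally two ranges::copy calls: the [middle, last) suffix first, then the [first, middle) prefix. A short sketch (standard C++20, not part of the header):

#include <algorithm>
#include <array>

int main() {
  std::array<int, 5> a{1, 2, 3, 4, 5};
  std::array<int, 5> dst{};
  // Copies {3, 4, 5} first, then {1, 2}.
  std::ranges::rotate_copy(a, a.begin() + 2, dst.begin()); // dst == {3, 4, 5, 1, 2}
}
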
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ALGORITHM_RANGES_SEARCH_H +#define _LIBCPP___ALGORITHM_RANGES_SEARCH_H + +#include <__algorithm/iterator_operations.h> +#include <__algorithm/search.h> +#include <__config> +#include <__functional/identity.h> +#include <__functional/ranges_operations.h> +#include <__iterator/advance.h> +#include <__iterator/concepts.h> +#include <__iterator/distance.h> +#include <__iterator/indirectly_comparable.h> +#include <__ranges/access.h> +#include <__ranges/concepts.h> +#include <__ranges/size.h> +#include <__ranges/subrange.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +_LIBCPP_BEGIN_NAMESPACE_STD + +namespace ranges { +namespace __search { +struct __fn { + template <class _Iter1, class _Sent1, class _Iter2, class _Sent2, class _Pred, class _Proj1, class _Proj2> + _LIBCPP_HIDE_FROM_ABI static constexpr subrange<_Iter1> __ranges_search_impl( + _Iter1 __first1, + _Sent1 __last1, + _Iter2 __first2, + _Sent2 __last2, + _Pred& __pred, + _Proj1& __proj1, + _Proj2& __proj2) { + if constexpr (sized_sentinel_for<_Sent2, _Iter2>) { + auto __size2 = ranges::distance(__first2, __last2); + if (__size2 == 0) + return {__first1, __first1}; + + if constexpr (sized_sentinel_for<_Sent1, _Iter1>) { + auto __size1 = ranges::distance(__first1, __last1); + if (__size1 < __size2) { + ranges::advance(__first1, __last1); + return {__first1, __first1}; + } + + if constexpr (random_access_iterator<_Iter1> && random_access_iterator<_Iter2>) { + auto __ret = std::__search_random_access_impl<_RangeAlgPolicy>( + __first1, __last1, __first2, __last2, __pred, __proj1, __proj2, __size1, __size2); + return {__ret.first, __ret.second}; + } + } + } + + auto __ret = + std::__search_forward_impl<_RangeAlgPolicy>(__first1, __last1, __first2, __last2, __pred, __proj1, __proj2); + return {__ret.first, __ret.second}; + } + + template <forward_iterator _Iter1, sentinel_for<_Iter1> _Sent1, + forward_iterator _Iter2, sentinel_for<_Iter2> _Sent2, + class _Pred = ranges::equal_to, + class _Proj1 = identity, + class _Proj2 = identity> + requires indirectly_comparable<_Iter1, _Iter2, _Pred, _Proj1, _Proj2> + _LIBCPP_HIDE_FROM_ABI constexpr + subrange<_Iter1> operator()(_Iter1 __first1, _Sent1 __last1, + _Iter2 __first2, _Sent2 __last2, + _Pred __pred = {}, + _Proj1 __proj1 = {}, + _Proj2 __proj2 = {}) const { + return __ranges_search_impl(__first1, __last1, __first2, __last2, __pred, __proj1, __proj2); + } + + template <forward_range _Range1, + forward_range _Range2, + class _Pred = ranges::equal_to, + class _Proj1 = identity, + class _Proj2 = identity> + requires indirectly_comparable<iterator_t<_Range1>, iterator_t<_Range2>, _Pred, _Proj1, _Proj2> + _LIBCPP_HIDE_FROM_ABI constexpr + borrowed_subrange_t<_Range1> operator()(_Range1&& __range1, + _Range2&& __range2, + _Pred __pred = {}, + _Proj1 __proj1 = {}, + _Proj2 __proj2 = {}) const { + auto __first1 = ranges::begin(__range1); + if constexpr (sized_range<_Range2>) { + auto __size2 = ranges::size(__range2); + if (__size2 == 0) + return {__first1, __first1}; + if constexpr (sized_range<_Range1>) { + auto __size1 = ranges::size(__range1); + if (__size1 < __size2) { + ranges::advance(__first1, ranges::end(__range1)); + return {__first1, __first1}; + } + } + } + + return __ranges_search_impl( + 
ranges::begin(__range1), + ranges::end(__range1), + ranges::begin(__range2), + ranges::end(__range2), + __pred, + __proj1, + __proj2); + } + +}; +} // namespace __search + +inline namespace __cpo { + inline constexpr auto search = __search::__fn{}; +} // namespace __cpo +} // namespace ranges + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +#endif // _LIBCPP___ALGORITHM_RANGES_SEARCH_H diff --git a/libcxx/include/__algorithm/ranges_search_n.h b/libcxx/include/__algorithm/ranges_search_n.h new file mode 100644 index 000000000000..29fdbfb1c725 --- /dev/null +++ b/libcxx/include/__algorithm/ranges_search_n.h @@ -0,0 +1,120 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ALGORITHM_RANGES_SEARCH_N_H +#define _LIBCPP___ALGORITHM_RANGES_SEARCH_N_H + +#include <__algorithm/iterator_operations.h> +#include <__algorithm/search_n.h> +#include <__config> +#include <__functional/identity.h> +#include <__functional/ranges_operations.h> +#include <__iterator/advance.h> +#include <__iterator/concepts.h> +#include <__iterator/distance.h> +#include <__iterator/incrementable_traits.h> +#include <__iterator/indirectly_comparable.h> +#include <__iterator/iterator_traits.h> +#include <__ranges/access.h> +#include <__ranges/concepts.h> +#include <__ranges/size.h> +#include <__ranges/subrange.h> +#include <__utility/move.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +_LIBCPP_BEGIN_NAMESPACE_STD + +namespace ranges { +namespace __search_n { +struct __fn { + + template <class _Iter1, class _Sent1, class _SizeT, class _Type, class _Pred, class _Proj> + _LIBCPP_HIDE_FROM_ABI static constexpr subrange<_Iter1> __ranges_search_n_impl( + _Iter1 __first, _Sent1 __last, _SizeT __count, const _Type& __value, _Pred& __pred, _Proj& __proj) { + if (__count == 0) + return {__first, __first}; + + if constexpr (sized_sentinel_for<_Sent1, _Iter1>) { + auto __size = ranges::distance(__first, __last); + if (__size < __count) { + ranges::advance(__first, __last); + return {__first, __first}; + } + + if constexpr (random_access_iterator<_Iter1>) { + auto __ret = std::__search_n_random_access_impl<_RangeAlgPolicy>(__first, __last, + __count, + __value, + __pred, + __proj, + __size); + return {std::move(__ret.first), std::move(__ret.second)}; + } + } + + auto __ret = std::__search_n_forward_impl<_RangeAlgPolicy>(__first, __last, + __count, + __value, + __pred, + __proj); + return {std::move(__ret.first), std::move(__ret.second)}; + } + + template <forward_iterator _Iter, sentinel_for<_Iter> _Sent, + class _Type, + class _Pred = ranges::equal_to, + class _Proj = identity> + requires indirectly_comparable<_Iter, const _Type*, _Pred, _Proj> + _LIBCPP_HIDE_FROM_ABI constexpr + subrange<_Iter> operator()(_Iter __first, _Sent __last, + iter_difference_t<_Iter> __count, + const _Type& __value, + _Pred __pred = {}, + _Proj __proj = {}) const { + return __ranges_search_n_impl(__first, __last, __count, __value, __pred, __proj); + } + + template <forward_range _Range, class _Type, class _Pred = ranges::equal_to, class 
_Proj = identity> + requires indirectly_comparable<iterator_t<_Range>, const _Type*, _Pred, _Proj> + _LIBCPP_HIDE_FROM_ABI constexpr + borrowed_subrange_t<_Range> operator()(_Range&& __range, + range_difference_t<_Range> __count, + const _Type& __value, + _Pred __pred = {}, + _Proj __proj = {}) const { + auto __first = ranges::begin(__range); + if (__count <= 0) + return {__first, __first}; + if constexpr (sized_range<_Range>) { + auto __size1 = ranges::size(__range); + if (__size1 < static_cast<range_size_t<_Range>>(__count)) { + ranges::advance(__first, ranges::end(__range)); + return {__first, __first}; + } + } + + return __ranges_search_n_impl(ranges::begin(__range), ranges::end(__range), __count, __value, __pred, __proj); + } +}; +} // namespace __search_n + +inline namespace __cpo { + inline constexpr auto search_n = __search_n::__fn{}; +} // namespace __cpo +} // namespace ranges + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +#endif // _LIBCPP___ALGORITHM_RANGES_SEARCH_N_H diff --git a/libcxx/include/__algorithm/ranges_set_difference.h b/libcxx/include/__algorithm/ranges_set_difference.h new file mode 100644 index 000000000000..4eb3efad3895 --- /dev/null +++ b/libcxx/include/__algorithm/ranges_set_difference.h @@ -0,0 +1,104 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ALGORITHM_RANGES_SET_DIFFERENCE_H +#define _LIBCPP___ALGORITHM_RANGES_SET_DIFFERENCE_H + +#include <__algorithm/in_out_result.h> +#include <__algorithm/make_projected.h> +#include <__algorithm/set_difference.h> +#include <__config> +#include <__functional/identity.h> +#include <__functional/invoke.h> +#include <__functional/ranges_operations.h> +#include <__iterator/concepts.h> +#include <__iterator/mergeable.h> +#include <__ranges/access.h> +#include <__ranges/concepts.h> +#include <__ranges/dangling.h> +#include <__type_traits/decay.h> +#include <__utility/move.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +_LIBCPP_BEGIN_NAMESPACE_STD + +namespace ranges { + +template <class _InIter, class _OutIter> +using set_difference_result = in_out_result<_InIter, _OutIter>; + +namespace __set_difference { + +struct __fn { + template < + input_iterator _InIter1, + sentinel_for<_InIter1> _Sent1, + input_iterator _InIter2, + sentinel_for<_InIter2> _Sent2, + weakly_incrementable _OutIter, + class _Comp = less, + class _Proj1 = identity, + class _Proj2 = identity> + requires mergeable<_InIter1, _InIter2, _OutIter, _Comp, _Proj1, _Proj2> + _LIBCPP_HIDE_FROM_ABI constexpr set_difference_result<_InIter1, _OutIter> operator()( + _InIter1 __first1, + _Sent1 __last1, + _InIter2 __first2, + _Sent2 __last2, + _OutIter __result, + _Comp __comp = {}, + _Proj1 __proj1 = {}, + _Proj2 __proj2 = {}) const { + auto __ret = std::__set_difference( + __first1, __last1, __first2, __last2, __result, ranges::__make_projected_comp(__comp, __proj1, __proj2)); + return {std::move(__ret.first), std::move(__ret.second)}; + } + + template < + input_range _Range1, + input_range _Range2, + weakly_incrementable _OutIter, + class 
_Comp = less, + class _Proj1 = identity, + class _Proj2 = identity> + requires mergeable<iterator_t<_Range1>, iterator_t<_Range2>, _OutIter, _Comp, _Proj1, _Proj2> + _LIBCPP_HIDE_FROM_ABI constexpr set_difference_result<borrowed_iterator_t<_Range1>, _OutIter> + operator()( + _Range1&& __range1, + _Range2&& __range2, + _OutIter __result, + _Comp __comp = {}, + _Proj1 __proj1 = {}, + _Proj2 __proj2 = {}) const { + auto __ret = std::__set_difference( + ranges::begin(__range1), + ranges::end(__range1), + ranges::begin(__range2), + ranges::end(__range2), + __result, + ranges::__make_projected_comp(__comp, __proj1, __proj2)); + return {std::move(__ret.first), std::move(__ret.second)}; + } +}; + +} // namespace __set_difference + +inline namespace __cpo { + inline constexpr auto set_difference = __set_difference::__fn{}; +} // namespace __cpo +} // namespace ranges + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) +#endif // _LIBCPP___ALGORITHM_RANGES_SET_DIFFERENCE_H diff --git a/libcxx/include/__algorithm/ranges_set_intersection.h b/libcxx/include/__algorithm/ranges_set_intersection.h new file mode 100644 index 000000000000..05af91ae29e5 --- /dev/null +++ b/libcxx/include/__algorithm/ranges_set_intersection.h @@ -0,0 +1,117 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ALGORITHM_RANGES_SET_INTERSECTION_H +#define _LIBCPP___ALGORITHM_RANGES_SET_INTERSECTION_H + +#include <__algorithm/in_in_out_result.h> +#include <__algorithm/iterator_operations.h> +#include <__algorithm/make_projected.h> +#include <__algorithm/set_intersection.h> +#include <__config> +#include <__functional/identity.h> +#include <__functional/invoke.h> +#include <__functional/ranges_operations.h> +#include <__iterator/concepts.h> +#include <__iterator/mergeable.h> +#include <__ranges/access.h> +#include <__ranges/concepts.h> +#include <__ranges/dangling.h> +#include <__utility/move.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +_LIBCPP_BEGIN_NAMESPACE_STD + +namespace ranges { + +template <class _InIter1, class _InIter2, class _OutIter> +using set_intersection_result = in_in_out_result<_InIter1, _InIter2, _OutIter>; + +namespace __set_intersection { + +struct __fn { + template < + input_iterator _InIter1, + sentinel_for<_InIter1> _Sent1, + input_iterator _InIter2, + sentinel_for<_InIter2> _Sent2, + weakly_incrementable _OutIter, + class _Comp = less, + class _Proj1 = identity, + class _Proj2 = identity> + requires mergeable<_InIter1, _InIter2, _OutIter, _Comp, _Proj1, _Proj2> + _LIBCPP_HIDE_FROM_ABI constexpr set_intersection_result<_InIter1, _InIter2, _OutIter> operator()( + _InIter1 __first1, + _Sent1 __last1, + _InIter2 __first2, + _Sent2 __last2, + _OutIter __result, + _Comp __comp = {}, + _Proj1 __proj1 = {}, + _Proj2 __proj2 = {}) const { + auto __ret = std::__set_intersection<_RangeAlgPolicy>( + std::move(__first1), + std::move(__last1), + std::move(__first2), + std::move(__last2), + std::move(__result), + ranges::__make_projected_comp(__comp, __proj1, __proj2)); + return 
{std::move(__ret.__in1_), std::move(__ret.__in2_), std::move(__ret.__out_)}; + } + + template < + input_range _Range1, + input_range _Range2, + weakly_incrementable _OutIter, + class _Comp = less, + class _Proj1 = identity, + class _Proj2 = identity> + requires mergeable< + iterator_t<_Range1>, + iterator_t<_Range2>, + _OutIter, + _Comp, + _Proj1, + _Proj2> + _LIBCPP_HIDE_FROM_ABI constexpr set_intersection_result<borrowed_iterator_t<_Range1>, + borrowed_iterator_t<_Range2>, + _OutIter> + operator()( + _Range1&& __range1, + _Range2&& __range2, + _OutIter __result, + _Comp __comp = {}, + _Proj1 __proj1 = {}, + _Proj2 __proj2 = {}) const { + auto __ret = std::__set_intersection<_RangeAlgPolicy>( + ranges::begin(__range1), + ranges::end(__range1), + ranges::begin(__range2), + ranges::end(__range2), + std::move(__result), + ranges::__make_projected_comp(__comp, __proj1, __proj2)); + return {std::move(__ret.__in1_), std::move(__ret.__in2_), std::move(__ret.__out_)}; + } +}; + +} // namespace __set_intersection + +inline namespace __cpo { + inline constexpr auto set_intersection = __set_intersection::__fn{}; +} // namespace __cpo +} // namespace ranges + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) +#endif // _LIBCPP___ALGORITHM_RANGES_SET_INTERSECTION_H diff --git a/libcxx/include/__algorithm/ranges_set_symmetric_difference.h b/libcxx/include/__algorithm/ranges_set_symmetric_difference.h new file mode 100644 index 000000000000..c54cf3a65112 --- /dev/null +++ b/libcxx/include/__algorithm/ranges_set_symmetric_difference.h @@ -0,0 +1,116 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
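Like ranges::set_difference earlier in this diff, ranges::set_intersection above forwards to the classic algorithm through __make_projected_comp, so both inputs must already be sorted with respect to the comparator. A usage sketch (standard C++20, not part of the headers):

#include <algorithm>
#include <iterator>
#include <vector>

int main() {
  std::vector<int> a{1, 2, 4, 6};
  std::vector<int> b{2, 3, 6};
  std::vector<int> out;
  std::ranges::set_intersection(a, b, std::back_inserter(out)); // out == {2, 6}
  out.clear();
  std::ranges::set_difference(a, b, std::back_inserter(out));   // out == {1, 4}
}
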
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ALGORITHM_RANGES_SET_SYMMETRIC_DIFFERENCE_H +#define _LIBCPP___ALGORITHM_RANGES_SET_SYMMETRIC_DIFFERENCE_H + +#include <__algorithm/in_in_out_result.h> +#include <__algorithm/make_projected.h> +#include <__algorithm/set_symmetric_difference.h> +#include <__config> +#include <__functional/identity.h> +#include <__functional/invoke.h> +#include <__functional/ranges_operations.h> +#include <__iterator/concepts.h> +#include <__iterator/mergeable.h> +#include <__ranges/access.h> +#include <__ranges/concepts.h> +#include <__ranges/dangling.h> +#include <__utility/move.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +_LIBCPP_BEGIN_NAMESPACE_STD + +namespace ranges { + +template <class _InIter1, class _InIter2, class _OutIter> +using set_symmetric_difference_result = in_in_out_result<_InIter1, _InIter2, _OutIter>; + +namespace __set_symmetric_difference { + +struct __fn { + template < + input_iterator _InIter1, + sentinel_for<_InIter1> _Sent1, + input_iterator _InIter2, + sentinel_for<_InIter2> _Sent2, + weakly_incrementable _OutIter, + class _Comp = ranges::less, + class _Proj1 = identity, + class _Proj2 = identity> + requires mergeable<_InIter1, _InIter2, _OutIter, _Comp, _Proj1, _Proj2> + _LIBCPP_HIDE_FROM_ABI constexpr set_symmetric_difference_result<_InIter1, _InIter2, _OutIter> operator()( + _InIter1 __first1, + _Sent1 __last1, + _InIter2 __first2, + _Sent2 __last2, + _OutIter __result, + _Comp __comp = {}, + _Proj1 __proj1 = {}, + _Proj2 __proj2 = {}) const { + auto __ret = std::__set_symmetric_difference( + std::move(__first1), + std::move(__last1), + std::move(__first2), + std::move(__last2), + std::move(__result), + ranges::__make_projected_comp(__comp, __proj1, __proj2)); + return {std::move(__ret.__in1_), std::move(__ret.__in2_), std::move(__ret.__out_)}; + } + + template < + input_range _Range1, + input_range _Range2, + weakly_incrementable _OutIter, + class _Comp = ranges::less, + class _Proj1 = identity, + class _Proj2 = identity> + requires mergeable< + iterator_t<_Range1>, + iterator_t<_Range2>, + _OutIter, + _Comp, + _Proj1, + _Proj2> + _LIBCPP_HIDE_FROM_ABI constexpr set_symmetric_difference_result<borrowed_iterator_t<_Range1>, + borrowed_iterator_t<_Range2>, + _OutIter> + operator()( + _Range1&& __range1, + _Range2&& __range2, + _OutIter __result, + _Comp __comp = {}, + _Proj1 __proj1 = {}, + _Proj2 __proj2 = {}) const { + auto __ret = std::__set_symmetric_difference( + ranges::begin(__range1), + ranges::end(__range1), + ranges::begin(__range2), + ranges::end(__range2), + std::move(__result), + ranges::__make_projected_comp(__comp, __proj1, __proj2)); + return {std::move(__ret.__in1_), std::move(__ret.__in2_), std::move(__ret.__out_)}; + } +}; + +} // namespace __set_symmetric_difference + +inline namespace __cpo { + inline constexpr auto set_symmetric_difference = __set_symmetric_difference::__fn{}; +} // namespace __cpo +} // namespace ranges + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) +#endif // _LIBCPP___ALGORITHM_RANGES_SET_SYMMETRIC_DIFFERENCE_H diff --git a/libcxx/include/__algorithm/ranges_set_union.h b/libcxx/include/__algorithm/ranges_set_union.h new file mode 100644 index 000000000000..39537503b98f --- /dev/null +++ 
b/libcxx/include/__algorithm/ranges_set_union.h @@ -0,0 +1,86 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ALGORITHM_RANGES_SET_UNION_H +#define _LIBCPP___ALGORITHM_RANGES_SET_UNION_H + +#include <__algorithm/in_in_out_result.h> +#include <__algorithm/make_projected.h> +#include <__algorithm/set_union.h> +#include <__config> +#include <__functional/identity.h> +#include <__functional/invoke.h> +#include <__functional/ranges_operations.h> +#include <__iterator/concepts.h> +#include <__iterator/iterator_traits.h> +#include <__iterator/mergeable.h> +#include <__iterator/projected.h> +#include <__ranges/access.h> +#include <__ranges/concepts.h> +#include <__ranges/dangling.h> +#include <__utility/forward.h> +#include <__utility/move.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +_LIBCPP_BEGIN_NAMESPACE_STD + +namespace ranges { + +template <class _InIter1, class _InIter2, class _OutIter> +using set_union_result = in_in_out_result<_InIter1, _InIter2, _OutIter>; + +namespace __set_union { + +struct __fn { + + template <input_iterator _InIter1, sentinel_for<_InIter1> _Sent1, + input_iterator _InIter2, sentinel_for<_InIter2> _Sent2, + weakly_incrementable _OutIter, class _Comp = ranges::less, + class _Proj1 = identity, class _Proj2 = identity> + requires mergeable<_InIter1, _InIter2, _OutIter, _Comp, _Proj1, _Proj2> + _LIBCPP_HIDE_FROM_ABI constexpr + set_union_result<_InIter1, _InIter2, _OutIter> + operator()(_InIter1 __first1, _Sent1 __last1, _InIter2 __first2, _Sent2 __last2, _OutIter __result, _Comp __comp = {}, + _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) const { + // TODO: implement + (void)__first1; (void)__last1; (void)__first2; (void)__last2; (void)__result; (void)__comp; (void)__proj1; + (void)__proj2; + return {}; + } + + template <input_range _Range1, input_range _Range2, weakly_incrementable _OutIter, + class _Comp = ranges::less, class _Proj1 = identity, class _Proj2 = identity> + requires mergeable<iterator_t<_Range1>, iterator_t<_Range2>, _OutIter, _Comp, _Proj1, _Proj2> + _LIBCPP_HIDE_FROM_ABI constexpr + set_union_result<borrowed_iterator_t<_Range1>, borrowed_iterator_t<_Range2>, _OutIter> + operator()(_Range1&& __range1, _Range2&& __range2, _OutIter __result, _Comp __comp = {}, + _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) const { + // TODO: implement + (void)__range1; (void)__range2; (void)__result; (void)__comp; (void)__proj1; (void)__proj2; + return {}; + } + +}; + +} // namespace __set_union + +inline namespace __cpo { + inline constexpr auto set_union = __set_union::__fn{}; +} // namespace __cpo +} // namespace ranges + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +#endif // _LIBCPP___ALGORITHM_RANGES_SET_UNION_H diff --git a/libcxx/include/__algorithm/ranges_shuffle.h b/libcxx/include/__algorithm/ranges_shuffle.h new file mode 100644 index 000000000000..bf9c28b4ce26 --- /dev/null +++ b/libcxx/include/__algorithm/ranges_shuffle.h @@ -0,0 +1,74 @@ +//===----------------------------------------------------------------------===// +// +// Part of the 
LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ALGORITHM_RANGES_SHUFFLE_H +#define _LIBCPP___ALGORITHM_RANGES_SHUFFLE_H + +#include <__algorithm/make_projected.h> +#include <__algorithm/shuffle.h> +#include <__config> +#include <__functional/identity.h> +#include <__functional/invoke.h> +#include <__functional/ranges_operations.h> +#include <__iterator/concepts.h> +#include <__iterator/iterator_traits.h> +#include <__iterator/permutable.h> +#include <__iterator/projected.h> +#include <__random/uniform_random_bit_generator.h> +#include <__ranges/access.h> +#include <__ranges/concepts.h> +#include <__ranges/dangling.h> +#include <__type_traits/remove_reference.h> +#include <__utility/forward.h> +#include <__utility/move.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +_LIBCPP_BEGIN_NAMESPACE_STD + +namespace ranges { +namespace __shuffle { + +struct __fn { + + template <random_access_iterator _Iter, sentinel_for<_Iter> _Sent, class _Gen> + requires permutable<_Iter> && uniform_random_bit_generator<remove_reference_t<_Gen>> + _LIBCPP_HIDE_FROM_ABI + _Iter operator()(_Iter __first, _Sent __last, _Gen&& __gen) const { + // TODO: implement + (void)__first; (void)__last; (void)__gen; + return {}; + } + + template<random_access_range _Range, class _Gen> + requires permutable<iterator_t<_Range>> && uniform_random_bit_generator<remove_reference_t<_Gen>> + _LIBCPP_HIDE_FROM_ABI + borrowed_iterator_t<_Range> operator()(_Range&& __range, _Gen&& __gen) const { + // TODO: implement + (void)__range; (void)__gen; + return {}; + } + +}; + +} // namespace __shuffle + +inline namespace __cpo { + inline constexpr auto shuffle = __shuffle::__fn{}; +} // namespace __cpo +} // namespace ranges + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +#endif // _LIBCPP___ALGORITHM_RANGES_SHUFFLE_H diff --git a/libcxx/include/__algorithm/ranges_sort_heap.h b/libcxx/include/__algorithm/ranges_sort_heap.h new file mode 100644 index 000000000000..c753e20c44a6 --- /dev/null +++ b/libcxx/include/__algorithm/ranges_sort_heap.h @@ -0,0 +1,79 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
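The ranges::shuffle operator() bodies above are TODO placeholders in this snapshot, so the sketch below shows the target semantics rather than what this header currently executes; std::mt19937 is just one generator satisfying uniform_random_bit_generator:

#include <algorithm>
#include <random>
#include <vector>

int main() {
  std::vector<int> v{1, 2, 3, 4, 5};
  std::mt19937 gen{42};
  // Once the TODO above is filled in, this produces a permutation of v driven by gen.
  std::ranges::shuffle(v, gen);
}
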
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ALGORITHM_RANGES_SORT_HEAP_H +#define _LIBCPP___ALGORITHM_RANGES_SORT_HEAP_H + +#include <__algorithm/make_projected.h> +#include <__algorithm/sort_heap.h> +#include <__concepts/same_as.h> +#include <__config> +#include <__functional/identity.h> +#include <__functional/invoke.h> +#include <__functional/ranges_operations.h> +#include <__iterator/concepts.h> +#include <__iterator/iterator_traits.h> +#include <__iterator/next.h> +#include <__iterator/projected.h> +#include <__iterator/sortable.h> +#include <__ranges/access.h> +#include <__ranges/concepts.h> +#include <__ranges/dangling.h> +#include <__utility/forward.h> +#include <__utility/move.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +_LIBCPP_BEGIN_NAMESPACE_STD + +namespace ranges { +namespace __sort_heap { + +struct __fn { + template <class _Iter, class _Sent, class _Comp, class _Proj> + _LIBCPP_HIDE_FROM_ABI constexpr static + _Iter __sort_heap_fn_impl(_Iter __first, _Sent __last, _Comp& __comp, _Proj& __proj) { + auto __last_iter = ranges::next(__first, __last); + + auto&& __projected_comp = ranges::__make_projected_comp(__comp, __proj); + std::__sort_heap(std::move(__first), __last_iter, __projected_comp); + + return __last_iter; + } + + template <random_access_iterator _Iter, sentinel_for<_Iter> _Sent, class _Comp = ranges::less, class _Proj = identity> + requires sortable<_Iter, _Comp, _Proj> + _LIBCPP_HIDE_FROM_ABI constexpr + _Iter operator()(_Iter __first, _Sent __last, _Comp __comp = {}, _Proj __proj = {}) const { + return __sort_heap_fn_impl(std::move(__first), std::move(__last), __comp, __proj); + } + + template <random_access_range _Range, class _Comp = ranges::less, class _Proj = identity> + requires sortable<iterator_t<_Range>, _Comp, _Proj> + _LIBCPP_HIDE_FROM_ABI constexpr + borrowed_iterator_t<_Range> operator()(_Range&& __r, _Comp __comp = {}, _Proj __proj = {}) const { + return __sort_heap_fn_impl(ranges::begin(__r), ranges::end(__r), __comp, __proj); + } +}; + +} // namespace __sort_heap + +inline namespace __cpo { + inline constexpr auto sort_heap = __sort_heap::__fn{}; +} // namespace __cpo +} // namespace ranges + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +#endif // _LIBCPP___ALGORITHM_RANGES_SORT_HEAP_H diff --git a/libcxx/include/__algorithm/ranges_stable_partition.h b/libcxx/include/__algorithm/ranges_stable_partition.h new file mode 100644 index 000000000000..178c953ebdae --- /dev/null +++ b/libcxx/include/__algorithm/ranges_stable_partition.h @@ -0,0 +1,75 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
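ranges::sort_heap above materializes the end iterator with ranges::next, hands a projected comparator to the classic std::__sort_heap, and returns that iterator. Usage requires that the input already be a heap; a sketch assuming ranges::make_heap (standard C++20) is also available in this snapshot:

#include <algorithm>
#include <vector>

int main() {
  std::vector<int> v{3, 1, 4, 1, 5};
  std::ranges::make_heap(v); // precondition for sort_heap: [begin, end) is a heap
  std::ranges::sort_heap(v); // v == {1, 1, 3, 4, 5}
}
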
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ALGORITHM_RANGES_STABLE_PARTITION_H +#define _LIBCPP___ALGORITHM_RANGES_STABLE_PARTITION_H + +#include <__algorithm/make_projected.h> +#include <__algorithm/stable_partition.h> +#include <__config> +#include <__functional/identity.h> +#include <__functional/invoke.h> +#include <__functional/ranges_operations.h> +#include <__iterator/concepts.h> +#include <__iterator/iterator_traits.h> +#include <__iterator/permutable.h> +#include <__iterator/projected.h> +#include <__ranges/access.h> +#include <__ranges/concepts.h> +#include <__ranges/dangling.h> +#include <__ranges/subrange.h> +#include <__utility/forward.h> +#include <__utility/move.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +_LIBCPP_BEGIN_NAMESPACE_STD + +namespace ranges { +namespace __stable_partition { + +struct __fn { + + template <bidirectional_iterator _Iter, sentinel_for<_Iter> _Sent, class _Proj = identity, + indirect_unary_predicate<projected<_Iter, _Proj>> _Pred> + requires permutable<_Iter> + _LIBCPP_HIDE_FROM_ABI + subrange<_Iter> operator()(_Iter __first, _Sent __last, _Pred __pred, _Proj __proj = {}) const { + // TODO: implement + (void)__first; (void)__last; (void)__pred; (void)__proj; + return {}; + } + + template <bidirectional_range _Range, class _Proj = identity, + indirect_unary_predicate<projected<iterator_t<_Range>, _Proj>> _Pred> + requires permutable<iterator_t<_Range>> + _LIBCPP_HIDE_FROM_ABI + borrowed_subrange_t<_Range> operator()(_Range&& __range, _Pred __pred, _Proj __proj = {}) const { + // TODO: implement + (void)__range; (void)__pred; (void)__proj; + return {}; + } + +}; + +} // namespace __stable_partition + +inline namespace __cpo { + inline constexpr auto stable_partition = __stable_partition::__fn{}; +} // namespace __cpo +} // namespace ranges + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +#endif // _LIBCPP___ALGORITHM_RANGES_STABLE_PARTITION_H diff --git a/libcxx/include/__algorithm/ranges_unique.h b/libcxx/include/__algorithm/ranges_unique.h new file mode 100644 index 000000000000..bdf755e9406e --- /dev/null +++ b/libcxx/include/__algorithm/ranges_unique.h @@ -0,0 +1,74 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
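ranges::stable_partition above is likewise a TODO stub for now; per the standard, it is expected to move predicate-true elements to the front while preserving relative order on both sides, and to return the subrange of predicate-false elements. A semantics sketch, not what this snapshot executes:

#include <algorithm>
#include <vector>

int main() {
  std::vector<int> v{1, 2, 3, 4, 5, 6};
  auto tail = std::ranges::stable_partition(v, [](int x) { return x % 2 == 0; });
  // v == {2, 4, 6, 1, 3, 5}; tail spans the odd group {1, 3, 5}.
}
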
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ALGORITHM_RANGES_UNIQUE_H +#define _LIBCPP___ALGORITHM_RANGES_UNIQUE_H + +#include <__algorithm/make_projected.h> +#include <__algorithm/unique.h> +#include <__config> +#include <__functional/identity.h> +#include <__functional/invoke.h> +#include <__functional/ranges_operations.h> +#include <__iterator/concepts.h> +#include <__iterator/iterator_traits.h> +#include <__iterator/permutable.h> +#include <__iterator/projected.h> +#include <__ranges/access.h> +#include <__ranges/concepts.h> +#include <__ranges/dangling.h> +#include <__ranges/subrange.h> +#include <__utility/forward.h> +#include <__utility/move.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +_LIBCPP_BEGIN_NAMESPACE_STD + +namespace ranges { +namespace __unique { + +struct __fn { + + template <permutable _Iter, sentinel_for<_Iter> _Sent, class _Proj = identity, + indirect_equivalence_relation<projected<_Iter, _Proj>> _Comp = ranges::equal_to> + _LIBCPP_HIDE_FROM_ABI constexpr + subrange<_Iter> operator()(_Iter __first, _Sent __last, _Comp __comp = {}, _Proj __proj = {}) const { + // TODO: implement + (void)__first; (void)__last; (void)__comp; (void)__proj; + return {}; + } + + template <forward_range _Range, class _Proj = identity, + indirect_equivalence_relation<projected<iterator_t<_Range>, _Proj>> _Comp = ranges::equal_to> + requires permutable<iterator_t<_Range>> + _LIBCPP_HIDE_FROM_ABI constexpr + borrowed_subrange_t<_Range> operator()(_Range&& __range, _Comp __comp = {}, _Proj __proj = {}) const { + // TODO: implement + (void)__range; (void)__comp; (void)__proj; + return {}; + } + +}; + +} // namespace __unique + +inline namespace __cpo { + inline constexpr auto unique = __unique::__fn{}; +} // namespace __cpo +} // namespace ranges + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +#endif // _LIBCPP___ALGORITHM_RANGES_UNIQUE_H diff --git a/libcxx/include/__algorithm/ranges_unique_copy.h b/libcxx/include/__algorithm/ranges_unique_copy.h new file mode 100644 index 000000000000..56361aa8ae2f --- /dev/null +++ b/libcxx/include/__algorithm/ranges_unique_copy.h @@ -0,0 +1,88 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
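ranges::unique above (and ranges::unique_copy just below) are also TODO stubs in this commit; the standard semantics they target collapse only adjacent duplicates, using ranges::equal_to by default. A sketch of that behavior, not of this snapshot:

#include <algorithm>
#include <vector>

int main() {
  std::vector<int> v{1, 1, 2, 2, 2, 3, 1};
  // Only consecutive duplicates collapse, so the trailing 1 survives.
  auto tail = std::ranges::unique(v);
  v.erase(tail.begin(), tail.end()); // v == {1, 2, 3, 1}
}
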
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ALGORITHM_RANGES_UNIQUE_COPY_H +#define _LIBCPP___ALGORITHM_RANGES_UNIQUE_COPY_H + +#include <__algorithm/in_out_result.h> +#include <__algorithm/make_projected.h> +#include <__algorithm/unique_copy.h> +#include <__concepts/same_as.h> +#include <__config> +#include <__functional/identity.h> +#include <__functional/invoke.h> +#include <__functional/ranges_operations.h> +#include <__iterator/concepts.h> +#include <__iterator/iterator_traits.h> +#include <__iterator/projected.h> +#include <__iterator/readable_traits.h> +#include <__ranges/access.h> +#include <__ranges/concepts.h> +#include <__ranges/dangling.h> +#include <__utility/forward.h> +#include <__utility/move.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +_LIBCPP_BEGIN_NAMESPACE_STD + +namespace ranges { + +template <class _InIter, class _OutIter> +using unique_copy_result = in_out_result<_InIter, _OutIter>; + +namespace __unique_copy { + +struct __fn { + + template <input_iterator _InIter, sentinel_for<_InIter> _Sent, weakly_incrementable _OutIter, class _Proj = identity, + indirect_equivalence_relation<projected<_InIter, _Proj>> _Comp = ranges::equal_to> + requires indirectly_copyable<_InIter, _OutIter> && + (forward_iterator<_InIter> || + (input_iterator<_OutIter> && same_as<iter_value_t<_InIter>, iter_value_t<_OutIter>>) || + indirectly_copyable_storable<_InIter, _OutIter>) + _LIBCPP_HIDE_FROM_ABI constexpr + unique_copy_result<_InIter, _OutIter> + operator()(_InIter __first, _Sent __last, _OutIter __result, _Comp __comp = {}, _Proj __proj = {}) const { + // TODO: implement + (void)__first; (void)__last; (void)__result; (void)__comp; (void)__proj; + return {}; + } + + template <input_range _Range, weakly_incrementable _OutIter, class _Proj = identity, + indirect_equivalence_relation<projected<iterator_t<_Range>, _Proj>> _Comp = ranges::equal_to> + requires indirectly_copyable<iterator_t<_Range>, _OutIter> && + (forward_iterator<iterator_t<_Range>> || + (input_iterator<_OutIter> && same_as<range_value_t<_Range>, iter_value_t<_OutIter>>) || + indirectly_copyable_storable<iterator_t<_Range>, _OutIter>) + _LIBCPP_HIDE_FROM_ABI constexpr + unique_copy_result<borrowed_iterator_t<_Range>, _OutIter> + operator()(_Range&& __range, _OutIter __result, _Comp __comp = {}, _Proj __proj = {}) const { + // TODO: implement + (void)__range; (void)__result; (void)__comp; (void)__proj; + return {}; + } + +}; + +} // namespace __unique_copy + +inline namespace __cpo { + inline constexpr auto unique_copy = __unique_copy::__fn{}; +} // namespace __cpo +} // namespace ranges + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +#endif // _LIBCPP___ALGORITHM_RANGES_UNIQUE_COPY_H diff --git a/libcxx/include/__algorithm/ranges_upper_bound.h b/libcxx/include/__algorithm/ranges_upper_bound.h index 94b5269c86af..3c63249248fa 100644 --- a/libcxx/include/__algorithm/ranges_upper_bound.h +++ b/libcxx/include/__algorithm/ranges_upper_bound.h @@ -40,7 +40,7 @@ struct __fn { return !std::invoke(__comp, __rhs, __lhs); }; - return std::__lower_bound_impl<_RangesIterOps>(__first, __last, __value, __comp_lhs_rhs_swapped, __proj); + return std::__lower_bound_impl<_RangeAlgPolicy>(__first, __last, __value, __comp_lhs_rhs_swapped, 
__proj); } template <forward_range _Range, class _Type, class _Proj = identity, @@ -54,7 +54,7 @@ struct __fn { return !std::invoke(__comp, __rhs, __lhs); }; - return std::__lower_bound_impl<_RangesIterOps>(ranges::begin(__r), + return std::__lower_bound_impl<_RangeAlgPolicy>(ranges::begin(__r), ranges::end(__r), __value, __comp_lhs_rhs_swapped, diff --git a/libcxx/include/__algorithm/remove.h b/libcxx/include/__algorithm/remove.h index c00f96f78a63..8a7e99ba09a1 100644 --- a/libcxx/include/__algorithm/remove.h +++ b/libcxx/include/__algorithm/remove.h @@ -22,15 +22,15 @@ _LIBCPP_BEGIN_NAMESPACE_STD template <class _ForwardIterator, class _Tp> _LIBCPP_NODISCARD_EXT _LIBCPP_CONSTEXPR_AFTER_CXX17 _ForwardIterator -remove(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value_) +remove(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) { - __first = _VSTD::find(__first, __last, __value_); + __first = _VSTD::find(__first, __last, __value); if (__first != __last) { _ForwardIterator __i = __first; while (++__i != __last) { - if (!(*__i == __value_)) + if (!(*__i == __value)) { *__first = _VSTD::move(*__i); ++__first; diff --git a/libcxx/include/__algorithm/remove_copy.h b/libcxx/include/__algorithm/remove_copy.h index a29a385af9ac..55fc1d90a1e7 100644 --- a/libcxx/include/__algorithm/remove_copy.h +++ b/libcxx/include/__algorithm/remove_copy.h @@ -20,11 +20,11 @@ _LIBCPP_BEGIN_NAMESPACE_STD template <class _InputIterator, class _OutputIterator, class _Tp> inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 _OutputIterator -remove_copy(_InputIterator __first, _InputIterator __last, _OutputIterator __result, const _Tp& __value_) +remove_copy(_InputIterator __first, _InputIterator __last, _OutputIterator __result, const _Tp& __value) { for (; __first != __last; ++__first) { - if (!(*__first == __value_)) + if (!(*__first == __value)) { *__result = *__first; ++__result; diff --git a/libcxx/include/__algorithm/search.h b/libcxx/include/__algorithm/search.h index d89ec2b1c5bc..4ead6cac82b7 100644 --- a/libcxx/include/__algorithm/search.h +++ b/libcxx/include/__algorithm/search.h @@ -11,9 +11,15 @@ #define _LIBCPP___ALGORITHM_SEARCH_H #include <__algorithm/comp.h> +#include <__algorithm/iterator_operations.h> #include <__config> +#include <__functional/identity.h> +#include <__iterator/advance.h> +#include <__iterator/concepts.h> #include <__iterator/iterator_traits.h> +#include <__type_traits/is_callable.h> #include <__utility/pair.h> +#include <type_traits> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -21,31 +27,43 @@ _LIBCPP_BEGIN_NAMESPACE_STD -template <class _BinaryPredicate, class _ForwardIterator1, class _ForwardIterator2> -pair<_ForwardIterator1, _ForwardIterator1> - _LIBCPP_CONSTEXPR_AFTER_CXX11 __search(_ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _ForwardIterator2 __last2, - _BinaryPredicate __pred, forward_iterator_tag, forward_iterator_tag) { +template <class _AlgPolicy, + class _Iter1, class _Sent1, + class _Iter2, class _Sent2, + class _Pred, + class _Proj1, + class _Proj2> +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 +pair<_Iter1, _Iter1> __search_forward_impl(_Iter1 __first1, _Sent1 __last1, + _Iter2 __first2, _Sent2 __last2, + _Pred& __pred, + _Proj1& __proj1, + _Proj2& __proj2) { if (__first2 == __last2) - return _VSTD::make_pair(__first1, __first1); // Everything matches an empty sequence + return std::make_pair(__first1, __first1); // Everything matches 
an empty sequence while (true) { // Find first element in sequence 1 that matches *__first2, with a minimum of loop checks while (true) { - if (__first1 == __last1) // return __last1 if no element matches *__first2 - return _VSTD::make_pair(__last1, __last1); - if (__pred(*__first1, *__first2)) + if (__first1 == __last1) { // return __last1 if no element matches *__first2 + _IterOps<_AlgPolicy>::__advance_to(__first1, __last1); + return std::make_pair(__first1, __first1); + } + if (std::__invoke(__pred, std::__invoke(__proj1, *__first1), std::__invoke(__proj2, *__first2))) break; ++__first1; } // *__first1 matches *__first2, now match elements after here - _ForwardIterator1 __m1 = __first1; - _ForwardIterator2 __m2 = __first2; + _Iter1 __m1 = __first1; + _Iter2 __m2 = __first2; while (true) { if (++__m2 == __last2) // If pattern exhausted, __first1 is the answer (works for 1 element pattern) - return _VSTD::make_pair(__first1, __m1); + return std::make_pair(__first1, ++__m1); if (++__m1 == __last1) { // Otherwise if source exhausted, pattern not found + return std::make_pair(__m1, __m1); + } + + // if there is a mismatch, restart with a new __first1 + if (!std::__invoke(__pred, std::__invoke(__proj1, *__m1), std::__invoke(__proj2, *__m2))) { ++__first1; break; @@ -54,38 +72,42 @@ pair<_ForwardIterator1, _ForwardIterator1> } } -template <class _BinaryPredicate, class _RandomAccessIterator1, class _RandomAccessIterator2> -_LIBCPP_CONSTEXPR_AFTER_CXX11 pair<_RandomAccessIterator1, _RandomAccessIterator1> -__search(_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, - _RandomAccessIterator2 __last2, _BinaryPredicate __pred, random_access_iterator_tag, - random_access_iterator_tag) { - typedef typename iterator_traits<_RandomAccessIterator1>::difference_type _D1; - typedef typename iterator_traits<_RandomAccessIterator2>::difference_type _D2; - // Take advantage of knowing source and pattern lengths. 
Stop short when source is smaller than pattern - const _D2 __len2 = __last2 - __first2; - if (__len2 == 0) - return _VSTD::make_pair(__first1, __first1); - const _D1 __len1 = __last1 - __first1; - if (__len1 < __len2) - return _VSTD::make_pair(__last1, __last1); - const _RandomAccessIterator1 __s = __last1 - _D1(__len2 - 1); // Start of pattern match can't go beyond here +template <class _AlgPolicy, + class _Iter1, class _Sent1, + class _Iter2, class _Sent2, + class _Pred, + class _Proj1, + class _Proj2, + class _DiffT1, + class _DiffT2> +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 +pair<_Iter1, _Iter1> __search_random_access_impl(_Iter1 __first1, _Sent1 __last1, + _Iter2 __first2, _Sent2 __last2, + _Pred& __pred, + _Proj1& __proj1, + _Proj2& __proj2, + _DiffT1 __size1, + _DiffT2 __size2) { + const _Iter1 __s = __first1 + __size1 - _DiffT1(__size2 - 1); // Start of pattern match can't go beyond here while (true) { while (true) { - if (__first1 == __s) - return _VSTD::make_pair(__last1, __last1); - if (__pred(*__first1, *__first2)) + if (__first1 == __s) { + _IterOps<_AlgPolicy>::__advance_to(__first1, __last1); + return std::make_pair(__first1, __first1); + } + if (std::__invoke(__pred, std::__invoke(__proj1, *__first1), std::__invoke(__proj2, *__first2))) break; ++__first1; } - _RandomAccessIterator1 __m1 = __first1; - _RandomAccessIterator2 __m2 = __first2; + _Iter1 __m1 = __first1; + _Iter2 __m2 = __first2; while (true) { if (++__m2 == __last2) - return _VSTD::make_pair(__first1, __first1 + _D1(__len2)); + return std::make_pair(__first1, __first1 + _DiffT1(__size2)); ++__m1; // no need to check range on __m1 because __s guarantees we have enough source - if (!__pred(*__m1, *__m2)) { + if (!std::__invoke(__pred, std::__invoke(__proj1, *__m1), std::__invoke(__proj2, *__m2))) { ++__first1; break; } @@ -93,22 +115,78 @@ __search(_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _Rando } } +template <class _Iter1, class _Sent1, + class _Iter2, class _Sent2, + class _Pred, + class _Proj1, + class _Proj2> +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 +pair<_Iter1, _Iter1> __search_impl(_Iter1 __first1, _Sent1 __last1, + _Iter2 __first2, _Sent2 __last2, + _Pred& __pred, + _Proj1& __proj1, + _Proj2& __proj2, + __enable_if_t<__is_cpp17_random_access_iterator<_Iter1>::value + && __is_cpp17_random_access_iterator<_Iter2>::value>* = nullptr) { + + auto __size2 = __last2 - __first2; + if (__size2 == 0) + return std::make_pair(__first1, __first1); + + auto __size1 = __last1 - __first1; + if (__size1 < __size2) { + return std::make_pair(__last1, __last1); + } + + return std::__search_random_access_impl<_ClassicAlgPolicy>(__first1, __last1, + __first2, __last2, + __pred, + __proj1, + __proj2, + __size1, + __size2); +} + +template <class _Iter1, class _Sent1, + class _Iter2, class _Sent2, + class _Pred, + class _Proj1, + class _Proj2> +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 +pair<_Iter1, _Iter1> __search_impl(_Iter1 __first1, _Sent1 __last1, + _Iter2 __first2, _Sent2 __last2, + _Pred& __pred, + _Proj1& __proj1, + _Proj2& __proj2, + __enable_if_t<__is_cpp17_forward_iterator<_Iter1>::value + && __is_cpp17_forward_iterator<_Iter2>::value + && !(__is_cpp17_random_access_iterator<_Iter1>::value + && __is_cpp17_random_access_iterator<_Iter2>::value)>* = nullptr) { + return std::__search_forward_impl<_ClassicAlgPolicy>(__first1, __last1, + __first2, __last2, + __pred, + __proj1, + __proj2); +} + template <class _ForwardIterator1, class _ForwardIterator2, class 
_BinaryPredicate> -_LIBCPP_NODISCARD_EXT inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 _ForwardIterator1 -search(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2, - _BinaryPredicate __pred) { - return _VSTD::__search<_BinaryPredicate&>( - __first1, __last1, __first2, __last2, __pred, - typename iterator_traits<_ForwardIterator1>::iterator_category(), - typename iterator_traits<_ForwardIterator2>::iterator_category()).first; +_LIBCPP_NODISCARD_EXT inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 +_ForwardIterator1 search(_ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _ForwardIterator2 __last2, + _BinaryPredicate __pred) { + static_assert(__is_callable<_BinaryPredicate, decltype(*__first1), decltype(*__first2)>::value, + "BinaryPredicate has to be callable"); + auto __proj = __identity(); + return std::__search_impl(__first1, __last1, __first2, __last2, __pred, __proj, __proj).first; } template <class _ForwardIterator1, class _ForwardIterator2> -_LIBCPP_NODISCARD_EXT inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 _ForwardIterator1 -search(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2) { - typedef typename iterator_traits<_ForwardIterator1>::value_type __v1; - typedef typename iterator_traits<_ForwardIterator2>::value_type __v2; - return _VSTD::search(__first1, __last1, __first2, __last2, __equal_to<__v1, __v2>()); +_LIBCPP_NODISCARD_EXT inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 +_ForwardIterator1 search(_ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _ForwardIterator2 __last2) { + using __v1 = typename iterator_traits<_ForwardIterator1>::value_type; + using __v2 = typename iterator_traits<_ForwardIterator2>::value_type; + return std::search(__first1, __last1, __first2, __last2, __equal_to<__v1, __v2>()); } #if _LIBCPP_STD_VER > 14 diff --git a/libcxx/include/__algorithm/search_n.h b/libcxx/include/__algorithm/search_n.h index 4c083de65ee2..2a0547565ee9 100644 --- a/libcxx/include/__algorithm/search_n.h +++ b/libcxx/include/__algorithm/search_n.h @@ -11,8 +11,15 @@ #define _LIBCPP___ALGORITHM_SEARCH_N_H #include <__algorithm/comp.h> +#include <__algorithm/iterator_operations.h> #include <__config> +#include <__functional/identity.h> +#include <__iterator/advance.h> +#include <__iterator/concepts.h> +#include <__iterator/distance.h> #include <__iterator/iterator_traits.h> +#include <__ranges/concepts.h> +#include <__utility/pair.h> #include <type_traits> // __convert_to_integral #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -21,30 +28,39 @@ _LIBCPP_BEGIN_NAMESPACE_STD -template <class _BinaryPredicate, class _ForwardIterator, class _Size, class _Tp> -_LIBCPP_CONSTEXPR_AFTER_CXX17 _ForwardIterator __search_n(_ForwardIterator __first, _ForwardIterator __last, - _Size __count, const _Tp& __value_, _BinaryPredicate __pred, - forward_iterator_tag) { +template <class _AlgPolicy, class _Pred, class _Iter, class _Sent, class _SizeT, class _Type, class _Proj> +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 +pair<_Iter, _Iter> __search_n_forward_impl(_Iter __first, _Sent __last, + _SizeT __count, + const _Type& __value, + _Pred& __pred, + _Proj& __proj) { if (__count <= 0) - return __first; + return std::make_pair(__first, __first); while (true) { - // Find first element in sequence that matchs __value_, with a mininum of loop 
checks + // Find first element in sequence that matches __value, with a minimum of loop checks while (true) { - if (__first == __last) // return __last if no element matches __value_ - return __last; - if (__pred(*__first, __value_)) + if (__first == __last) { // return __last if no element matches __value + _IterOps<_AlgPolicy>::__advance_to(__first, __last); + return std::make_pair(__first, __first); + } + if (std::__invoke(__pred, std::__invoke(__proj, *__first), __value)) break; ++__first; } - // *__first matches __value_, now match elements after here - _ForwardIterator __m = __first; - _Size __c(0); + // *__first matches __value, now match elements after here + _Iter __m = __first; + _SizeT __c(0); while (true) { if (++__c == __count) // If pattern exhausted, __first is the answer (works for 1 element pattern) - return __first; - if (++__m == __last) // Otherwise if source exhaused, pattern not found - return __last; - if (!__pred(*__m, __value_)) // if there is a mismatch, restart with a new __first + return std::make_pair(__first, ++__m); + if (++__m == __last) { // Otherwise if source exhausted, pattern not found + _IterOps<_AlgPolicy>::__advance_to(__first, __last); + return std::make_pair(__first, __first); + } + + // if there is a mismatch, restart with a new __first + if (!std::__invoke(__pred, std::__invoke(__proj, *__m), __value)) { __first = __m; ++__first; @@ -54,35 +70,44 @@ _LIBCPP_CONSTEXPR_AFTER_CXX17 _ForwardIterator __search_n(_ForwardIterator __fir } } -template <class _BinaryPredicate, class _RandomAccessIterator, class _Size, class _Tp> -_LIBCPP_CONSTEXPR_AFTER_CXX17 _RandomAccessIterator __search_n(_RandomAccessIterator __first, - _RandomAccessIterator __last, _Size __count, - const _Tp& __value_, _BinaryPredicate __pred, - random_access_iterator_tag) { - typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type; - if (__count <= 0) - return __first; - _Size __len = static_cast<_Size>(__last - __first); - if (__len < __count) - return __last; - const _RandomAccessIterator __s = __last - difference_type(__count - 1); // Start of pattern match can't go beyond here +template <class _AlgPolicy, class _Pred, class _Iter, class _Sent, class _SizeT, class _Type, class _Proj, class _DiffT> +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 +std::pair<_Iter, _Iter> __search_n_random_access_impl(_Iter __first, _Sent __last, + _SizeT __count, + const _Type& __value, + _Pred& __pred, + _Proj& __proj, + _DiffT __size1) { + using difference_type = typename iterator_traits<_Iter>::difference_type; + if (__count == 0) + return std::make_pair(__first, __first); + if (__size1 < static_cast<_DiffT>(__count)) { + _IterOps<_AlgPolicy>::__advance_to(__first, __last); + return std::make_pair(__first, __first); + } + + const auto __s = __first + __size1 - difference_type(__count - 1); // Start of pattern match can't go beyond here while (true) { - // Find first element in sequence that matchs __value_, with a mininum of loop checks + // Find first element in sequence that matches __value, with a minimum of loop checks while (true) { - if (__first >= __s) // return __last if no element matches __value_ - return __last; - if (__pred(*__first, __value_)) + if (__first >= __s) { // return __last if no element matches __value + _IterOps<_AlgPolicy>::__advance_to(__first, __last); + return std::make_pair(__first, __first); + } + if (std::__invoke(__pred, std::__invoke(__proj, *__first), __value)) break; ++__first; } // *__first matches __value_, now match
elements after here - _RandomAccessIterator __m = __first; - _Size __c(0); + auto __m = __first; + _SizeT __c(0); while (true) { if (++__c == __count) // If pattern exhausted, __first is the answer (works for 1 element pattern) - return __first; - ++__m; // no need to check range on __m because __s guarantees we have enough source - if (!__pred(*__m, __value_)) // if there is a mismatch, restart with a new __first + return std::make_pair(__first, __first + _DiffT(__count)); + ++__m; // no need to check range on __m because __s guarantees we have enough source + + // if there is a mismatch, restart with a new __first + if (!std::__invoke(__pred, std::__invoke(__proj, *__m), __value)) { __first = __m; ++__first; @@ -92,19 +117,63 @@ _LIBCPP_CONSTEXPR_AFTER_CXX17 _RandomAccessIterator __search_n(_RandomAccessIter } } +template <class _Iter, class _Sent, + class _DiffT, + class _Type, + class _Pred, + class _Proj> +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 +pair<_Iter, _Iter> __search_n_impl(_Iter __first, _Sent __last, + _DiffT __count, + const _Type& __value, + _Pred& __pred, + _Proj& __proj, + __enable_if_t<__is_cpp17_random_access_iterator<_Iter>::value>* = nullptr) { + return std::__search_n_random_access_impl<_ClassicAlgPolicy>(__first, __last, + __count, + __value, + __pred, + __proj, + __last - __first); +} + +template <class _Iter1, class _Sent1, + class _DiffT, + class _Type, + class _Pred, + class _Proj> +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 +pair<_Iter1, _Iter1> __search_n_impl(_Iter1 __first, _Sent1 __last, + _DiffT __count, + const _Type& __value, + _Pred& __pred, + _Proj& __proj, + __enable_if_t<__is_cpp17_forward_iterator<_Iter1>::value + && !__is_cpp17_random_access_iterator<_Iter1>::value>* = nullptr) { + return std::__search_n_forward_impl<_ClassicAlgPolicy>(__first, __last, + __count, + __value, + __pred, + __proj); +} + template <class _ForwardIterator, class _Size, class _Tp, class _BinaryPredicate> -_LIBCPP_NODISCARD_EXT inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 _ForwardIterator search_n( - _ForwardIterator __first, _ForwardIterator __last, _Size __count, const _Tp& __value_, _BinaryPredicate __pred) { - return _VSTD::__search_n<_BinaryPredicate&>( - __first, __last, _VSTD::__convert_to_integral(__count), __value_, __pred, - typename iterator_traits<_ForwardIterator>::iterator_category()); +_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 +_ForwardIterator search_n(_ForwardIterator __first, _ForwardIterator __last, + _Size __count, + const _Tp& __value, + _BinaryPredicate __pred) { + static_assert(__is_callable<_BinaryPredicate, decltype(*__first), decltype(*__last)>::value, + "BinaryPredicate has to be callable"); + auto __proj = __identity(); + return std::__search_n_impl(__first, __last, std::__convert_to_integral(__count), __value, __pred, __proj).first; } template <class _ForwardIterator, class _Size, class _Tp> -_LIBCPP_NODISCARD_EXT inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 _ForwardIterator -search_n(_ForwardIterator __first, _ForwardIterator __last, _Size __count, const _Tp& __value_) { +_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 +_ForwardIterator search_n(_ForwardIterator __first, _ForwardIterator __last, _Size __count, const _Tp& __value) { typedef typename iterator_traits<_ForwardIterator>::value_type __v; - return _VSTD::search_n(__first, __last, _VSTD::__convert_to_integral(__count), __value_, __equal_to<__v, _Tp>()); + 
return std::search_n(__first, __last, std::__convert_to_integral(__count), __value, __equal_to<__v, _Tp>()); } _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__algorithm/set_difference.h b/libcxx/include/__algorithm/set_difference.h index 5e2dca24e446..4378bd5304d9 100644 --- a/libcxx/include/__algorithm/set_difference.h +++ b/libcxx/include/__algorithm/set_difference.h @@ -13,7 +13,12 @@ #include <__algorithm/comp_ref_type.h> #include <__algorithm/copy.h> #include <__config> +#include <__functional/identity.h> +#include <__functional/invoke.h> #include <__iterator/iterator_traits.h> +#include <__utility/move.h> +#include <__utility/pair.h> +#include <type_traits> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -21,50 +26,52 @@ _LIBCPP_BEGIN_NAMESPACE_STD -template <class _Compare, class _InputIterator1, class _InputIterator2, class _OutputIterator> -_LIBCPP_CONSTEXPR_AFTER_CXX17 _OutputIterator -__set_difference(_InputIterator1 __first1, _InputIterator1 __last1, - _InputIterator2 __first2, _InputIterator2 __last2, _OutputIterator __result, _Compare __comp) -{ - while (__first1 != __last1) - { - if (__first2 == __last2) - return _VSTD::copy(__first1, __last1, __result); - if (__comp(*__first1, *__first2)) - { - *__result = *__first1; - ++__result; - ++__first1; - } - else - { - if (!__comp(*__first2, *__first1)) - ++__first1; - ++__first2; - } +template < class _Comp, class _InIter1, class _Sent1, class _InIter2, class _Sent2, class _OutIter> +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 pair<__uncvref_t<_InIter1>, __uncvref_t<_OutIter> > +__set_difference( + _InIter1&& __first1, _Sent1&& __last1, _InIter2&& __first2, _Sent2&& __last2, _OutIter&& __result, _Comp&& __comp) { + while (__first1 != __last1 && __first2 != __last2) { + if (__comp(*__first1, *__first2)) { + *__result = *__first1; + ++__first1; + ++__result; + } else if (__comp(*__first2, *__first1)) { + ++__first2; + } else { + ++__first1; + ++__first2; } - return __result; + } + return std::__copy(std::move(__first1), std::move(__last1), std::move(__result)); } template <class _InputIterator1, class _InputIterator2, class _OutputIterator, class _Compare> -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 -_OutputIterator -set_difference(_InputIterator1 __first1, _InputIterator1 __last1, - _InputIterator2 __first2, _InputIterator2 __last2, _OutputIterator __result, _Compare __comp) -{ - typedef typename __comp_ref_type<_Compare>::type _Comp_ref; - return _VSTD::__set_difference<_Comp_ref>(__first1, __last1, __first2, __last2, __result, __comp); +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 _OutputIterator set_difference( + _InputIterator1 __first1, + _InputIterator1 __last1, + _InputIterator2 __first2, + _InputIterator2 __last2, + _OutputIterator __result, + _Compare __comp) { + typedef typename __comp_ref_type<_Compare>::type _Comp_ref; + return std::__set_difference<_Comp_ref>(__first1, __last1, __first2, __last2, __result, __comp).second; } template <class _InputIterator1, class _InputIterator2, class _OutputIterator> -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 -_OutputIterator -set_difference(_InputIterator1 __first1, _InputIterator1 __last1, - _InputIterator2 __first2, _InputIterator2 __last2, _OutputIterator __result) -{ - return _VSTD::set_difference(__first1, __last1, __first2, __last2, __result, - __less<typename iterator_traits<_InputIterator1>::value_type, - typename iterator_traits<_InputIterator2>::value_type>()); +inline 
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 _OutputIterator set_difference( + _InputIterator1 __first1, + _InputIterator1 __last1, + _InputIterator2 __first2, + _InputIterator2 __last2, + _OutputIterator __result) { + return std::__set_difference( + __first1, + __last1, + __first2, + __last2, + __result, + __less<typename iterator_traits<_InputIterator1>::value_type, + typename iterator_traits<_InputIterator2>::value_type>()).second; } _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__algorithm/set_intersection.h b/libcxx/include/__algorithm/set_intersection.h index c4163fcd4c3c..77cc83738d1f 100644 --- a/libcxx/include/__algorithm/set_intersection.h +++ b/libcxx/include/__algorithm/set_intersection.h @@ -11,8 +11,11 @@ #include <__algorithm/comp.h> #include <__algorithm/comp_ref_type.h> +#include <__algorithm/iterator_operations.h> #include <__config> #include <__iterator/iterator_traits.h> +#include <__iterator/next.h> +#include <__utility/move.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -20,48 +23,76 @@ _LIBCPP_BEGIN_NAMESPACE_STD -template <class _Compare, class _InputIterator1, class _InputIterator2, class _OutputIterator> -_LIBCPP_CONSTEXPR_AFTER_CXX17 _OutputIterator -__set_intersection(_InputIterator1 __first1, _InputIterator1 __last1, - _InputIterator2 __first2, _InputIterator2 __last2, _OutputIterator __result, _Compare __comp) -{ - while (__first1 != __last1 && __first2 != __last2) - { - if (__comp(*__first1, *__first2)) - ++__first1; - else - { - if (!__comp(*__first2, *__first1)) - { - *__result = *__first1; - ++__result; - ++__first1; - } - ++__first2; - } +template <class _InIter1, class _InIter2, class _OutIter> +struct __set_intersection_result { + _InIter1 __in1_; + _InIter2 __in2_; + _OutIter __out_; + + // need a constructor as C++03 aggregate init is hard + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 + __set_intersection_result(_InIter1&& __in_iter1, _InIter2&& __in_iter2, _OutIter&& __out_iter) + : __in1_(std::move(__in_iter1)), __in2_(std::move(__in_iter2)), __out_(std::move(__out_iter)) {} +}; + +template <class _AlgPolicy, class _Compare, class _InIter1, class _Sent1, class _InIter2, class _Sent2, class _OutIter> +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 __set_intersection_result<_InIter1, _InIter2, _OutIter> +__set_intersection( + _InIter1 __first1, _Sent1 __last1, _InIter2 __first2, _Sent2 __last2, _OutIter __result, _Compare&& __comp) { + while (__first1 != __last1 && __first2 != __last2) { + if (__comp(*__first1, *__first2)) + ++__first1; + else { + if (!__comp(*__first2, *__first1)) { + *__result = *__first1; + ++__result; + ++__first1; + } + ++__first2; } - return __result; + } + + return __set_intersection_result<_InIter1, _InIter2, _OutIter>( + _IterOps<_AlgPolicy>::next(std::move(__first1), std::move(__last1)), + _IterOps<_AlgPolicy>::next(std::move(__first2), std::move(__last2)), + std::move(__result)); } template <class _InputIterator1, class _InputIterator2, class _OutputIterator, class _Compare> -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 -_OutputIterator -set_intersection(_InputIterator1 __first1, _InputIterator1 __last1, - _InputIterator2 __first2, _InputIterator2 __last2, _OutputIterator __result, _Compare __comp) -{ - typedef typename __comp_ref_type<_Compare>::type _Comp_ref; - return _VSTD::__set_intersection<_Comp_ref>(__first1, __last1, __first2, __last2, __result, __comp); +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 _OutputIterator 
set_intersection( + _InputIterator1 __first1, + _InputIterator1 __last1, + _InputIterator2 __first2, + _InputIterator2 __last2, + _OutputIterator __result, + _Compare __comp) { + typedef typename __comp_ref_type<_Compare>::type _Comp_ref; + return std::__set_intersection<_ClassicAlgPolicy, _Comp_ref>( + std::move(__first1), + std::move(__last1), + std::move(__first2), + std::move(__last2), + std::move(__result), + __comp) + .__out_; } template <class _InputIterator1, class _InputIterator2, class _OutputIterator> -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 -_OutputIterator -set_intersection(_InputIterator1 __first1, _InputIterator1 __last1, - _InputIterator2 __first2, _InputIterator2 __last2, _OutputIterator __result) -{ - return _VSTD::set_intersection(__first1, __last1, __first2, __last2, __result, - __less<typename iterator_traits<_InputIterator1>::value_type, - typename iterator_traits<_InputIterator2>::value_type>()); +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 _OutputIterator set_intersection( + _InputIterator1 __first1, + _InputIterator1 __last1, + _InputIterator2 __first2, + _InputIterator2 __last2, + _OutputIterator __result) { + return std::__set_intersection<_ClassicAlgPolicy>( + std::move(__first1), + std::move(__last1), + std::move(__first2), + std::move(__last2), + std::move(__result), + __less<typename iterator_traits<_InputIterator1>::value_type, + typename iterator_traits<_InputIterator2>::value_type>()) + .__out_; } _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__algorithm/set_symmetric_difference.h b/libcxx/include/__algorithm/set_symmetric_difference.h index 2dbfb35d7be6..cd532ab5800d 100644 --- a/libcxx/include/__algorithm/set_symmetric_difference.h +++ b/libcxx/include/__algorithm/set_symmetric_difference.h @@ -14,6 +14,7 @@ #include <__algorithm/copy.h> #include <__config> #include <__iterator/iterator_traits.h> +#include <__utility/move.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -21,55 +22,81 @@ _LIBCPP_BEGIN_NAMESPACE_STD -template <class _Compare, class _InputIterator1, class _InputIterator2, class _OutputIterator> -_LIBCPP_CONSTEXPR_AFTER_CXX17 _OutputIterator -__set_symmetric_difference(_InputIterator1 __first1, _InputIterator1 __last1, - _InputIterator2 __first2, _InputIterator2 __last2, _OutputIterator __result, _Compare __comp) -{ - while (__first1 != __last1) - { - if (__first2 == __last2) - return _VSTD::copy(__first1, __last1, __result); - if (__comp(*__first1, *__first2)) - { - *__result = *__first1; - ++__result; - ++__first1; - } - else - { - if (__comp(*__first2, *__first1)) - { - *__result = *__first2; - ++__result; - } - else - ++__first1; - ++__first2; - } +template <class _InIter1, class _InIter2, class _OutIter> +struct __set_symmetric_difference_result { + _InIter1 __in1_; + _InIter2 __in2_; + _OutIter __out_; + + // need a constructor as C++03 aggregate init is hard + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 + __set_symmetric_difference_result(_InIter1&& __in_iter1, _InIter2&& __in_iter2, _OutIter&& __out_iter) + : __in1_(std::move(__in_iter1)), __in2_(std::move(__in_iter2)), __out_(std::move(__out_iter)) {} +}; + +template <class _Compare, class _InIter1, class _Sent1, class _InIter2, class _Sent2, class _OutIter> +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 __set_symmetric_difference_result<_InIter1, _InIter2, _OutIter> +__set_symmetric_difference( + _InIter1 __first1, _Sent1 __last1, _InIter2 __first2, _Sent2 __last2, _OutIter __result, 
_Compare&& __comp) { + while (__first1 != __last1) { + if (__first2 == __last2) { + auto __ret1 = std::__copy_impl(std::move(__first1), std::move(__last1), std::move(__result)); + return __set_symmetric_difference_result<_InIter1, _InIter2, _OutIter>( + std::move(__ret1.first), std::move(__first2), std::move((__ret1.second))); + } + if (__comp(*__first1, *__first2)) { + *__result = *__first1; + ++__result; + ++__first1; + } else { + if (__comp(*__first2, *__first1)) { + *__result = *__first2; + ++__result; + } else { + ++__first1; + } + ++__first2; } - return _VSTD::copy(__first2, __last2, __result); + } + auto __ret2 = std::__copy_impl(std::move(__first2), std::move(__last2), std::move(__result)); + return __set_symmetric_difference_result<_InIter1, _InIter2, _OutIter>( + std::move(__first1), std::move(__ret2.first), std::move((__ret2.second))); } template <class _InputIterator1, class _InputIterator2, class _OutputIterator, class _Compare> -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 -_OutputIterator -set_symmetric_difference(_InputIterator1 __first1, _InputIterator1 __last1, - _InputIterator2 __first2, _InputIterator2 __last2, _OutputIterator __result, _Compare __comp) -{ - typedef typename __comp_ref_type<_Compare>::type _Comp_ref; - return _VSTD::__set_symmetric_difference<_Comp_ref>(__first1, __last1, __first2, __last2, __result, __comp); +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 _OutputIterator set_symmetric_difference( + _InputIterator1 __first1, + _InputIterator1 __last1, + _InputIterator2 __first2, + _InputIterator2 __last2, + _OutputIterator __result, + _Compare __comp) { + typedef typename __comp_ref_type<_Compare>::type _Comp_ref; + return std::__set_symmetric_difference<_Comp_ref>( + std::move(__first1), + std::move(__last1), + std::move(__first2), + std::move(__last2), + std::move(__result), + __comp) + .__out_; } template <class _InputIterator1, class _InputIterator2, class _OutputIterator> -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 -_OutputIterator -set_symmetric_difference(_InputIterator1 __first1, _InputIterator1 __last1, - _InputIterator2 __first2, _InputIterator2 __last2, _OutputIterator __result) -{ - return _VSTD::set_symmetric_difference(__first1, __last1, __first2, __last2, __result, - __less<typename iterator_traits<_InputIterator1>::value_type, - typename iterator_traits<_InputIterator2>::value_type>()); +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 _OutputIterator set_symmetric_difference( + _InputIterator1 __first1, + _InputIterator1 __last1, + _InputIterator2 __first2, + _InputIterator2 __last2, + _OutputIterator __result) { + return std::set_symmetric_difference( + std::move(__first1), + std::move(__last1), + std::move(__first2), + std::move(__last2), + std::move(__result), + __less<typename iterator_traits<_InputIterator1>::value_type, + typename iterator_traits<_InputIterator2>::value_type>()); } _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__algorithm/sort.h b/libcxx/include/__algorithm/sort.h index f7406a5170e1..76a18215731b 100644 --- a/libcxx/include/__algorithm/sort.h +++ b/libcxx/include/__algorithm/sort.h @@ -17,6 +17,7 @@ #include <__bits> #include <__config> #include <__debug> +#include <__debug_utils/randomize_range.h> #include <__functional/operations.h> #include <__functional/ranges_operations.h> #include <__iterator/iterator_traits.h> @@ -24,10 +25,6 @@ #include <climits> #include <memory> -#if defined(_LIBCPP_DEBUG_RANDOMIZE_UNSPECIFIED_STABILITY) -# include 
<__algorithm/shuffle.h> -#endif - #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header #endif @@ -582,7 +579,7 @@ extern template _LIBCPP_FUNC_VIS unsigned __sort5<__less<long double>&, long dou template <class _RandomAccessIterator, class _Comp> inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 void __sort_impl(_RandomAccessIterator __first, _RandomAccessIterator __last, _Comp& __comp) { - _LIBCPP_DEBUG_RANDOMIZE_RANGE(__first, __last); + std::__debug_randomize_range(__first, __last); using _Comp_ref = typename __comp_ref_type<_Comp>::type; if (__libcpp_is_constant_evaluated()) { std::__partial_sort<_Comp_ref>(__first, __last, __last, _Comp_ref(__comp)); diff --git a/libcxx/include/__algorithm/sort_heap.h b/libcxx/include/__algorithm/sort_heap.h index 3a63d744fc1c..261adedd0eaf 100644 --- a/libcxx/include/__algorithm/sort_heap.h +++ b/libcxx/include/__algorithm/sort_heap.h @@ -14,6 +14,7 @@ #include <__algorithm/pop_heap.h> #include <__config> #include <__iterator/iterator_traits.h> +#include <__utility/move.h> #include <type_traits> // swap #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -23,29 +24,27 @@ _LIBCPP_BEGIN_NAMESPACE_STD template <class _Compare, class _RandomAccessIterator> -_LIBCPP_CONSTEXPR_AFTER_CXX17 void -__sort_heap(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) -{ - typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type; - for (difference_type __n = __last - __first; __n > 1; --__last, (void) --__n) - _VSTD::__pop_heap<_Compare>(__first, __last, __comp, __n); +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 +void __sort_heap(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare& __comp) { + using _CompRef = typename __comp_ref_type<_Compare>::type; + _CompRef __comp_ref = __comp; + + using difference_type = typename iterator_traits<_RandomAccessIterator>::difference_type; + for (difference_type __n = __last - __first; __n > 1; --__last, (void) --__n) + std::__pop_heap<_CompRef>(__first, __last, __comp_ref, __n); } template <class _RandomAccessIterator, class _Compare> -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 -void -sort_heap(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) -{ - typedef typename __comp_ref_type<_Compare>::type _Comp_ref; - _VSTD::__sort_heap<_Comp_ref>(__first, __last, __comp); +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 +void sort_heap(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) { + std::__sort_heap(std::move(__first), std::move(__last), __comp); } template <class _RandomAccessIterator> -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 -void -sort_heap(_RandomAccessIterator __first, _RandomAccessIterator __last) -{ - _VSTD::sort_heap(__first, __last, __less<typename iterator_traits<_RandomAccessIterator>::value_type>()); +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 +void sort_heap(_RandomAccessIterator __first, _RandomAccessIterator __last) { + std::sort_heap(std::move(__first), std::move(__last), + __less<typename iterator_traits<_RandomAccessIterator>::value_type>()); } _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__algorithm/upper_bound.h b/libcxx/include/__algorithm/upper_bound.h index c6483607e3c6..3fc254873532 100644 --- a/libcxx/include/__algorithm/upper_bound.h +++ b/libcxx/include/__algorithm/upper_bound.h @@ -24,7 +24,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD template <class _Compare, 
class _ForwardIterator, class _Tp> _LIBCPP_CONSTEXPR_AFTER_CXX17 _ForwardIterator -__upper_bound(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value_, _Compare __comp) +__upper_bound(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value, _Compare __comp) { typedef typename iterator_traits<_ForwardIterator>::difference_type difference_type; difference_type __len = _VSTD::distance(__first, __last); @@ -33,7 +33,7 @@ __upper_bound(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __va difference_type __l2 = _VSTD::__half_positive(__len); _ForwardIterator __m = __first; _VSTD::advance(__m, __l2); - if (__comp(__value_, *__m)) + if (__comp(__value, *__m)) __len = __l2; else { @@ -48,18 +48,18 @@ template <class _ForwardIterator, class _Tp, class _Compare> _LIBCPP_NODISCARD_EXT inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 _ForwardIterator -upper_bound(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value_, _Compare __comp) +upper_bound(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value, _Compare __comp) { - return _VSTD::__upper_bound<_Compare&>(__first, __last, __value_, __comp); + return _VSTD::__upper_bound<_Compare&>(__first, __last, __value, __comp); } template <class _ForwardIterator, class _Tp> _LIBCPP_NODISCARD_EXT inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 _ForwardIterator -upper_bound(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value_) +upper_bound(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) { - return _VSTD::upper_bound(__first, __last, __value_, + return _VSTD::upper_bound(__first, __last, __value, __less<_Tp, typename iterator_traits<_ForwardIterator>::value_type>()); } diff --git a/libcxx/include/__bit_reference b/libcxx/include/__bit_reference index 8c4f1badbd35..f54cb6c16f48 100644 --- a/libcxx/include/__bit_reference +++ b/libcxx/include/__bit_reference @@ -250,9 +250,9 @@ __find_bool_false(__bit_iterator<_Cp, _IsConst> __first, typename _Cp::size_type template <class _Cp, bool _IsConst, class _Tp> inline _LIBCPP_INLINE_VISIBILITY __bit_iterator<_Cp, _IsConst> -find(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, const _Tp& __value_) +find(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, const _Tp& __value) { - if (static_cast<bool>(__value_)) + if (static_cast<bool>(__value)) return _VSTD::__find_bool_true(__first, static_cast<typename _Cp::size_type>(__last - __first)); return _VSTD::__find_bool_false(__first, static_cast<typename _Cp::size_type>(__last - __first)); } @@ -324,9 +324,9 @@ __count_bool_false(__bit_iterator<_Cp, _IsConst> __first, typename _Cp::size_typ template <class _Cp, bool _IsConst, class _Tp> inline _LIBCPP_INLINE_VISIBILITY typename __bit_iterator<_Cp, _IsConst>::difference_type -count(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, const _Tp& __value_) +count(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, const _Tp& __value) { - if (static_cast<bool>(__value_)) + if (static_cast<bool>(__value)) return _VSTD::__count_bool_true(__first, static_cast<typename _Cp::size_type>(__last - __first)); return _VSTD::__count_bool_false(__first, static_cast<typename _Cp::size_type>(__last - __first)); } @@ -396,11 +396,11 @@ __fill_n_true(__bit_iterator<_Cp, false> __first, typename _Cp::size_type __n) template <class _Cp> inline _LIBCPP_INLINE_VISIBILITY void -fill_n(__bit_iterator<_Cp, 
false> __first, typename _Cp::size_type __n, bool __value_) +fill_n(__bit_iterator<_Cp, false> __first, typename _Cp::size_type __n, bool __value) { if (__n > 0) { - if (__value_) + if (__value) _VSTD::__fill_n_true(__first, __n); else _VSTD::__fill_n_false(__first, __n); @@ -412,9 +412,9 @@ fill_n(__bit_iterator<_Cp, false> __first, typename _Cp::size_type __n, bool __v template <class _Cp> inline _LIBCPP_INLINE_VISIBILITY void -fill(__bit_iterator<_Cp, false> __first, __bit_iterator<_Cp, false> __last, bool __value_) +fill(__bit_iterator<_Cp, false> __first, __bit_iterator<_Cp, false> __last, bool __value) { - _VSTD::fill_n(__first, static_cast<typename _Cp::size_type>(__last - __first), __value_); + _VSTD::fill_n(__first, static_cast<typename _Cp::size_type>(__last - __first), __value); } // copy diff --git a/libcxx/include/__bits b/libcxx/include/__bits index 1eee8f576e9e..92ef5c0a7b49 100644 --- a/libcxx/include/__bits +++ b/libcxx/include/__bits @@ -43,6 +43,23 @@ int __libcpp_clz(unsigned long __x) _NOEXCEPT { return __builtin_clzl(__x); inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned long long __x) _NOEXCEPT { return __builtin_clzll(__x); } +# ifndef _LIBCPP_HAS_NO_INT128 +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR +int __libcpp_clz(__uint128_t __x) _NOEXCEPT { + // The function is written in this form due to C++ constexpr limitations. + // The algorithm: + // - Test whether any bit in the high 64-bits is set + // - No bits set: + // - The high 64-bits contain 64 leading zeros, + // - Add the result of the low 64-bits. + // - Any bits set: + // - The number of leading zeros of the input is the number of leading + // zeros in the high 64-bits. + return ((__x >> 64) == 0) + ? (64 + __builtin_clzll(static_cast<unsigned long long>(__x))) + : __builtin_clzll(static_cast<unsigned long long>(__x >> 64)); +} +# endif inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR int __libcpp_popcount(unsigned __x) _NOEXCEPT { return __builtin_popcount(__x); } diff --git a/libcxx/include/__charconv/tables.h b/libcxx/include/__charconv/tables.h index a2f7f7ce29b3..83f39e6fd2ef 100644 --- a/libcxx/include/__charconv/tables.h +++ b/libcxx/include/__charconv/tables.h @@ -35,6 +35,11 @@ struct __table { static const uint32_t __pow10_32[10]; static const uint64_t __pow10_64[20]; +# ifndef _LIBCPP_HAS_NO_INT128 + // TODO FMT Reduce the number of entries in this table. 
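The two-branch algorithm in __libcpp_clz(__uint128_t) above is easy to sanity-check in isolation. A minimal standalone sketch, assuming GCC/Clang where unsigned __int128 and __builtin_clzll exist (like the builtin, the argument must be non-zero):

#include <cassert>

// Same branching as __libcpp_clz(__uint128_t): if the high 64 bits are zero,
// the answer is 64 plus the leading zeros of the low half; otherwise it is the
// leading zeros of the high half alone. Precondition: x != 0.
inline int clz128(unsigned __int128 x) {
    return (x >> 64) == 0
               ? 64 + __builtin_clzll(static_cast<unsigned long long>(x))
               : __builtin_clzll(static_cast<unsigned long long>(x >> 64));
}

int main() {
    assert(clz128(1) == 127);                          // only bit 0 set
    assert(clz128((unsigned __int128)1 << 100) == 27); // 127 - 100
}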
+ static const __uint128_t __pow10_128[40]; + static const int __pow10_128_offset = 0; +# endif static const char __digits_base_10[200]; }; @@ -106,6 +111,51 @@ const uint64_t __table<_Tp>::__pow10_64[20] = {UINT64_C(0), UINT64_C(1000000000000000000), UINT64_C(10000000000000000000)}; +# ifndef _LIBCPP_HAS_NO_INT128 +template <class _Tp> +const __uint128_t __table<_Tp>::__pow10_128[40] = { + UINT64_C(0), + UINT64_C(10), + UINT64_C(100), + UINT64_C(1000), + UINT64_C(10000), + UINT64_C(100000), + UINT64_C(1000000), + UINT64_C(10000000), + UINT64_C(100000000), + UINT64_C(1000000000), + UINT64_C(10000000000), + UINT64_C(100000000000), + UINT64_C(1000000000000), + UINT64_C(10000000000000), + UINT64_C(100000000000000), + UINT64_C(1000000000000000), + UINT64_C(10000000000000000), + UINT64_C(100000000000000000), + UINT64_C(1000000000000000000), + UINT64_C(10000000000000000000), + __uint128_t(UINT64_C(10000000000000000000)) * UINT64_C(10), + __uint128_t(UINT64_C(10000000000000000000)) * UINT64_C(100), + __uint128_t(UINT64_C(10000000000000000000)) * UINT64_C(1000), + __uint128_t(UINT64_C(10000000000000000000)) * UINT64_C(10000), + __uint128_t(UINT64_C(10000000000000000000)) * UINT64_C(100000), + __uint128_t(UINT64_C(10000000000000000000)) * UINT64_C(1000000), + __uint128_t(UINT64_C(10000000000000000000)) * UINT64_C(10000000), + __uint128_t(UINT64_C(10000000000000000000)) * UINT64_C(100000000), + __uint128_t(UINT64_C(10000000000000000000)) * UINT64_C(1000000000), + __uint128_t(UINT64_C(10000000000000000000)) * UINT64_C(10000000000), + __uint128_t(UINT64_C(10000000000000000000)) * UINT64_C(100000000000), + __uint128_t(UINT64_C(10000000000000000000)) * UINT64_C(1000000000000), + __uint128_t(UINT64_C(10000000000000000000)) * UINT64_C(10000000000000), + __uint128_t(UINT64_C(10000000000000000000)) * UINT64_C(100000000000000), + __uint128_t(UINT64_C(10000000000000000000)) * UINT64_C(1000000000000000), + __uint128_t(UINT64_C(10000000000000000000)) * UINT64_C(10000000000000000), + __uint128_t(UINT64_C(10000000000000000000)) * UINT64_C(100000000000000000), + __uint128_t(UINT64_C(10000000000000000000)) * UINT64_C(1000000000000000000), + __uint128_t(UINT64_C(10000000000000000000)) * UINT64_C(10000000000000000000), + (__uint128_t(UINT64_C(10000000000000000000)) * UINT64_C(10000000000000000000)) * 10}; +# endif + template <class _Tp> const char __table<_Tp>::__digits_base_10[200] = { // clang-format off diff --git a/libcxx/include/__charconv/to_chars_base_10.h b/libcxx/include/__charconv/to_chars_base_10.h index 91c209559aff..d25deffc592f 100644 --- a/libcxx/include/__charconv/to_chars_base_10.h +++ b/libcxx/include/__charconv/to_chars_base_10.h @@ -14,11 +14,15 @@ #include <__charconv/tables.h> #include <__config> #include <cstdint> +#include <limits> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD #ifndef _LIBCPP_CXX03_LANG @@ -62,7 +66,6 @@ _LIBCPP_HIDE_FROM_ABI inline char* __append9(char* __first, uint32_t __value) no return __itoa::__append8(__itoa::__append1(__first, __value / 100000000), __value % 100000000); } -// This function is used for uint32_t and uint64_t. 
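One non-obvious detail of the __pow10_128 table above (shared with its 32/64-bit siblings): the first entry is 0 rather than 10^0. That sentinel is what makes the usual "estimate the digit count from the bit length, then correct with one table lookup" trick also report one digit for the value 0. A hedged sketch of that idea, not the exact libc++ code (clz128 is the helper sketched above; 1233/4096 approximates log10(2)):

#include <cassert>

inline int digit_count(unsigned __int128 v) {
    // Stand-in for the table: pow10[0] == 0 (sentinel), pow10[d] == 10^d otherwise.
    unsigned __int128 pow10[39];
    pow10[0] = 0;
    pow10[1] = 10;
    for (int i = 2; i < 39; ++i)
        pow10[i] = pow10[i - 1] * 10;
    int bits = 128 - clz128(v | 1); // bit length; v | 1 keeps clz's argument non-zero
    int t = bits * 1233 >> 12;      // ~ floor(bits * log10(2)), at most 38
    return t - (v < pow10[t]) + 1;  // correct the estimate with one lookup
}

int main() {
    assert(digit_count(0) == 1); // works because pow10[0] is 0, not 1
    assert(digit_count(9) == 1);
    assert(digit_count(10) == 2);
    assert(digit_count(999) == 3);
}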
template <class _Tp> _LIBCPP_HIDE_FROM_ABI char* __append10(char* __first, _Tp __value) noexcept { return __itoa::__append8(__itoa::__append2(__first, static_cast<uint32_t>(__value / 100000000)), @@ -118,10 +121,65 @@ _LIBCPP_HIDE_FROM_ABI inline char* __base_10_u64(char* __buffer, uint64_t __valu return __itoa::__append10(__buffer, __value); } +# ifndef _LIBCPP_HAS_NO_INT128 +/// \returns 10^\a exp +/// +/// \pre \a exp in [19, 39] +/// +/// \note The lookup table contains a partial set of exponents limiting the +/// range that can be used. However, the range is sufficient for +/// \ref __base_10_u128. +_LIBCPP_HIDE_FROM_ABI inline __uint128_t __pow_10(int __exp) noexcept { + _LIBCPP_ASSERT(__exp >= __table<>::__pow10_128_offset, "Index out of bounds"); + return __table<>::__pow10_128[__exp - __table<>::__pow10_128_offset]; +} + +_LIBCPP_HIDE_FROM_ABI inline char* __base_10_u128(char* __buffer, __uint128_t __value) noexcept { + _LIBCPP_ASSERT( + __value > numeric_limits<uint64_t>::max(), "The optimizations for this algorithm fail when this isn't true."); + + // Unlike the 64 to 32 bit case, in the 128 bit case the "upper half" can't be + // stored in the "lower half". Instead we first need to handle the topmost + // digits separately. + // + // Maximum unsigned values + // 64 bit 18'446'744'073'709'551'615 (20 digits) + // 128 bit 340'282'366'920'938'463'463'374'607'431'768'211'455 (39 digits) + // step 1 ^ ([0-1] digits) + // step 2 ^^^^^^^^^^^^^^^^^^^^^^^^^ ([0-19] digits) + // step 3 ^^^^^^^^^^^^^^^^^^^^^^^^^ (19 digits) + if (__value >= __itoa::__pow_10(38)) { + // step 1 + __buffer = __itoa::__append1(__buffer, static_cast<uint32_t>(__value / __itoa::__pow_10(38))); + __value %= __itoa::__pow_10(38); + + // step 2 always produces 19 digits. + // They are handled here since leading zeros need to be appended to the buffer. + __buffer = __itoa::__append9(__buffer, static_cast<uint32_t>(__value / __itoa::__pow_10(29))); + __value %= __itoa::__pow_10(29); + __buffer = __itoa::__append10(__buffer, static_cast<uint64_t>(__value / __itoa::__pow_10(19))); + __value %= __itoa::__pow_10(19); + } + else { + // step 2 + // This version needs to determine the position of the leading non-zero digit.
+ __buffer = __base_10_u64(__buffer, static_cast<uint64_t>(__value / __itoa::__pow_10(19))); + __value %= __itoa::__pow_10(19); + } + + // Step 3 + __buffer = __itoa::__append9(__buffer, static_cast<uint32_t>(__value / 10000000000)); + __buffer = __itoa::__append10(__buffer, static_cast<uint64_t>(__value % 10000000000)); + + return __buffer; +} +# endif } // namespace __itoa #endif // _LIBCPP_CXX03_LANG _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP___CHARCONV_TO_CHARS_BASE_10_H diff --git a/libcxx/include/__chrono/day.h b/libcxx/include/__chrono/day.h index 7e425558e359..d9fa4ffbc45e 100644 --- a/libcxx/include/__chrono/day.h +++ b/libcxx/include/__chrono/day.h @@ -12,6 +12,7 @@ #include <__chrono/duration.h> #include <__config> +#include <compare> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -45,25 +46,9 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr bool operator==(const day& __lhs, const day& __rhs) noexcept { return static_cast<unsigned>(__lhs) == static_cast<unsigned>(__rhs); } -_LIBCPP_HIDE_FROM_ABI inline constexpr -bool operator!=(const day& __lhs, const day& __rhs) noexcept -{ return !(__lhs == __rhs); } - -_LIBCPP_HIDE_FROM_ABI inline constexpr -bool operator< (const day& __lhs, const day& __rhs) noexcept -{ return static_cast<unsigned>(__lhs) < static_cast<unsigned>(__rhs); } - -_LIBCPP_HIDE_FROM_ABI inline constexpr -bool operator> (const day& __lhs, const day& __rhs) noexcept -{ return __rhs < __lhs; } - -_LIBCPP_HIDE_FROM_ABI inline constexpr -bool operator<=(const day& __lhs, const day& __rhs) noexcept -{ return !(__rhs < __lhs);} - -_LIBCPP_HIDE_FROM_ABI inline constexpr -bool operator>=(const day& __lhs, const day& __rhs) noexcept -{ return !(__lhs < __rhs); } +_LIBCPP_HIDE_FROM_ABI constexpr strong_ordering operator<=>(const day& __lhs, const day& __rhs) noexcept { + return static_cast<unsigned>(__lhs) <=> static_cast<unsigned>(__rhs); +} _LIBCPP_HIDE_FROM_ABI inline constexpr day operator+ (const day& __lhs, const days& __rhs) noexcept diff --git a/libcxx/include/__chrono/duration.h b/libcxx/include/__chrono/duration.h index f5207594291e..c502574fb267 100644 --- a/libcxx/include/__chrono/duration.h +++ b/libcxx/include/__chrono/duration.h @@ -286,10 +286,10 @@ public: _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 duration& operator+=(const duration& __d) {__rep_ += __d.count(); return *this;} _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 duration& operator-=(const duration& __d) {__rep_ -= __d.count(); return *this;} - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 duration& operator*=(const rep& rhs) {__rep_ *= rhs; return *this;} - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 duration& operator/=(const rep& rhs) {__rep_ /= rhs; return *this;} - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 duration& operator%=(const rep& rhs) {__rep_ %= rhs; return *this;} - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 duration& operator%=(const duration& rhs) {__rep_ %= rhs.count(); return *this;} + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 duration& operator*=(const rep& __rhs) {__rep_ *= __rhs; return *this;} + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 duration& operator/=(const rep& __rhs) {__rep_ /= __rhs; return *this;} + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 duration& operator%=(const rep& __rhs) {__rep_ %= __rhs; return *this;} + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 duration& operator%=(const duration& 
__rhs) {__rep_ %= __rhs.count(); return *this;} // special values diff --git a/libcxx/include/__chrono/time_point.h b/libcxx/include/__chrono/time_point.h index ac2d347a0dca..63d67d77dd05 100644 --- a/libcxx/include/__chrono/time_point.h +++ b/libcxx/include/__chrono/time_point.h @@ -47,12 +47,12 @@ public: // conversions template <class _Duration2> _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11 - time_point(const time_point<clock, _Duration2>& t, + time_point(const time_point<clock, _Duration2>& __t, typename enable_if < is_convertible<_Duration2, duration>::value >::type* = nullptr) - : __d_(t.time_since_epoch()) {} + : __d_(__t.time_since_epoch()) {} // observer diff --git a/libcxx/include/__chrono/year.h b/libcxx/include/__chrono/year.h index a641fe1c93b0..c7f0027eba7b 100644 --- a/libcxx/include/__chrono/year.h +++ b/libcxx/include/__chrono/year.h @@ -12,6 +12,7 @@ #include <__chrono/duration.h> #include <__config> +#include <limits> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -100,9 +101,11 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr year& year::operator-=(const years& __dy) noexcept { *this = *this - __dy; return *this; } -_LIBCPP_HIDE_FROM_ABI inline constexpr -bool year::ok() const noexcept -{ return static_cast<int>(min()) <= __y && __y <= static_cast<int>(max()); } +_LIBCPP_HIDE_FROM_ABI constexpr bool year::ok() const noexcept { + static_assert(static_cast<int>(std::numeric_limits<decltype(__y)>::max()) == static_cast<int>(max())); + return static_cast<int>(min()) <= __y; +} + } // namespace chrono _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__chrono/year_month_weekday.h b/libcxx/include/__chrono/year_month_weekday.h index 9ba81e7e3f65..b69b77152fb1 100644 --- a/libcxx/include/__chrono/year_month_weekday.h +++ b/libcxx/include/__chrono/year_month_weekday.h @@ -47,10 +47,10 @@ public: : year_month_weekday(__from_days(__sysd.time_since_epoch())) {} _LIBCPP_HIDE_FROM_ABI inline explicit constexpr year_month_weekday(const local_days& __locd) noexcept : year_month_weekday(__from_days(__locd.time_since_epoch())) {} - _LIBCPP_HIDE_FROM_ABI constexpr year_month_weekday& operator+=(const months& m) noexcept; - _LIBCPP_HIDE_FROM_ABI constexpr year_month_weekday& operator-=(const months& m) noexcept; - _LIBCPP_HIDE_FROM_ABI constexpr year_month_weekday& operator+=(const years& y) noexcept; - _LIBCPP_HIDE_FROM_ABI constexpr year_month_weekday& operator-=(const years& y) noexcept; + _LIBCPP_HIDE_FROM_ABI constexpr year_month_weekday& operator+=(const months&) noexcept; + _LIBCPP_HIDE_FROM_ABI constexpr year_month_weekday& operator-=(const months&) noexcept; + _LIBCPP_HIDE_FROM_ABI constexpr year_month_weekday& operator+=(const years&) noexcept; + _LIBCPP_HIDE_FROM_ABI constexpr year_month_weekday& operator-=(const years&) noexcept; _LIBCPP_HIDE_FROM_ABI inline constexpr chrono::year year() const noexcept { return __y; } _LIBCPP_HIDE_FROM_ABI inline constexpr chrono::month month() const noexcept { return __m; } diff --git a/libcxx/include/__config b/libcxx/include/__config index e4b7d25edf34..22c2ed7fd87b 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -26,6 +26,13 @@ # define _LIBCPP_VERSION 15000 +# define _LIBCPP_CONCAT_IMPL(_X, _Y) _X##_Y +# define _LIBCPP_CONCAT(_X, _Y) _LIBCPP_CONCAT_IMPL(_X, _Y) + +// Valid C++ identifier that revs with every libc++ version. This can be used to +// generate identifiers that must be unique for every released libc++ version. 
+# define _LIBCPP_VERSIONED_IDENTIFIER _LIBCPP_CONCAT(v, _LIBCPP_VERSION) + # if __STDC_HOSTED__ == 0 # define _LIBCPP_FREESTANDING # endif @@ -568,12 +575,6 @@ typedef __char32_t char32_t; -# if __has_attribute(internal_linkage) -# define _LIBCPP_INTERNAL_LINKAGE __attribute__((internal_linkage)) -# else -# define _LIBCPP_INTERNAL_LINKAGE _LIBCPP_ALWAYS_INLINE -# endif - # if __has_attribute(exclude_from_explicit_instantiation) # define _LIBCPP_EXCLUDE_FROM_EXPLICIT_INSTANTIATION __attribute__((__exclude_from_explicit_instantiation__)) # else @@ -583,20 +584,35 @@ typedef __char32_t char32_t; # define _LIBCPP_EXCLUDE_FROM_EXPLICIT_INSTANTIATION _LIBCPP_ALWAYS_INLINE # endif -# ifndef _LIBCPP_HIDE_FROM_ABI_PER_TU -# ifndef _LIBCPP_HIDE_FROM_ABI_PER_TU_BY_DEFAULT -# define _LIBCPP_HIDE_FROM_ABI_PER_TU 0 -# else -# define _LIBCPP_HIDE_FROM_ABI_PER_TU 1 -# endif -# endif - -# ifndef _LIBCPP_HIDE_FROM_ABI -# if _LIBCPP_HIDE_FROM_ABI_PER_TU -# define _LIBCPP_HIDE_FROM_ABI _LIBCPP_HIDDEN _LIBCPP_INTERNAL_LINKAGE -# else -# define _LIBCPP_HIDE_FROM_ABI _LIBCPP_HIDDEN _LIBCPP_EXCLUDE_FROM_EXPLICIT_INSTANTIATION -# endif +// This macro marks a symbol as being hidden from libc++'s ABI. This is achieved +// on two levels: +// 1. The symbol is given hidden visibility, which ensures that users won't start exporting +// symbols from their dynamic library by means of using the libc++ headers. This ensures +// that those symbols stay private to the dynamic library in which they are defined. +// +// 2. The symbol is given an ABI tag that changes with each version of libc++. This ensures +// that no ODR violation can arise from mixing two TUs compiled with different versions +// of libc++ where we would have changed the definition of a symbol. If the symbols shared +// the same name, the ODR would require that their definitions be token-by-token equivalent, +// which basically prevents us from being able to make any change to any function in our +// headers. Using this ABI tag ensures that the symbol name is "bumped" artificially at +// each release, which lets us change the definition of these symbols at our leisure. +// Note that historically, this has been achieved in various ways, including force-inlining +// all functions or giving internal linkage to all functions. Both of these (previous) solutions +// suffer from drawbacks, notably code bloat. +// +// Note that we use _LIBCPP_EXCLUDE_FROM_EXPLICIT_INSTANTIATION to ensure that we don't depend +// on _LIBCPP_HIDE_FROM_ABI methods of classes explicitly instantiated in the dynamic library. +// +// TODO: We provide an escape hatch with _LIBCPP_NO_ABI_TAG for folks who want to avoid increasing +// the length of symbols with an ABI tag. In practice, we should remove the escape hatch and +// use compression mangling instead, see https://github.com/itanium-cxx-abi/cxx-abi/issues/70.
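To make the tagging concrete: with _LIBCPP_VERSION at 15000, _LIBCPP_VERSIONED_IDENTIFIER expands to v15000, and the definition that follows stringizes it into an __abi_tag__ attribute. An illustration only, hand-expanded with Clang attribute spellings (the mangled name is approximate):

// Roughly what a _LIBCPP_HIDE_FROM_ABI function picks up in this release:
__attribute__((__visibility__("hidden"),
               __exclude_from_explicit_instantiation__,
               __abi_tag__("v15000")))
inline int __example(); // mangles to something like _Z9__exampleB6v15000v
// The tag is part of the symbol name, so TUs built against different libc++
// releases can never end up sharing (and ODR-colliding on) such a symbol.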
+# ifndef _LIBCPP_NO_ABI_TAG +# define _LIBCPP_HIDE_FROM_ABI \ + _LIBCPP_HIDDEN _LIBCPP_EXCLUDE_FROM_EXPLICIT_INSTANTIATION \ + __attribute__((__abi_tag__(_LIBCPP_TOSTRING(_LIBCPP_VERSIONED_IDENTIFIER)))) +# else +# define _LIBCPP_HIDE_FROM_ABI _LIBCPP_HIDDEN _LIBCPP_EXCLUDE_FROM_EXPLICIT_INSTANTIATION # endif # ifdef _LIBCPP_BUILDING_LIBRARY diff --git a/libcxx/include/__debug b/libcxx/include/__debug index d3dd202b54ab..59e85cb7d1bc 100644 --- a/libcxx/include/__debug +++ b/libcxx/include/__debug @@ -28,22 +28,6 @@ # define _LIBCPP_DEBUG_RANDOMIZE_UNSPECIFIED_STABILITY #endif -// TODO: Define this as a function instead -#if defined(_LIBCPP_DEBUG_RANDOMIZE_UNSPECIFIED_STABILITY) -# if defined(_LIBCPP_CXX03_LANG) -# error Support for unspecified stability is only for C++11 and higher -# endif -# define _LIBCPP_DEBUG_RANDOMIZE_RANGE(__first, __last) \ - do { \ - if (!__builtin_is_constant_evaluated()) \ - std::shuffle(__first, __last, __libcpp_debug_randomizer()); \ - } while (false) -#else -# define _LIBCPP_DEBUG_RANDOMIZE_RANGE(__first, __last) \ - do { \ - } while (false) -#endif - #ifdef _LIBCPP_ENABLE_DEBUG_MODE # define _LIBCPP_DEBUG_ASSERT(x, m) _LIBCPP_ASSERT(::std::__libcpp_is_constant_evaluated() || (x), m) #else diff --git a/libcxx/include/__debug_utils/randomize_range.h b/libcxx/include/__debug_utils/randomize_range.h new file mode 100644 index 000000000000..fd5b9e588493 --- /dev/null +++ b/libcxx/include/__debug_utils/randomize_range.h @@ -0,0 +1,42 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___LIBCXX_DEBUG_RANDOMIZE_RANGE_H +#define _LIBCPP___LIBCXX_DEBUG_RANDOMIZE_RANGE_H + +#include <__config> + +#ifdef _LIBCPP_DEBUG_RANDOMIZE_UNSPECIFIED_STABILITY +# include <__algorithm/shuffle.h> +# include <__type_traits/is_constant_evaluated.h> +#endif + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +template <class _Iterator> +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 void __debug_randomize_range(_Iterator __first, _Iterator __last) { +#ifdef _LIBCPP_DEBUG_RANDOMIZE_UNSPECIFIED_STABILITY +# ifdef _LIBCPP_CXX03_LANG +# error Support for unspecified stability is only for C++11 and higher +# endif + + if (!__libcpp_is_constant_evaluated()) + std::shuffle(__first, __last, __libcpp_debug_randomizer()); +#else + (void)__first; + (void)__last; +#endif +} + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___LIBCXX_DEBUG_RANDOMIZE_RANGE_H diff --git a/libcxx/include/__filesystem/copy_options.h b/libcxx/include/__filesystem/copy_options.h index 2e037403f6f2..96c7535812e2 100644 --- a/libcxx/include/__filesystem/copy_options.h +++ b/libcxx/include/__filesystem/copy_options.h @@ -38,41 +38,41 @@ enum class _LIBCPP_ENUM_VIS copy_options : unsigned short { }; _LIBCPP_INLINE_VISIBILITY -inline constexpr copy_options operator&(copy_options _LHS, copy_options _RHS) { - return static_cast<copy_options>(static_cast<unsigned short>(_LHS) & - static_cast<unsigned short>(_RHS)); +inline constexpr copy_options operator&(copy_options __lhs, copy_options __rhs) { + return static_cast<copy_options>(static_cast<unsigned short>(__lhs) & + static_cast<unsigned short>(__rhs)); } 
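The filesystem hunks here and below are mechanical renames of the operator parameters from _LHS/_RHS to the reserved __lhs/__rhs spellings; behavior is unchanged. For orientation, these overloads are what let copy_options act as a standard bitmask type. A usage sketch, assuming C++17 <filesystem> ("src" and "dst" are placeholder paths):

#include <filesystem>
#include <system_error>
namespace fs = std::filesystem;

int main() {
    // operator| combines flags; operator& tests for one.
    fs::copy_options opts = fs::copy_options::recursive | fs::copy_options::skip_symlinks;
    bool is_recursive = (opts & fs::copy_options::recursive) != fs::copy_options::none;

    std::error_code ec; // use the non-throwing overload for the sketch
    if (is_recursive)
        fs::copy("src", "dst", opts, ec);
    return ec ? 1 : 0;
}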
_LIBCPP_INLINE_VISIBILITY -inline constexpr copy_options operator|(copy_options _LHS, copy_options _RHS) { - return static_cast<copy_options>(static_cast<unsigned short>(_LHS) | - static_cast<unsigned short>(_RHS)); +inline constexpr copy_options operator|(copy_options __lhs, copy_options __rhs) { + return static_cast<copy_options>(static_cast<unsigned short>(__lhs) | + static_cast<unsigned short>(__rhs)); } _LIBCPP_INLINE_VISIBILITY -inline constexpr copy_options operator^(copy_options _LHS, copy_options _RHS) { - return static_cast<copy_options>(static_cast<unsigned short>(_LHS) ^ - static_cast<unsigned short>(_RHS)); +inline constexpr copy_options operator^(copy_options __lhs, copy_options __rhs) { + return static_cast<copy_options>(static_cast<unsigned short>(__lhs) ^ + static_cast<unsigned short>(__rhs)); } _LIBCPP_INLINE_VISIBILITY -inline constexpr copy_options operator~(copy_options _LHS) { - return static_cast<copy_options>(~static_cast<unsigned short>(_LHS)); +inline constexpr copy_options operator~(copy_options __lhs) { + return static_cast<copy_options>(~static_cast<unsigned short>(__lhs)); } _LIBCPP_INLINE_VISIBILITY -inline copy_options& operator&=(copy_options& _LHS, copy_options _RHS) { - return _LHS = _LHS & _RHS; +inline copy_options& operator&=(copy_options& __lhs, copy_options __rhs) { + return __lhs = __lhs & __rhs; } _LIBCPP_INLINE_VISIBILITY -inline copy_options& operator|=(copy_options& _LHS, copy_options _RHS) { - return _LHS = _LHS | _RHS; +inline copy_options& operator|=(copy_options& __lhs, copy_options __rhs) { + return __lhs = __lhs | __rhs; } _LIBCPP_INLINE_VISIBILITY -inline copy_options& operator^=(copy_options& _LHS, copy_options _RHS) { - return _LHS = _LHS ^ _RHS; +inline copy_options& operator^=(copy_options& __lhs, copy_options __rhs) { + return __lhs = __lhs ^ __rhs; } _LIBCPP_AVAILABILITY_FILESYSTEM_POP diff --git a/libcxx/include/__filesystem/directory_options.h b/libcxx/include/__filesystem/directory_options.h index d3f8cc1deb21..c5c031a567cf 100644 --- a/libcxx/include/__filesystem/directory_options.h +++ b/libcxx/include/__filesystem/directory_options.h @@ -30,47 +30,47 @@ enum class _LIBCPP_ENUM_VIS directory_options : unsigned char { }; _LIBCPP_INLINE_VISIBILITY -inline constexpr directory_options operator&(directory_options _LHS, - directory_options _RHS) { - return static_cast<directory_options>(static_cast<unsigned char>(_LHS) & - static_cast<unsigned char>(_RHS)); +inline constexpr directory_options operator&(directory_options __lhs, + directory_options __rhs) { + return static_cast<directory_options>(static_cast<unsigned char>(__lhs) & + static_cast<unsigned char>(__rhs)); } _LIBCPP_INLINE_VISIBILITY -inline constexpr directory_options operator|(directory_options _LHS, - directory_options _RHS) { - return static_cast<directory_options>(static_cast<unsigned char>(_LHS) | - static_cast<unsigned char>(_RHS)); +inline constexpr directory_options operator|(directory_options __lhs, + directory_options __rhs) { + return static_cast<directory_options>(static_cast<unsigned char>(__lhs) | + static_cast<unsigned char>(__rhs)); } _LIBCPP_INLINE_VISIBILITY -inline constexpr directory_options operator^(directory_options _LHS, - directory_options _RHS) { - return static_cast<directory_options>(static_cast<unsigned char>(_LHS) ^ - static_cast<unsigned char>(_RHS)); +inline constexpr directory_options operator^(directory_options __lhs, + directory_options __rhs) { + return static_cast<directory_options>(static_cast<unsigned char>(__lhs) ^ + 
static_cast<unsigned char>(__rhs)); } _LIBCPP_INLINE_VISIBILITY -inline constexpr directory_options operator~(directory_options _LHS) { - return static_cast<directory_options>(~static_cast<unsigned char>(_LHS)); +inline constexpr directory_options operator~(directory_options __lhs) { + return static_cast<directory_options>(~static_cast<unsigned char>(__lhs)); } _LIBCPP_INLINE_VISIBILITY -inline directory_options& operator&=(directory_options& _LHS, - directory_options _RHS) { - return _LHS = _LHS & _RHS; +inline directory_options& operator&=(directory_options& __lhs, + directory_options __rhs) { + return __lhs = __lhs & __rhs; } _LIBCPP_INLINE_VISIBILITY -inline directory_options& operator|=(directory_options& _LHS, - directory_options _RHS) { - return _LHS = _LHS | _RHS; +inline directory_options& operator|=(directory_options& __lhs, + directory_options __rhs) { + return __lhs = __lhs | __rhs; } _LIBCPP_INLINE_VISIBILITY -inline directory_options& operator^=(directory_options& _LHS, - directory_options _RHS) { - return _LHS = _LHS ^ _RHS; +inline directory_options& operator^=(directory_options& __lhs, + directory_options __rhs) { + return __lhs = __lhs ^ __rhs; } _LIBCPP_AVAILABILITY_FILESYSTEM_POP diff --git a/libcxx/include/__filesystem/operations.h b/libcxx/include/__filesystem/operations.h index 85c71f017f34..f48d301d090c 100644 --- a/libcxx/include/__filesystem/operations.h +++ b/libcxx/include/__filesystem/operations.h @@ -39,10 +39,10 @@ _LIBCPP_FUNC_VIS path __canonical(const path&, error_code* __ec = nullptr); _LIBCPP_FUNC_VIS bool __copy_file(const path& __from, const path& __to, copy_options __opt, error_code* __ec = nullptr); _LIBCPP_FUNC_VIS void __copy_symlink(const path& __existing_symlink, const path& __new_symlink, error_code* __ec = nullptr); _LIBCPP_FUNC_VIS void __copy(const path& __from, const path& __to, copy_options __opt, error_code* __ec = nullptr); -_LIBCPP_FUNC_VIS bool __create_directories(const path& p, error_code* ec = nullptr); +_LIBCPP_FUNC_VIS bool __create_directories(const path&, error_code* = nullptr); _LIBCPP_FUNC_VIS void __create_directory_symlink(const path& __to, const path& __new_symlink, error_code* __ec = nullptr); -_LIBCPP_FUNC_VIS bool __create_directory(const path& p, error_code* ec = nullptr); -_LIBCPP_FUNC_VIS bool __create_directory(const path& p, const path& attributes, error_code* ec = nullptr); +_LIBCPP_FUNC_VIS bool __create_directory(const path&, error_code* = nullptr); +_LIBCPP_FUNC_VIS bool __create_directory(const path&, const path& __attributes, error_code* = nullptr); _LIBCPP_FUNC_VIS void __create_hard_link(const path& __to, const path& __new_hard_link, error_code* __ec = nullptr); _LIBCPP_FUNC_VIS void __create_symlink(const path& __to, const path& __new_symlink, error_code* __ec = nullptr); _LIBCPP_FUNC_VIS path __current_path(error_code* __ec = nullptr); @@ -52,14 +52,14 @@ _LIBCPP_FUNC_VIS file_status __status(const path&, error_code* __ec = nullptr); _LIBCPP_FUNC_VIS uintmax_t __file_size(const path&, error_code* __ec = nullptr); _LIBCPP_FUNC_VIS uintmax_t __hard_link_count(const path&, error_code* __ec = nullptr); _LIBCPP_FUNC_VIS file_status __symlink_status(const path&, error_code* __ec = nullptr); -_LIBCPP_FUNC_VIS file_time_type __last_write_time(const path& p, error_code* ec = nullptr); -_LIBCPP_FUNC_VIS void __last_write_time(const path& p, file_time_type new_time, error_code* ec = nullptr); +_LIBCPP_FUNC_VIS file_time_type __last_write_time(const path&, error_code* __ec = nullptr); +_LIBCPP_FUNC_VIS void 
__last_write_time(const path&, file_time_type __new_time, error_code* __ec = nullptr); _LIBCPP_FUNC_VIS path __weakly_canonical(path const& __p, error_code* __ec = nullptr); -_LIBCPP_FUNC_VIS path __read_symlink(const path& p, error_code* ec = nullptr); -_LIBCPP_FUNC_VIS uintmax_t __remove_all(const path& p, error_code* ec = nullptr); -_LIBCPP_FUNC_VIS bool __remove(const path& p, error_code* ec = nullptr); -_LIBCPP_FUNC_VIS void __rename(const path& from, const path& to, error_code* ec = nullptr); -_LIBCPP_FUNC_VIS void __resize_file(const path& p, uintmax_t size, error_code* ec = nullptr); +_LIBCPP_FUNC_VIS path __read_symlink(const path&, error_code* __ec = nullptr); +_LIBCPP_FUNC_VIS uintmax_t __remove_all(const path&, error_code* __ec = nullptr); +_LIBCPP_FUNC_VIS bool __remove(const path&, error_code* __ec = nullptr); +_LIBCPP_FUNC_VIS void __rename(const path& __from, const path& __to, error_code* __ec = nullptr); +_LIBCPP_FUNC_VIS void __resize_file(const path&, uintmax_t __size, error_code* = nullptr); _LIBCPP_FUNC_VIS path __temp_directory_path(error_code* __ec = nullptr); inline _LIBCPP_HIDE_FROM_ABI path absolute(const path& __p) { return __absolute(__p); } @@ -118,7 +118,7 @@ inline _LIBCPP_HIDE_FROM_ABI bool is_character_file(const path& __p, error_code& inline _LIBCPP_HIDE_FROM_ABI bool is_directory(file_status __s) noexcept { return __s.type() == file_type::directory; } inline _LIBCPP_HIDE_FROM_ABI bool is_directory(const path& __p) { return is_directory(__status(__p)); } inline _LIBCPP_HIDE_FROM_ABI bool is_directory(const path& __p, error_code& __ec) noexcept { return is_directory(__status(__p, &__ec)); } -_LIBCPP_FUNC_VIS bool __fs_is_empty(const path& p, error_code* ec = nullptr); +_LIBCPP_FUNC_VIS bool __fs_is_empty(const path& __p, error_code* __ec = nullptr); inline _LIBCPP_HIDE_FROM_ABI bool is_empty(const path& __p) { return __fs_is_empty(__p); } inline _LIBCPP_HIDE_FROM_ABI bool is_empty(const path& __p, error_code& __ec) { return __fs_is_empty(__p, &__ec); } inline _LIBCPP_HIDE_FROM_ABI bool is_fifo(file_status __s) noexcept { return __s.type() == file_type::fifo; } diff --git a/libcxx/include/__filesystem/perm_options.h b/libcxx/include/__filesystem/perm_options.h index f7580a2473d0..4aba302edfbe 100644 --- a/libcxx/include/__filesystem/perm_options.h +++ b/libcxx/include/__filesystem/perm_options.h @@ -31,41 +31,41 @@ enum class _LIBCPP_ENUM_VIS perm_options : unsigned char { }; _LIBCPP_INLINE_VISIBILITY -inline constexpr perm_options operator&(perm_options _LHS, perm_options _RHS) { - return static_cast<perm_options>(static_cast<unsigned>(_LHS) & - static_cast<unsigned>(_RHS)); +inline constexpr perm_options operator&(perm_options __lhs, perm_options __rhs) { + return static_cast<perm_options>(static_cast<unsigned>(__lhs) & + static_cast<unsigned>(__rhs)); } _LIBCPP_INLINE_VISIBILITY -inline constexpr perm_options operator|(perm_options _LHS, perm_options _RHS) { - return static_cast<perm_options>(static_cast<unsigned>(_LHS) | - static_cast<unsigned>(_RHS)); +inline constexpr perm_options operator|(perm_options __lhs, perm_options __rhs) { + return static_cast<perm_options>(static_cast<unsigned>(__lhs) | + static_cast<unsigned>(__rhs)); } _LIBCPP_INLINE_VISIBILITY -inline constexpr perm_options operator^(perm_options _LHS, perm_options _RHS) { - return static_cast<perm_options>(static_cast<unsigned>(_LHS) ^ - static_cast<unsigned>(_RHS)); +inline constexpr perm_options operator^(perm_options __lhs, perm_options __rhs) { + return 
static_cast<perm_options>(static_cast<unsigned>(__lhs) ^ + static_cast<unsigned>(__rhs)); } _LIBCPP_INLINE_VISIBILITY -inline constexpr perm_options operator~(perm_options _LHS) { - return static_cast<perm_options>(~static_cast<unsigned>(_LHS)); +inline constexpr perm_options operator~(perm_options __lhs) { + return static_cast<perm_options>(~static_cast<unsigned>(__lhs)); } _LIBCPP_INLINE_VISIBILITY -inline perm_options& operator&=(perm_options& _LHS, perm_options _RHS) { - return _LHS = _LHS & _RHS; +inline perm_options& operator&=(perm_options& __lhs, perm_options __rhs) { + return __lhs = __lhs & __rhs; } _LIBCPP_INLINE_VISIBILITY -inline perm_options& operator|=(perm_options& _LHS, perm_options _RHS) { - return _LHS = _LHS | _RHS; +inline perm_options& operator|=(perm_options& __lhs, perm_options __rhs) { + return __lhs = __lhs | __rhs; } _LIBCPP_INLINE_VISIBILITY -inline perm_options& operator^=(perm_options& _LHS, perm_options _RHS) { - return _LHS = _LHS ^ _RHS; +inline perm_options& operator^=(perm_options& __lhs, perm_options __rhs) { + return __lhs = __lhs ^ __rhs; } _LIBCPP_AVAILABILITY_FILESYSTEM_POP diff --git a/libcxx/include/__filesystem/perms.h b/libcxx/include/__filesystem/perms.h index 0e5c7ed8d2e9..df4590057ee1 100644 --- a/libcxx/include/__filesystem/perms.h +++ b/libcxx/include/__filesystem/perms.h @@ -55,36 +55,36 @@ enum class _LIBCPP_ENUM_VIS perms : unsigned { }; _LIBCPP_INLINE_VISIBILITY -inline constexpr perms operator&(perms _LHS, perms _RHS) { - return static_cast<perms>(static_cast<unsigned>(_LHS) & - static_cast<unsigned>(_RHS)); +inline constexpr perms operator&(perms __lhs, perms __rhs) { + return static_cast<perms>(static_cast<unsigned>(__lhs) & + static_cast<unsigned>(__rhs)); } _LIBCPP_INLINE_VISIBILITY -inline constexpr perms operator|(perms _LHS, perms _RHS) { - return static_cast<perms>(static_cast<unsigned>(_LHS) | - static_cast<unsigned>(_RHS)); +inline constexpr perms operator|(perms __lhs, perms __rhs) { + return static_cast<perms>(static_cast<unsigned>(__lhs) | + static_cast<unsigned>(__rhs)); } _LIBCPP_INLINE_VISIBILITY -inline constexpr perms operator^(perms _LHS, perms _RHS) { - return static_cast<perms>(static_cast<unsigned>(_LHS) ^ - static_cast<unsigned>(_RHS)); +inline constexpr perms operator^(perms __lhs, perms __rhs) { + return static_cast<perms>(static_cast<unsigned>(__lhs) ^ + static_cast<unsigned>(__rhs)); } _LIBCPP_INLINE_VISIBILITY -inline constexpr perms operator~(perms _LHS) { - return static_cast<perms>(~static_cast<unsigned>(_LHS)); +inline constexpr perms operator~(perms __lhs) { + return static_cast<perms>(~static_cast<unsigned>(__lhs)); } _LIBCPP_INLINE_VISIBILITY -inline perms& operator&=(perms& _LHS, perms _RHS) { return _LHS = _LHS & _RHS; } +inline perms& operator&=(perms& __lhs, perms __rhs) { return __lhs = __lhs & __rhs; } _LIBCPP_INLINE_VISIBILITY -inline perms& operator|=(perms& _LHS, perms _RHS) { return _LHS = _LHS | _RHS; } +inline perms& operator|=(perms& __lhs, perms __rhs) { return __lhs = __lhs | __rhs; } _LIBCPP_INLINE_VISIBILITY -inline perms& operator^=(perms& _LHS, perms _RHS) { return _LHS = _LHS ^ _RHS; } +inline perms& operator^=(perms& __lhs, perms __rhs) { return __lhs = __lhs ^ __rhs; } _LIBCPP_AVAILABILITY_FILESYSTEM_POP diff --git a/libcxx/include/__format/format_arg.h b/libcxx/include/__format/format_arg.h index 3f2afc898d2c..4f93024b7c69 100644 --- a/libcxx/include/__format/format_arg.h +++ b/libcxx/include/__format/format_arg.h @@ -147,15 +147,20 @@ public: /// Contains the implementation for 
basic_format_arg::handle. struct __handle { template <class _Tp> - _LIBCPP_HIDE_FROM_ABI explicit __handle(const _Tp& __v) noexcept + _LIBCPP_HIDE_FROM_ABI explicit __handle(_Tp&& __v) noexcept : __ptr_(_VSTD::addressof(__v)), __format_([](basic_format_parse_context<_CharT>& __parse_ctx, _Context& __ctx, const void* __ptr) { - using _Formatter = typename _Context::template formatter_type<_Tp>; - using _Qp = conditional_t<requires { _Formatter().format(declval<const _Tp&>(), declval<_Context&>()); }, - const _Tp, _Tp>; + using _Dp = remove_cvref_t<_Tp>; + using _Formatter = typename _Context::template formatter_type<_Dp>; + constexpr bool __const_formattable = + requires { _Formatter().format(declval<const _Dp&>(), declval<_Context&>()); }; + using _Qp = conditional_t<__const_formattable, const _Dp, _Dp>; + + static_assert(__const_formattable || !is_const_v<remove_reference_t<_Tp>>, "Mandated by [format.arg]/18"); + _Formatter __f; __parse_ctx.advance_to(__f.parse(__parse_ctx)); - __ctx.advance_to(__f.format(*const_cast<_Qp*>(static_cast<const _Tp*>(__ptr)), __ctx)); + __ctx.advance_to(__f.format(*const_cast<_Qp*>(static_cast<const _Dp*>(__ptr)), __ctx)); }) {} const void* __ptr_; @@ -205,7 +210,9 @@ public: _LIBCPP_HIDE_FROM_ABI __basic_format_arg_value(basic_string_view<_CharT> __value) noexcept : __string_view_(__value) {} _LIBCPP_HIDE_FROM_ABI __basic_format_arg_value(const void* __value) noexcept : __ptr_(__value) {} - _LIBCPP_HIDE_FROM_ABI __basic_format_arg_value(__handle __value) noexcept : __handle_(__value) {} + _LIBCPP_HIDE_FROM_ABI __basic_format_arg_value(__handle __value) noexcept + // TODO FMT Investigate why it doesn't work without the forward. + : __handle_(std::forward<__handle>(__value)) {} }; template <class _Context> @@ -251,11 +258,11 @@ public: __handle_.__format_(__parse_ctx, __ctx, __handle_.__ptr_); } - _LIBCPP_HIDE_FROM_ABI explicit handle(typename __basic_format_arg_value<_Context>::__handle __handle) noexcept + _LIBCPP_HIDE_FROM_ABI explicit handle(typename __basic_format_arg_value<_Context>::__handle& __handle) noexcept : __handle_(__handle) {} private: - typename __basic_format_arg_value<_Context>::__handle __handle_; + typename __basic_format_arg_value<_Context>::__handle& __handle_; }; #endif //_LIBCPP_STD_VER > 17 diff --git a/libcxx/include/__format/format_arg_store.h b/libcxx/include/__format/format_arg_store.h index 6602dfeb956b..26a5e71b93af 100644 --- a/libcxx/include/__format/format_arg_store.h +++ b/libcxx/include/__format/format_arg_store.h @@ -197,7 +197,7 @@ _LIBCPP_HIDE_FROM_ABI void __create_packed_storage(uint64_t& __types, __basic_fo int __shift = 0; ( [&] { - basic_format_arg<_Context> __arg = __create_format_arg<_Context>(_VSTD::forward<_Args>(__args)); + basic_format_arg<_Context> __arg = __create_format_arg<_Context>(__args); if (__shift != 0) __types |= static_cast<uint64_t>(__arg.__type_) << __shift; else @@ -211,7 +211,7 @@ _LIBCPP_HIDE_FROM_ABI void __create_packed_storage(uint64_t& __types, __basic_fo template <class _Context, class... _Args> _LIBCPP_HIDE_FROM_ABI void __store_basic_format_arg(basic_format_arg<_Context>* __data, _Args&&... __args) noexcept { - ([&] { *__data++ = __create_format_arg<_Context>(_VSTD::forward<_Args>(__args)); }(), ...); + ([&] { *__data++ = __create_format_arg<_Context>(__args); }(), ...); } template <class _Context, size_t N> @@ -230,12 +230,12 @@ struct __unpacked_format_arg_store { template <class _Context, class... 
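The __handle above type-erases an argument as a const void* plus a captureless lambda that remembers the static type and casts back; the new code additionally probes with a requires-expression whether the user's formatter can format a const object. A reduced sketch of just the erasure mechanics, not libc++'s actual types:

#include <cstdio>

struct handle {
  const void* ptr;
  void (*print)(const void*);

  template <class T>
  explicit handle(const T& v)
      : ptr(&v),
        // The lambda captures nothing, so it converts to a plain function
        // pointer; it re-derives T and casts the erased pointer back.
        print([](const void* p) { std::printf("%d\n", *static_cast<const T*>(p)); }) {}
};

int main() {
  int i = 42;
  handle h(i);    // erases 'int'
  h.print(h.ptr); // prints 42
}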
_Args> struct _LIBCPP_TEMPLATE_VIS __format_arg_store { _LIBCPP_HIDE_FROM_ABI - __format_arg_store(_Args&&... __args) noexcept { + __format_arg_store(_Args&... __args) noexcept { if constexpr (sizeof...(_Args) != 0) { if constexpr (__format::__use_packed_format_arg_store(sizeof...(_Args))) - __format::__create_packed_storage(__storage.__types_, __storage.__values_, _VSTD::forward<_Args>(__args)...); + __format::__create_packed_storage(__storage.__types_, __storage.__values_, __args...); else - __format::__store_basic_format_arg<_Context>(__storage.__args_, _VSTD::forward<_Args>(__args)...); + __format::__store_basic_format_arg<_Context>(__storage.__args_, __args...); } } diff --git a/libcxx/include/__format/formatter.h b/libcxx/include/__format/formatter.h index c39e25b354eb..4816f961c445 100644 --- a/libcxx/include/__format/formatter.h +++ b/libcxx/include/__format/formatter.h @@ -10,20 +10,10 @@ #ifndef _LIBCPP___FORMAT_FORMATTER_H #define _LIBCPP___FORMAT_FORMATTER_H -#include <__algorithm/copy.h> -#include <__algorithm/fill_n.h> -#include <__algorithm/transform.h> -#include <__assert> #include <__availability> #include <__concepts/same_as.h> #include <__config> -#include <__format/format_error.h> #include <__format/format_fwd.h> -#include <__format/format_string.h> -#include <__format/parser_std_format_spec.h> -#include <__utility/move.h> -#include <__utility/unreachable.h> -#include <string_view> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -49,229 +39,12 @@ struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter { formatter& operator=(const formatter&) = delete; }; -namespace __format_spec { - -_LIBCPP_HIDE_FROM_ABI inline char* __insert_sign(char* __buf, bool __negative, - _Flags::_Sign __sign) { - if (__negative) - *__buf++ = '-'; - else - switch (__sign) { - case _Flags::_Sign::__default: - case _Flags::_Sign::__minus: - // No sign added. - break; - case _Flags::_Sign::__plus: - *__buf++ = '+'; - break; - case _Flags::_Sign::__space: - *__buf++ = ' '; - break; - } - - return __buf; -} - -_LIBCPP_HIDE_FROM_ABI constexpr char __hex_to_upper(char c) { - switch (c) { - case 'a': - return 'A'; - case 'b': - return 'B'; - case 'c': - return 'C'; - case 'd': - return 'D'; - case 'e': - return 'E'; - case 'f': - return 'F'; - } - return c; -} - -} // namespace __format_spec - namespace __formatter { /** The character types that formatters are specialized for. 
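__create_packed_storage above ORs each argument's type tag into a single uint64_t at an increasing shift, so a handful of bits per argument replaces a whole enum object. A sketch of that packing scheme, assuming 4-bit tags purely for illustration (libc++'s field width and tag values differ):

#include <cstdint>
#include <cstdio>
#include <initializer_list>

enum class tag : unsigned { none = 0, integer = 1, fp = 2, string = 3 };
constexpr int kBits = 4; // assumed tag width

std::uint64_t pack(std::initializer_list<tag> tags) {
  std::uint64_t packed = 0;
  int shift = 0;
  for (tag t : tags) {
    packed |= static_cast<std::uint64_t>(t) << shift;
    shift += kBits;
  }
  return packed;
}

tag unpack(std::uint64_t packed, int index) {
  return static_cast<tag>((packed >> (index * kBits)) & ((1u << kBits) - 1));
}

int main() {
  std::uint64_t p = pack({tag::integer, tag::string, tag::fp});
  std::printf("%d\n", static_cast<int>(unpack(p, 1))); // 3 == string
}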
*/ template <class _CharT> concept __char_type = same_as<_CharT, char> || same_as<_CharT, wchar_t>; -struct _LIBCPP_TEMPLATE_VIS __padding_size_result { - size_t __before; - size_t __after; -}; - -_LIBCPP_HIDE_FROM_ABI constexpr __padding_size_result -__padding_size(size_t __size, size_t __width, - __format_spec::_Flags::_Alignment __align) { - _LIBCPP_ASSERT(__width > __size, - "Don't call this function when no padding is required"); - _LIBCPP_ASSERT( - __align != __format_spec::_Flags::_Alignment::__default, - "Caller should adjust the default to the value required by the type"); - - size_t __fill = __width - __size; - switch (__align) { - case __format_spec::_Flags::_Alignment::__default: - __libcpp_unreachable(); - - case __format_spec::_Flags::_Alignment::__left: - return {0, __fill}; - - case __format_spec::_Flags::_Alignment::__center: { - // The extra padding is divided per [format.string.std]/3 - // __before = floor(__fill, 2); - // __after = ceil(__fill, 2); - size_t __before = __fill / 2; - size_t __after = __fill - __before; - return {__before, __after}; - } - case __format_spec::_Flags::_Alignment::__right: - return {__fill, 0}; - } - __libcpp_unreachable(); -} - -/** - * Writes the input to the output with the required padding. - * - * Since the output column width is specified the function can be used for - * ASCII and Unicode input. - * - * @pre [@a __first, @a __last) is a valid range. - * @pre @a __size <= @a __width. Using this function when this pre-condition - * doesn't hold incurs an unwanted overhead. - * - * @param __out_it The output iterator to write to. - * @param __first Pointer to the first element to write. - * @param __last Pointer beyond the last element to write. - * @param __size The (estimated) output column width. When the elements - * to be written are ASCII the following condition holds - * @a __size == @a __last - @a __first. - * @param __width The number of output columns to write. - * @param __fill The character used for the alignment of the output. - * TODO FMT Will probably change to support Unicode grapheme - * cluster. - * @param __alignment The requested alignment. - * - * @returns An iterator pointing beyond the last element written. - * - * @note The type of the elements in range [@a __first, @a __last) can differ - * from the type of @a __fill. Integer output uses @c std::to_chars for its - * conversion, which means the [@a __first, @a __last) always contains elements - * of the type @c char. - */ -template <class _CharT, class _Fill> -_LIBCPP_HIDE_FROM_ABI auto -__write(output_iterator<const _CharT&> auto __out_it, const _CharT* __first, - const _CharT* __last, size_t __size, size_t __width, _Fill __fill, - __format_spec::_Flags::_Alignment __alignment) -> decltype(__out_it) { - - _LIBCPP_ASSERT(__first <= __last, "Not a valid range"); - _LIBCPP_ASSERT(__size < __width, "Precondition failure"); - - __padding_size_result __padding = - __padding_size(__size, __width, __alignment); - __out_it = _VSTD::fill_n(_VSTD::move(__out_it), __padding.__before, __fill); - __out_it = _VSTD::copy(__first, __last, _VSTD::move(__out_it)); - return _VSTD::fill_n(_VSTD::move(__out_it), __padding.__after, __fill); -} - -/** - * @overload - * - * Writes additional zero's for the precision before the exponent. - * This is used when the precision requested in the format string is larger - * than the maximum precision of the floating-point type. These precision - * digits are always 0. - * - * @param __exponent The location of the exponent character. 
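The __padding_size helper being removed here (it reappears in formatter_output.h below) splits width - size fill characters around the value: all after for left alignment, all before for right, and floor/ceil halves for center per [format.string.std]/3. A compact sketch of the computation:

#include <cstddef>
#include <cstdio>

struct padding { std::size_t before, after; };

padding padding_size(std::size_t size, std::size_t width, char align) {
  std::size_t fill = width - size; // precondition: width > size
  switch (align) {
  case '<': return {0, fill};
  case '^': return {fill / 2, fill - fill / 2}; // floor before, ceil after
  default : return {fill, 0};                   // '>' right alignment
  }
}

int main() {
  padding p = padding_size(3, 10, '^');
  std::printf("%zu %zu\n", p.before, p.after); // prints 3 4
}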
- * @param __num_trailing_zeros The number of 0's to write before the exponent - * character. - */ -template <class _CharT, class _Fill> -_LIBCPP_HIDE_FROM_ABI auto __write(output_iterator<const _CharT&> auto __out_it, const _CharT* __first, - const _CharT* __last, size_t __size, size_t __width, _Fill __fill, - __format_spec::_Flags::_Alignment __alignment, const _CharT* __exponent, - size_t __num_trailing_zeros) -> decltype(__out_it) { - _LIBCPP_ASSERT(__first <= __last, "Not a valid range"); - _LIBCPP_ASSERT(__num_trailing_zeros > 0, "The overload not writing trailing zeros should have been used"); - - __padding_size_result __padding = __padding_size(__size + __num_trailing_zeros, __width, __alignment); - __out_it = _VSTD::fill_n(_VSTD::move(__out_it), __padding.__before, __fill); - __out_it = _VSTD::copy(__first, __exponent, _VSTD::move(__out_it)); - __out_it = _VSTD::fill_n(_VSTD::move(__out_it), __num_trailing_zeros, _CharT('0')); - __out_it = _VSTD::copy(__exponent, __last, _VSTD::move(__out_it)); - return _VSTD::fill_n(_VSTD::move(__out_it), __padding.__after, __fill); -} - -/** - * @overload - * - * Uses a transformation operation before writing an element. - * - * TODO FMT Fill will probably change to support Unicode grapheme cluster. - */ -template <class _CharT, class _UnaryOperation, class _Fill> -_LIBCPP_HIDE_FROM_ABI auto -__write(output_iterator<const _CharT&> auto __out_it, const _CharT* __first, - const _CharT* __last, size_t __size, _UnaryOperation __op, - size_t __width, _Fill __fill, - __format_spec::_Flags::_Alignment __alignment) -> decltype(__out_it) { - - _LIBCPP_ASSERT(__first <= __last, "Not a valid range"); - _LIBCPP_ASSERT(__size < __width, "Precondition failure"); - - __padding_size_result __padding = - __padding_size(__size, __width, __alignment); - __out_it = _VSTD::fill_n(_VSTD::move(__out_it), __padding.__before, __fill); - __out_it = _VSTD::transform(__first, __last, _VSTD::move(__out_it), __op); - return _VSTD::fill_n(_VSTD::move(__out_it), __padding.__after, __fill); -} - -/** - * Writes Unicode input to the output with the required padding. - * - * This function does almost the same as the @ref __write function, but handles - * the width estimation of the Unicode input. - * - * @param __str The range [@a __first, @a __last). - * @param __precision The width to truncate the input string to, use @c -1 for - * no limit. - */ -template <class _CharT, class _Fill> -_LIBCPP_HIDE_FROM_ABI auto -__write_unicode(output_iterator<const _CharT&> auto __out_it, - basic_string_view<_CharT> __str, ptrdiff_t __width, - ptrdiff_t __precision, _Fill __fill, - __format_spec::_Flags::_Alignment __alignment) - -> decltype(__out_it) { - - // This value changes when there Unicode column width limits the output - // size. - auto __last = __str.end(); - if (__width != 0 || __precision != -1) { - __format_spec::__string_alignment<_CharT> __format_traits = - __format_spec::__get_string_alignment(__str.begin(), __str.end(), - __width, __precision); - - if (__format_traits.__align) - return __write(_VSTD::move(__out_it), __str.begin(), - __format_traits.__last, __format_traits.__size, __width, - __fill, __alignment); - - // No alignment required update the output based on the precision. - // This might be the same as __str.end(). - __last = __format_traits.__last; - } - - // Copy the input to the output. The output size might be limited by the - // precision. 
- return _VSTD::copy(__str.begin(), __last, _VSTD::move(__out_it)); -} - } // namespace __formatter #endif //_LIBCPP_STD_VER > 17 diff --git a/libcxx/include/__format/formatter_bool.h b/libcxx/include/__format/formatter_bool.h index 4c9d3fc77473..cdb0631f87d4 100644 --- a/libcxx/include/__format/formatter_bool.h +++ b/libcxx/include/__format/formatter_bool.h @@ -47,6 +47,7 @@ public: _LIBCPP_HIDE_FROM_ABI auto format(bool __value, auto& __ctx) const -> decltype(__ctx.out()) { switch (__parser_.__type_) { + case __format_spec::__type::__default: case __format_spec::__type::__string: return __formatter::__format_bool(__value, __ctx, __parser_.__get_parsed_std_specifications(__ctx)); diff --git a/libcxx/include/__format/formatter_char.h b/libcxx/include/__format/formatter_char.h index cd54abba348a..a3ca36ec0a62 100644 --- a/libcxx/include/__format/formatter_char.h +++ b/libcxx/include/__format/formatter_char.h @@ -41,7 +41,7 @@ public: } _LIBCPP_HIDE_FROM_ABI auto format(_CharT __value, auto& __ctx) const -> decltype(__ctx.out()) { - if (__parser_.__type_ == __format_spec::__type::__char) + if (__parser_.__type_ == __format_spec::__type::__default || __parser_.__type_ == __format_spec::__type::__char) return __formatter::__format_char(__value, __ctx.out(), __parser_.__get_parsed_std_specifications(__ctx)); if constexpr (sizeof(_CharT) <= sizeof(int)) diff --git a/libcxx/include/__format/formatter_floating_point.h b/libcxx/include/__format/formatter_floating_point.h index c9f5689abd8b..90a76193196e 100644 --- a/libcxx/include/__format/formatter_floating_point.h +++ b/libcxx/include/__format/formatter_floating_point.h @@ -17,21 +17,19 @@ #include <__algorithm/min.h> #include <__algorithm/rotate.h> #include <__algorithm/transform.h> -#include <__assert> #include <__concepts/arithmetic.h> #include <__concepts/same_as.h> #include <__config> -#include <__format/format_error.h> #include <__format/format_fwd.h> -#include <__format/format_string.h> +#include <__format/format_parse_context.h> #include <__format/formatter.h> #include <__format/formatter_integral.h> +#include <__format/formatter_output.h> #include <__format/parser_std_format_spec.h> #include <__memory/allocator.h> #include <__utility/move.h> #include <__utility/unreachable.h> #include <charconv> -#include <cmath> #ifndef _LIBCPP_HAS_NO_LOCALIZATION # include <locale> @@ -48,7 +46,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER > 17 -namespace __format_spec { +namespace __formatter { template <floating_point _Tp> _LIBCPP_HIDE_FROM_ABI char* __to_buffer(char* __first, char* __last, _Tp __value) { @@ -164,7 +162,7 @@ public: __precision_ = _Traits::__max_fractional; } - __size_ = __format_spec::__float_buffer_size<_Fp>(__precision_); + __size_ = __formatter::__float_buffer_size<_Fp>(__precision_); if (__size_ > _Traits::__stack_buffer_size) // The allocated buffer's contents don't need initialization. 
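The __to_buffer helpers referenced in the floating-point formatter below are thin wrappers over std::to_chars writing into a preallocated char buffer. A sketch of the underlying calls, assuming a standard library with C++17 floating-point to_chars support:

#include <charconv>
#include <cstdio>

int main() {
  char buf[64];
  // Shortest-round-trip form, like the default presentation path.
  auto [end, ec] = std::to_chars(buf, buf + sizeof(buf), 3.14159);
  if (ec == std::errc()) std::printf("%.*s\n", static_cast<int>(end - buf), buf);

  // Hex form with an explicit precision, as the 'a' presentation type uses.
  auto [end2, ec2] = std::to_chars(buf, buf + sizeof(buf), 3.14159,
                                   std::chars_format::hex, 4);
  if (ec2 == std::errc()) std::printf("%.*s\n", static_cast<int>(end2 - buf), buf);
}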
__begin_ = allocator<char>{}.allocate(__size_); @@ -233,9 +231,9 @@ _LIBCPP_HIDE_FROM_ABI __float_result __format_buffer_default(const __float_buffe char* __integral) { __float_result __result; __result.__integral = __integral; - __result.__last = __format_spec::__to_buffer(__integral, __buffer.end(), __value); + __result.__last = __formatter::__to_buffer(__integral, __buffer.end(), __value); - __result.__exponent = __format_spec::__find_exponent(__result.__integral, __result.__last); + __result.__exponent = __formatter::__find_exponent(__result.__integral, __result.__last); // Constrains: // - There's at least one decimal digit before the radix point. @@ -264,9 +262,9 @@ _LIBCPP_HIDE_FROM_ABI __float_result __format_buffer_hexadecimal_lower_case(cons __float_result __result; __result.__integral = __integral; if (__precision == -1) - __result.__last = __format_spec::__to_buffer(__integral, __buffer.end(), __value, chars_format::hex); + __result.__last = __formatter::__to_buffer(__integral, __buffer.end(), __value, chars_format::hex); else - __result.__last = __format_spec::__to_buffer(__integral, __buffer.end(), __value, chars_format::hex, __precision); + __result.__last = __formatter::__to_buffer(__integral, __buffer.end(), __value, chars_format::hex, __precision); // H = one or more hex-digits // S = sign @@ -315,7 +313,7 @@ _LIBCPP_HIDE_FROM_ABI __float_result __format_buffer_hexadecimal_upper_case(cons _Tp __value, int __precision, char* __integral) { __float_result __result = - __format_spec::__format_buffer_hexadecimal_lower_case(__buffer, __value, __precision, __integral); + __formatter::__format_buffer_hexadecimal_lower_case(__buffer, __value, __precision, __integral); _VSTD::transform(__result.__integral, __result.__exponent, __result.__integral, __hex_to_upper); *__result.__exponent = 'P'; return __result; @@ -328,13 +326,13 @@ _LIBCPP_HIDE_FROM_ABI __float_result __format_buffer_scientific_lower_case(const __float_result __result; __result.__integral = __integral; __result.__last = - __format_spec::__to_buffer(__integral, __buffer.end(), __value, chars_format::scientific, __precision); + __formatter::__to_buffer(__integral, __buffer.end(), __value, chars_format::scientific, __precision); char* __first = __integral + 1; _LIBCPP_ASSERT(__first != __result.__last, "No exponent present"); if (*__first == '.') { __result.__radix_point = __first; - __result.__exponent = __format_spec::__find_exponent(__first + 1, __result.__last); + __result.__exponent = __formatter::__find_exponent(__first + 1, __result.__last); } else { __result.__radix_point = __result.__last; __result.__exponent = __first; @@ -354,7 +352,7 @@ _LIBCPP_HIDE_FROM_ABI __float_result __format_buffer_scientific_upper_case(const _Tp __value, int __precision, char* __integral) { __float_result __result = - __format_spec::__format_buffer_scientific_lower_case(__buffer, __value, __precision, __integral); + __formatter::__format_buffer_scientific_lower_case(__buffer, __value, __precision, __integral); *__result.__exponent = 'E'; return __result; } @@ -364,7 +362,7 @@ _LIBCPP_HIDE_FROM_ABI __float_result __format_buffer_fixed(const __float_buffer< int __precision, char* __integral) { __float_result __result; __result.__integral = __integral; - __result.__last = __format_spec::__to_buffer(__integral, __buffer.end(), __value, chars_format::fixed, __precision); + __result.__last = __formatter::__to_buffer(__integral, __buffer.end(), __value, chars_format::fixed, __precision); // When there's no precision there's no radix point. 
// Else the radix point is placed at __precision + 1 from the end. @@ -390,14 +388,14 @@ _LIBCPP_HIDE_FROM_ABI __float_result __format_buffer_general_lower_case(__float_ __float_result __result; __result.__integral = __integral; - __result.__last = __format_spec::__to_buffer(__integral, __buffer.end(), __value, chars_format::general, __precision); + __result.__last = __formatter::__to_buffer(__integral, __buffer.end(), __value, chars_format::general, __precision); char* __first = __integral + 1; if (__first == __result.__last) { __result.__radix_point = __result.__last; __result.__exponent = __result.__last; } else { - __result.__exponent = __format_spec::__find_exponent(__first, __result.__last); + __result.__exponent = __formatter::__find_exponent(__first, __result.__last); if (__result.__exponent != __result.__last) // In scientific mode if there's a radix point it will always be after // the first digit. (This is the position __first points at). @@ -423,19 +421,79 @@ _LIBCPP_HIDE_FROM_ABI __float_result __format_buffer_general_lower_case(__float_ template <class _Fp, class _Tp> _LIBCPP_HIDE_FROM_ABI __float_result __format_buffer_general_upper_case(__float_buffer<_Fp>& __buffer, _Tp __value, int __precision, char* __integral) { - __float_result __result = - __format_spec::__format_buffer_general_lower_case(__buffer, __value, __precision, __integral); + __float_result __result = __formatter::__format_buffer_general_lower_case(__buffer, __value, __precision, __integral); if (__result.__exponent != __result.__last) *__result.__exponent = 'E'; return __result; } -# ifndef _LIBCPP_HAS_NO_LOCALIZATION +/// Fills the buffer with the data based on the requested formatting. +/// +/// This function, when needed, turns the characters to upper case and +/// determines the "interesting" locations which are returned to the caller. +/// +/// This means the caller never has to convert the contents of the buffer to +/// upper case or search for radix points and the location of the exponent. +/// This gives a bit of overhead. The original code didn't do that, but due +/// to the number of possible additional work needed to turn this number to +/// the proper output the code was littered with tests for upper cases and +/// searches for radix points and exponents. +/// - When a precision larger than the type's precision is selected +/// additional zero characters need to be written before the exponent. +/// - alternate form needs to add a radix point when not present. +/// - localization needs to do grouping in the integral part. +template <class _Fp, class _Tp> +// TODO FMT _Fp should just be _Tp when to_chars has proper long double support. +_LIBCPP_HIDE_FROM_ABI __float_result __format_buffer( + __float_buffer<_Fp>& __buffer, + _Tp __value, + bool __negative, + bool __has_precision, + __format_spec::__sign __sign, + __format_spec::__type __type) { + char* __first = __formatter::__insert_sign(__buffer.begin(), __negative, __sign); + switch (__type) { + case __format_spec::__type::__default: + return __formatter::__format_buffer_default(__buffer, __value, __first); + + case __format_spec::__type::__hexfloat_lower_case: + return __formatter::__format_buffer_hexadecimal_lower_case( + __buffer, __value, __has_precision ? __buffer.__precision() : -1, __first); + + case __format_spec::__type::__hexfloat_upper_case: + return __formatter::__format_buffer_hexadecimal_upper_case( + __buffer, __value, __has_precision ? 
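The new free function __format_buffer shown below replaces the old member dispatch with a switch over the parsed presentation type. A reduced sketch of the same shape, mapping an illustrative type enum (not libc++'s) onto std::chars_format:

#include <charconv>
#include <cstdio>

enum class type { def, scientific, fixed, general, hexfloat };

std::chars_format to_chars_format(type t) {
  switch (t) {
  case type::scientific: return std::chars_format::scientific;
  case type::fixed:      return std::chars_format::fixed;
  case type::hexfloat:   return std::chars_format::hex;
  case type::def:
  case type::general:    return std::chars_format::general;
  }
  return std::chars_format::general; // unreachable with valid input
}

int main() {
  char buf[32];
  auto [end, ec] = std::to_chars(buf, buf + sizeof(buf),
                                 1234.5, to_chars_format(type::scientific), 2);
  if (ec == std::errc())
    std::printf("%.*s\n", static_cast<int>(end - buf), buf); // 1.23e+03
}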
__buffer.__precision() : -1, __first); + + case __format_spec::__type::__scientific_lower_case: + return __formatter::__format_buffer_scientific_lower_case(__buffer, __value, __buffer.__precision(), __first); + + case __format_spec::__type::__scientific_upper_case: + return __formatter::__format_buffer_scientific_upper_case(__buffer, __value, __buffer.__precision(), __first); + + case __format_spec::__type::__fixed_lower_case: + case __format_spec::__type::__fixed_upper_case: + return __formatter::__format_buffer_fixed(__buffer, __value, __buffer.__precision(), __first); + + case __format_spec::__type::__general_lower_case: + return __formatter::__format_buffer_general_lower_case(__buffer, __value, __buffer.__precision(), __first); + + case __format_spec::__type::__general_upper_case: + return __formatter::__format_buffer_general_upper_case(__buffer, __value, __buffer.__precision(), __first); + + default: + _LIBCPP_ASSERT(false, "The parser should have validated the type"); + __libcpp_unreachable(); + } +} + +# ifndef _LIBCPP_HAS_NO_LOCALIZATION template <class _OutIt, class _Fp, class _CharT> -_LIBCPP_HIDE_FROM_ABI _OutIt __format_locale_specific_form(_OutIt __out_it, const __float_buffer<_Fp>& __buffer, - const __float_result& __result, _VSTD::locale __loc, - size_t __width, _Flags::_Alignment __alignment, - _CharT __fill) { +_LIBCPP_HIDE_FROM_ABI _OutIt __format_locale_specific_form( + _OutIt __out_it, + const __float_buffer<_Fp>& __buffer, + const __float_result& __result, + _VSTD::locale __loc, + __format_spec::__parsed_specifications<_CharT> __specs) { const auto& __np = use_facet<numpunct<_CharT>>(__loc); string __grouping = __np.grouping(); char* __first = __result.__integral; @@ -450,26 +508,27 @@ _LIBCPP_HIDE_FROM_ABI _OutIt __format_locale_specific_form(_OutIt __out_it, cons __grouping = __formatter::__determine_grouping(__digits, __grouping); } - size_t __size = __result.__last - __buffer.begin() + // Formatted string - __buffer.__num_trailing_zeros() + // Not yet rendered zeros - __grouping.size() - // Grouping contains one - !__grouping.empty(); // additional character + ptrdiff_t __size = + __result.__last - __buffer.begin() + // Formatted string + __buffer.__num_trailing_zeros() + // Not yet rendered zeros + __grouping.size() - // Grouping contains one + !__grouping.empty(); // additional character - __formatter::__padding_size_result __padding = {0, 0}; - bool __zero_padding = __alignment == _Flags::_Alignment::__default; - if (__size < __width) { + __formatter::__padding_size_result __padding = {0, 0}; + bool __zero_padding = __specs.__alignment_ == __format_spec::__alignment::__zero_padding; + if (__size < __specs.__width_) { if (__zero_padding) { - __alignment = _Flags::_Alignment::__right; - __fill = _CharT('0'); + __specs.__alignment_ = __format_spec::__alignment::__right; + __specs.__fill_ = _CharT('0'); } - __padding = __formatter::__padding_size(__size, __width, __alignment); + __padding = __formatter::__padding_size(__size, __specs.__width_, __specs.__alignment_); } // sign and (zero padding or alignment) if (__zero_padding && __first != __buffer.begin()) *__out_it++ = *__buffer.begin(); - __out_it = _VSTD::fill_n(_VSTD::move(__out_it), __padding.__before, __fill); + __out_it = _VSTD::fill_n(_VSTD::move(__out_it), __padding.__before_, __specs.__fill_); if (!__zero_padding && __first != __buffer.begin()) *__out_it++ = *__buffer.begin(); @@ -510,198 +569,148 @@ _LIBCPP_HIDE_FROM_ABI _OutIt __format_locale_specific_form(_OutIt __out_it, cons __out_it = 
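The locale-specific path in __format_locale_specific_form pulls its separator and grouping pattern from the numpunct facet of the supplied locale. A sketch of querying that facet; the named locale is an assumption and may be absent on a given system:

#include <cstdio>
#include <locale>
#include <string>

int main() {
  try {
    std::locale loc("en_US.UTF-8"); // assumed installed; throws otherwise
    const auto& np = std::use_facet<std::numpunct<char>>(loc);
    std::string grouping = np.grouping(); // "\3" means repeating groups of 3
    std::printf("sep='%c' first-group=%d\n", np.thousands_sep(),
                grouping.empty() ? 0 : grouping[0]);
  } catch (const std::runtime_error&) {
    std::puts("locale not available");
  }
}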
_VSTD::copy(__result.__exponent, __result.__last, _VSTD::move(__out_it)); // alignment - return _VSTD::fill_n(_VSTD::move(__out_it), __padding.__after, __fill); + return _VSTD::fill_n(_VSTD::move(__out_it), __padding.__after_, __specs.__fill_); +} +# endif // _LIBCPP_HAS_NO_LOCALIZATION + +template <class _OutIt, class _CharT> +_LIBCPP_HIDE_FROM_ABI _OutIt __format_floating_point_non_finite( + _OutIt __out_it, __format_spec::__parsed_specifications<_CharT> __specs, bool __negative, bool __isnan) { + char __buffer[4]; + char* __last = __formatter::__insert_sign(__buffer, __negative, __specs.__std_.__sign_); + + // to_chars can return inf, infinity, nan, and nan(n-char-sequence). + // The format library requires inf and nan. + // All in one expression to avoid dangling references. + bool __upper_case = + __specs.__std_.__type_ == __format_spec::__type::__hexfloat_upper_case || + __specs.__std_.__type_ == __format_spec::__type::__scientific_upper_case || + __specs.__std_.__type_ == __format_spec::__type::__fixed_upper_case || + __specs.__std_.__type_ == __format_spec::__type::__general_upper_case; + __last = _VSTD::copy_n(&("infnanINFNAN"[6 * __upper_case + 3 * __isnan]), 3, __last); + + // [format.string.std]/13 + // A zero (0) character preceding the width field pads the field with + // leading zeros (following any indication of sign or base) to the field + // width, except when applied to an infinity or NaN. + if (__specs.__alignment_ == __format_spec::__alignment::__zero_padding) + __specs.__alignment_ = __format_spec::__alignment::__right; + + return __formatter::__write(__buffer, __last, _VSTD::move(__out_it), __specs); } -# endif // _LIBCPP_HAS_NO_LOCALIZATION - -template <__formatter::__char_type _CharT> -class _LIBCPP_TEMPLATE_VIS __formatter_floating_point : public __parser_floating_point<_CharT> { -public: - template <floating_point _Tp> - _LIBCPP_HIDE_FROM_ABI auto format(_Tp __value, auto& __ctx) -> decltype(__ctx.out()) { - if (this->__width_needs_substitution()) - this->__substitute_width_arg_id(__ctx.arg(this->__width)); - - bool __negative = _VSTD::signbit(__value); - - if (!_VSTD::isfinite(__value)) [[unlikely]] - return __format_non_finite(__ctx.out(), __negative, _VSTD::isnan(__value)); - - bool __has_precision = this->__has_precision_field(); - if (this->__precision_needs_substitution()) - this->__substitute_precision_arg_id(__ctx.arg(this->__precision)); - - // Depending on the std-format-spec string the sign and the value - // might not be outputted together: - // - zero-padding may insert additional '0' characters. - // Therefore the value is processed as a non negative value. - // The function @ref __insert_sign will insert a '-' when the value was - // negative. - - if (__negative) - __value = _VSTD::copysign(__value, +1.0); - - // TODO FMT _Fp should just be _Tp when to_chars has proper long double support. - using _Fp = conditional_t<same_as<_Tp, long double>, double, _Tp>; - // Force the type of the precision to avoid -1 to become an unsigned value. - __float_buffer<_Fp> __buffer(__has_precision ? int(this->__precision) : -1); - __float_result __result = __format_buffer(__buffer, __value, __negative, __has_precision); - - if (this->__alternate_form && __result.__radix_point == __result.__last) { - *__result.__last++ = '.'; - - // When there is an exponent the point needs to be moved before the - // exponent. When there's no exponent the rotate does nothing. Since - // rotate tests whether the operation is a nop, call it unconditionally. 
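__format_floating_point_non_finite above picks its three-character output by indexing into the single literal "infnanINFNAN": adding 6 selects the upper-case half and adding 3 selects nan within a half. The trick in isolation:

#include <algorithm>
#include <cstdio>

int main() {
  const char* table = "infnanINFNAN";
  for (int upper = 0; upper <= 1; ++upper)
    for (int isnan = 0; isnan <= 1; ++isnan) {
      char out[4] = {};
      std::copy_n(table + 6 * upper + 3 * isnan, 3, out);
      std::printf("%s\n", out); // inf, nan, INF, NAN
    }
}

Using one literal and arithmetic keeps the whole selection a single expression, which is what the "all in one expression to avoid dangling references" comment above is about.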
- _VSTD::rotate(__result.__exponent, __result.__last - 1, __result.__last); - __result.__radix_point = __result.__exponent; - - // The radix point is always placed before the exponent. - // - No exponent needs to point to the new last. - // - An exponent needs to move one position to the right. - // So it's safe to increment the value unconditionally. - ++__result.__exponent; - } +template <floating_point _Tp, class _CharT> +_LIBCPP_HIDE_FROM_ABI auto +__format_floating_point(_Tp __value, auto& __ctx, __format_spec::__parsed_specifications<_CharT> __specs) + -> decltype(__ctx.out()) { + bool __negative = _VSTD::signbit(__value); -# ifndef _LIBCPP_HAS_NO_LOCALIZATION - if (this->__locale_specific_form) - return __format_spec::__format_locale_specific_form(__ctx.out(), __buffer, __result, __ctx.locale(), - this->__width, this->__alignment, this->__fill); -# endif - - ptrdiff_t __size = __result.__last - __buffer.begin(); - int __num_trailing_zeros = __buffer.__num_trailing_zeros(); - if (__size + __num_trailing_zeros >= this->__width) { - if (__num_trailing_zeros && __result.__exponent != __result.__last) - // Insert trailing zeros before exponent character. - return _VSTD::copy(__result.__exponent, __result.__last, - _VSTD::fill_n(_VSTD::copy(__buffer.begin(), __result.__exponent, __ctx.out()), - __num_trailing_zeros, _CharT('0'))); - - return _VSTD::fill_n(_VSTD::copy(__buffer.begin(), __result.__last, __ctx.out()), __num_trailing_zeros, - _CharT('0')); - } + if (!_VSTD::isfinite(__value)) [[unlikely]] + return __formatter::__format_floating_point_non_finite(__ctx.out(), __specs, __negative, _VSTD::isnan(__value)); - auto __out_it = __ctx.out(); - char* __first = __buffer.begin(); - if (this->__alignment == _Flags::_Alignment::__default) { - // When there is a sign output it before the padding. Note the __size - // doesn't need any adjustment, regardless whether the sign is written - // here or in __formatter::__write. - if (__first != __result.__integral) - *__out_it++ = *__first++; - // After the sign is written, zero padding is the same a right alignment - // with '0'. - this->__alignment = _Flags::_Alignment::__right; - this->__fill = _CharT('0'); - } + // Depending on the std-format-spec string the sign and the value + // might not be outputted together: + // - zero-padding may insert additional '0' characters. + // Therefore the value is processed as a non negative value. + // The function @ref __insert_sign will insert a '-' when the value was + // negative. - if (__num_trailing_zeros) - return __formatter::__write(_VSTD::move(__out_it), __first, __result.__last, __size, this->__width, this->__fill, - this->__alignment, __result.__exponent, __num_trailing_zeros); + if (__negative) + __value = -__value; - return __formatter::__write(_VSTD::move(__out_it), __first, __result.__last, __size, this->__width, this->__fill, - this->__alignment); + // TODO FMT _Fp should just be _Tp when to_chars has proper long double support. + using _Fp = conditional_t<same_as<_Tp, long double>, double, _Tp>; + // Force the type of the precision to avoid -1 to become an unsigned value. + __float_buffer<_Fp> __buffer(__specs.__precision_); + __float_result __result = __formatter::__format_buffer( + __buffer, __value, __negative, (__specs.__has_precision()), __specs.__std_.__sign_, __specs.__std_.__type_); + + if (__specs.__std_.__alternate_form_ && __result.__radix_point == __result.__last) { + *__result.__last++ = '.'; + + // When there is an exponent the point needs to be moved before the + // exponent. 
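The alternate-form fix-up in this hunk appends the radix point at the end of the buffer and then lets std::rotate slide it in front of the exponent; rotate degenerates to a no-op when the point is already last. The same move on a plain buffer:

#include <algorithm>
#include <cstdio>

int main() {
  // "1e+05" with '#' must become "1.e+05": append '.', rotate it before 'e'.
  char buf[8] = "1e+05";
  char* last = buf + 5;
  char* exponent = buf + 1; // points at 'e'
  *last++ = '.';            // buffer now holds "1e+05."
  std::rotate(exponent, last - 1, last);
  std::printf("%.*s\n", static_cast<int>(last - buf), buf); // 1.e+05
}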
When there's no exponent the rotate does nothing. Since + // rotate tests whether the operation is a nop, call it unconditionally. + _VSTD::rotate(__result.__exponent, __result.__last - 1, __result.__last); + __result.__radix_point = __result.__exponent; + + // The radix point is always placed before the exponent. + // - No exponent needs to point to the new last. + // - An exponent needs to move one position to the right. + // So it's safe to increment the value unconditionally. + ++__result.__exponent; } -private: - template <class _OutIt> - _LIBCPP_HIDE_FROM_ABI _OutIt __format_non_finite(_OutIt __out_it, bool __negative, bool __isnan) { - char __buffer[4]; - char* __last = __insert_sign(__buffer, __negative, this->__sign); - - // to_char can return inf, infinity, nan, and nan(n-char-sequence). - // The format library requires inf and nan. - // All in one expression to avoid dangling references. - __last = _VSTD::copy_n(&("infnanINFNAN"[6 * (this->__type == _Flags::_Type::__float_hexadecimal_upper_case || - this->__type == _Flags::_Type::__scientific_upper_case || - this->__type == _Flags::_Type::__fixed_upper_case || - this->__type == _Flags::_Type::__general_upper_case) + - 3 * __isnan]), - 3, __last); - - // [format.string.std]/13 - // A zero (0) character preceding the width field pads the field with - // leading zeros (following any indication of sign or base) to the field - // width, except when applied to an infinity or NaN. - if (this->__alignment == _Flags::_Alignment::__default) - this->__alignment = _Flags::_Alignment::__right; - - ptrdiff_t __size = __last - __buffer; - if (__size >= this->__width) - return _VSTD::copy_n(__buffer, __size, _VSTD::move(__out_it)); - - return __formatter::__write(_VSTD::move(__out_it), __buffer, __last, __size, this->__width, this->__fill, - this->__alignment); +# ifndef _LIBCPP_HAS_NO_LOCALIZATION + if (__specs.__std_.__locale_specific_form_) + return __formatter::__format_locale_specific_form(__ctx.out(), __buffer, __result, __ctx.locale(), __specs); +# endif + + ptrdiff_t __size = __result.__last - __buffer.begin(); + int __num_trailing_zeros = __buffer.__num_trailing_zeros(); + if (__size + __num_trailing_zeros >= __specs.__width_) { + if (__num_trailing_zeros && __result.__exponent != __result.__last) + // Insert trailing zeros before exponent character. + return _VSTD::copy( + __result.__exponent, + __result.__last, + _VSTD::fill_n( + _VSTD::copy(__buffer.begin(), __result.__exponent, __ctx.out()), __num_trailing_zeros, _CharT('0'))); + + return _VSTD::fill_n( + _VSTD::copy(__buffer.begin(), __result.__last, __ctx.out()), __num_trailing_zeros, _CharT('0')); } - /// Fills the buffer with the data based on the requested formatting. - /// - /// This function, when needed, turns the characters to upper case and - /// determines the "interesting" locations which are returned to the caller. - /// - /// This means the caller never has to convert the contents of the buffer to - /// upper case or search for radix points and the location of the exponent. - /// This gives a bit of overhead. The original code didn't do that, but due - /// to the number of possible additional work needed to turn this number to - /// the proper output the code was littered with tests for upper cases and - /// searches for radix points and exponents. - /// - When a precision larger than the type's precision is selected - /// additional zero characters need to be written before the exponent. - /// - alternate form needs to add a radix point when not present. 
- /// - localization needs to do grouping in the integral part. - template <class _Fp, class _Tp> - // TODO FMT _Fp should just be _Tp when to_chars has proper long double support. - _LIBCPP_HIDE_FROM_ABI __float_result __format_buffer(__float_buffer<_Fp>& __buffer, _Tp __value, bool __negative, - bool __has_precision) { - char* __first = __insert_sign(__buffer.begin(), __negative, this->__sign); - switch (this->__type) { - case _Flags::_Type::__default: - return __format_spec::__format_buffer_default(__buffer, __value, __first); - - case _Flags::_Type::__float_hexadecimal_lower_case: - return __format_spec::__format_buffer_hexadecimal_lower_case( - __buffer, __value, __has_precision ? __buffer.__precision() : -1, __first); - - case _Flags::_Type::__float_hexadecimal_upper_case: - return __format_spec::__format_buffer_hexadecimal_upper_case( - __buffer, __value, __has_precision ? __buffer.__precision() : -1, __first); - - case _Flags::_Type::__scientific_lower_case: - return __format_spec::__format_buffer_scientific_lower_case(__buffer, __value, __buffer.__precision(), __first); + auto __out_it = __ctx.out(); + char* __first = __buffer.begin(); + if (__specs.__alignment_ == __format_spec::__alignment ::__zero_padding) { + // When there is a sign output it before the padding. Note the __size + // doesn't need any adjustment, regardless whether the sign is written + // here or in __formatter::__write. + if (__first != __result.__integral) + *__out_it++ = *__first++; + // After the sign is written, zero padding is the same a right alignment + // with '0'. + __specs.__alignment_ = __format_spec::__alignment::__right; + __specs.__fill_ = _CharT('0'); + } - case _Flags::_Type::__scientific_upper_case: - return __format_spec::__format_buffer_scientific_upper_case(__buffer, __value, __buffer.__precision(), __first); + if (__num_trailing_zeros) + return __formatter::__write_using_trailing_zeros( + __first, __result.__last, _VSTD::move(__out_it), __specs, __size, __result.__exponent, __num_trailing_zeros); - case _Flags::_Type::__fixed_lower_case: - case _Flags::_Type::__fixed_upper_case: - return __format_spec::__format_buffer_fixed(__buffer, __value, __buffer.__precision(), __first); + return __formatter::__write(__first, __result.__last, _VSTD::move(__out_it), __specs, __size); +} - case _Flags::_Type::__general_lower_case: - return __format_spec::__format_buffer_general_lower_case(__buffer, __value, __buffer.__precision(), __first); +} // namespace __formatter - case _Flags::_Type::__general_upper_case: - return __format_spec::__format_buffer_general_upper_case(__buffer, __value, __buffer.__precision(), __first); +template <__formatter::__char_type _CharT> +struct _LIBCPP_TEMPLATE_VIS __formatter_floating_point { +public: + _LIBCPP_HIDE_FROM_ABI constexpr auto + parse(basic_format_parse_context<_CharT>& __parse_ctx) -> decltype(__parse_ctx.begin()) { + auto __result = __parser_.__parse(__parse_ctx, __format_spec::__fields_floating_point); + __format_spec::__process_parsed_floating_point(__parser_); + return __result; + } - default: - _LIBCPP_ASSERT(false, "The parser should have validated the type"); - __libcpp_unreachable(); - } + template <floating_point _Tp> + _LIBCPP_HIDE_FROM_ABI auto format(_Tp __value, auto& __ctx) const -> decltype(__ctx.out()) { + return __formatter::__format_floating_point(__value, __ctx, __parser_.__get_parsed_std_specifications(__ctx)); } -}; -} //namespace __format_spec + __format_spec::__parser<_CharT> __parser_; +}; template <__formatter::__char_type _CharT> 
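After this rewrite __formatter_floating_point is a plain struct exposing parse/format over a stored __parser_, which is the same protocol user-defined formatters follow. A user-side sketch built on that protocol, assuming a C++20 <format> implementation where formatter<double>::format is const-callable (post-P2418):

#include <format>
#include <iostream>

struct celsius { double value; };

// Reuse the double formatter: parse() is inherited, so the full
// std-format-spec keeps working; format() appends a unit suffix.
template <>
struct std::formatter<celsius, char> : std::formatter<double, char> {
  auto format(celsius c, std::format_context& ctx) const {
    auto out = std::formatter<double, char>::format(c.value, ctx);
    return std::format_to(out, " C");
  }
};

int main() { std::cout << std::format("{:.1f}\n", celsius{21.54}); } // "21.5 C"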
struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter<float, _CharT> - : public __format_spec::__formatter_floating_point<_CharT> {}; + : public __formatter_floating_point<_CharT> {}; template <__formatter::__char_type _CharT> struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter<double, _CharT> - : public __format_spec::__formatter_floating_point<_CharT> {}; + : public __formatter_floating_point<_CharT> {}; template <__formatter::__char_type _CharT> struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter<long double, _CharT> - : public __format_spec::__formatter_floating_point<_CharT> {}; + : public __formatter_floating_point<_CharT> {}; #endif //_LIBCPP_STD_VER > 17 diff --git a/libcxx/include/__format/formatter_integer.h b/libcxx/include/__format/formatter_integer.h index 5d11f8d1d990..0281b4f2fa67 100644 --- a/libcxx/include/__format/formatter_integer.h +++ b/libcxx/include/__format/formatter_integer.h @@ -13,23 +13,18 @@ #include <__availability> #include <__concepts/arithmetic.h> #include <__config> -#include <__format/format_error.h> // TODO FMT Remove after adding 128-bit support #include <__format/format_fwd.h> #include <__format/format_parse_context.h> #include <__format/formatter.h> #include <__format/formatter_integral.h> #include <__format/formatter_output.h> #include <__format/parser_std_format_spec.h> -#include <limits> // TODO FMT Remove after adding 128-bit support #include <type_traits> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header #endif -_LIBCPP_PUSH_MACROS // TODO FMT Remove after adding 128-bit support -#include <__undef_macros> - _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER > 17 @@ -79,18 +74,7 @@ struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter<long long, _Ch # ifndef _LIBCPP_HAS_NO_INT128 template <__formatter::__char_type _CharT> struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter<__int128_t, _CharT> - : public __formatter_integer<_CharT> { - using _Base = __formatter_integer<_CharT>; - - _LIBCPP_HIDE_FROM_ABI auto format(__int128_t __value, auto& __ctx) const -> decltype(__ctx.out()) { - // TODO FMT Implement full 128 bit support. - using _To = long long; - if (__value < numeric_limits<_To>::min() || __value > numeric_limits<_To>::max()) - std::__throw_format_error("128-bit value is outside of implemented range"); - - return _Base::format(static_cast<_To>(__value), __ctx); - } -}; + : public __formatter_integer<_CharT> {}; # endif // Unsigned integral types. @@ -112,24 +96,11 @@ struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter<unsigned long # ifndef _LIBCPP_HAS_NO_INT128 template <__formatter::__char_type _CharT> struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter<__uint128_t, _CharT> - : public __formatter_integer<_CharT> { - using _Base = __formatter_integer<_CharT>; - - _LIBCPP_HIDE_FROM_ABI auto format(__uint128_t __value, auto& __ctx) const -> decltype(__ctx.out()) { - // TODO FMT Implement full 128 bit support. 
- using _To = unsigned long long; - if (__value < numeric_limits<_To>::min() || __value > numeric_limits<_To>::max()) - std::__throw_format_error("128-bit value is outside of implemented range"); - - return _Base::format(static_cast<_To>(__value), __ctx); - } -}; + : public __formatter_integer<_CharT> {}; # endif #endif //_LIBCPP_STD_VER > 17 _LIBCPP_END_NAMESPACE_STD -_LIBCPP_POP_MACROS - #endif // _LIBCPP___FORMAT_FORMATTER_INTEGER_H diff --git a/libcxx/include/__format/formatter_integral.h b/libcxx/include/__format/formatter_integral.h index 4ad6de0ec66f..d6fa5ec18eb8 100644 --- a/libcxx/include/__format/formatter_integral.h +++ b/libcxx/include/__format/formatter_integral.h @@ -207,10 +207,6 @@ _LIBCPP_HIDE_FROM_ABI auto __format_integer( char* __end, const char* __prefix, int __base) -> decltype(__ctx.out()) { - _LIBCPP_ASSERT( - __specs.__alignment_ != __format_spec::__alignment::__default, - "the caller should adjust the default to the value required by the type"); - char* __first = __formatter::__insert_sign(__begin, __negative, __specs.__std_.__sign_); if (__specs.__std_.__alternate_form_ && __prefix) while (*__prefix) @@ -280,6 +276,7 @@ _LIBCPP_HIDE_FROM_ABI auto __format_integer( return __formatter::__format_integer( __value, __ctx, __specs, __negative, __array.begin(), __array.end(), __value != 0 ? "0" : nullptr, 8); } + case __format_spec::__type::__default: case __format_spec::__type::__decimal: { array<char, __formatter::__buffer_size<decltype(__value), 10>()> __array; return __formatter::__format_integer( diff --git a/libcxx/include/__format/formatter_output.h b/libcxx/include/__format/formatter_output.h index ab016f6f1610..c59cbbeeb5dd 100644 --- a/libcxx/include/__format/formatter_output.h +++ b/libcxx/include/__format/formatter_output.h @@ -33,8 +33,8 @@ _LIBCPP_BEGIN_NAMESPACE_STD namespace __formatter { -_LIBCPP_HIDE_FROM_ABI constexpr char __hex_to_upper(char c) { - switch (c) { +_LIBCPP_HIDE_FROM_ABI constexpr char __hex_to_upper(char __c) { + switch (__c) { case 'a': return 'A'; case 'b': @@ -48,27 +48,22 @@ _LIBCPP_HIDE_FROM_ABI constexpr char __hex_to_upper(char c) { case 'f': return 'F'; } - return c; + return __c; } -// TODO FMT remove _v2 suffix. -struct _LIBCPP_TYPE_VIS __padding_size_result_v2 { +struct _LIBCPP_TYPE_VIS __padding_size_result { size_t __before_; size_t __after_; }; -// TODO FMT remove _v2 suffix. 
-_LIBCPP_HIDE_FROM_ABI constexpr __padding_size_result_v2 __padding_size_v2(size_t __size, size_t __width, - __format_spec::__alignment __align) { +_LIBCPP_HIDE_FROM_ABI constexpr __padding_size_result +__padding_size(size_t __size, size_t __width, __format_spec::__alignment __align) { _LIBCPP_ASSERT(__width > __size, "don't call this function when no padding is required"); - _LIBCPP_ASSERT(__align != __format_spec::__alignment::__default, - "the caller should adjust the default to the value required by the type"); _LIBCPP_ASSERT(__align != __format_spec::__alignment::__zero_padding, "the caller should have handled the zero-padding"); size_t __fill = __width - __size; switch (__align) { - case __format_spec::__alignment::__default: case __format_spec::__alignment::__zero_padding: __libcpp_unreachable(); @@ -83,6 +78,7 @@ _LIBCPP_HIDE_FROM_ABI constexpr __padding_size_result_v2 __padding_size_v2(size_ size_t __after = __fill - __before; return {__before, __after}; } + case __format_spec::__alignment::__default: case __format_spec::__alignment::__right: return {__fill, 0}; } @@ -93,14 +89,11 @@ template <class _OutIt, class _CharT> _LIBCPP_HIDE_FROM_ABI _OutIt __write_using_decimal_separators(_OutIt __out_it, const char* __begin, const char* __first, const char* __last, string&& __grouping, _CharT __sep, __format_spec::__parsed_specifications<_CharT> __specs) { - _LIBCPP_ASSERT(__specs.__alignment_ != __format_spec::__alignment::__default, - "the caller should adjust the default to the value required by the type"); - int __size = (__first - __begin) + // [sign][prefix] (__last - __first) + // data (__grouping.size() - 1); // number of separator characters - __padding_size_result_v2 __padding = {0, 0}; + __padding_size_result __padding = {0, 0}; if (__specs.__alignment_ == __format_spec::__alignment::__zero_padding) { // Write [sign][prefix]. __out_it = _VSTD::copy(__begin, __first, _VSTD::move(__out_it)); @@ -113,7 +106,7 @@ _LIBCPP_HIDE_FROM_ABI _OutIt __write_using_decimal_separators(_OutIt __out_it, c } else { if (__specs.__width_ > __size) { // Determine padding and write padding. 
- __padding = __padding_size_v2(__size, __specs.__width_, __specs.__alignment_); + __padding = __padding_size(__size, __specs.__width_, __specs.__alignment_); __out_it = _VSTD::fill_n(_VSTD::move(__out_it), __padding.__before_, __specs.__fill_); } @@ -189,8 +182,7 @@ _LIBCPP_HIDE_FROM_ABI auto __write(const _CharT* __first, const _CharT* __last, if (__size >= __specs.__width_) return _VSTD::copy(__first, __last, _VSTD::move(__out_it)); - __padding_size_result_v2 __padding = - __formatter::__padding_size_v2(__size, __specs.__width_, __specs.__std_.__alignment_); + __padding_size_result __padding = __formatter::__padding_size(__size, __specs.__width_, __specs.__std_.__alignment_); __out_it = _VSTD::fill_n(_VSTD::move(__out_it), __padding.__before_, __specs.__fill_); __out_it = _VSTD::copy(__first, __last, _VSTD::move(__out_it)); return _VSTD::fill_n(_VSTD::move(__out_it), __padding.__after_, __specs.__fill_); @@ -216,12 +208,41 @@ _LIBCPP_HIDE_FROM_ABI auto __write_transformed(const _CharT* __first, const _Cha if (__size >= __specs.__width_) return _VSTD::transform(__first, __last, _VSTD::move(__out_it), __op); - __padding_size_result_v2 __padding = __padding_size_v2(__size, __specs.__width_, __specs.__alignment_); + __padding_size_result __padding = __padding_size(__size, __specs.__width_, __specs.__alignment_); __out_it = _VSTD::fill_n(_VSTD::move(__out_it), __padding.__before_, __specs.__fill_); __out_it = _VSTD::transform(__first, __last, _VSTD::move(__out_it), __op); return _VSTD::fill_n(_VSTD::move(__out_it), __padding.__after_, __specs.__fill_); } +/// Writes additional zero's for the precision before the exponent. +/// This is used when the precision requested in the format string is larger +/// than the maximum precision of the floating-point type. These precision +/// digits are always 0. +/// +/// \param __exponent The location of the exponent character. +/// \param __num_trailing_zeros The number of 0's to write before the exponent +/// character. 
+template <class _CharT, class _ParserCharT> +_LIBCPP_HIDE_FROM_ABI auto __write_using_trailing_zeros( + const _CharT* __first, + const _CharT* __last, + output_iterator<const _CharT&> auto __out_it, + __format_spec::__parsed_specifications<_ParserCharT> __specs, + size_t __size, + const _CharT* __exponent, + size_t __num_trailing_zeros) -> decltype(__out_it) { + _LIBCPP_ASSERT(__first <= __last, "Not a valid range"); + _LIBCPP_ASSERT(__num_trailing_zeros > 0, "The overload not writing trailing zeros should have been used"); + + __padding_size_result __padding = + __padding_size(__size + __num_trailing_zeros, __specs.__width_, __specs.__alignment_); + __out_it = _VSTD::fill_n(_VSTD::move(__out_it), __padding.__before_, __specs.__fill_); + __out_it = _VSTD::copy(__first, __exponent, _VSTD::move(__out_it)); + __out_it = _VSTD::fill_n(_VSTD::move(__out_it), __num_trailing_zeros, _CharT('0')); + __out_it = _VSTD::copy(__exponent, __last, _VSTD::move(__out_it)); + return _VSTD::fill_n(_VSTD::move(__out_it), __padding.__after_, __specs.__fill_); +} + # ifndef _LIBCPP_HAS_NO_UNICODE template <class _CharT> _LIBCPP_HIDE_FROM_ABI auto __write_unicode_no_precision(basic_string_view<_CharT> __str, diff --git a/libcxx/include/__format/parser_std_format_spec.h b/libcxx/include/__format/parser_std_format_spec.h index 739bdf457e40..034fc55a44dc 100644 --- a/libcxx/include/__format/parser_std_format_spec.h +++ b/libcxx/include/__format/parser_std_format_spec.h @@ -44,168 +44,6 @@ _LIBCPP_BEGIN_NAMESPACE_STD namespace __format_spec { -/** - * Contains the flags for the std-format-spec. - * - * Some format-options can only be used for specific C++ types and may depend on - * the selected format-type. - * * The C++type filtering can be done using the proper policies for - * @ref __parser_std. - * * The format-type filtering needs to be done post parsing in the parser - * derived from @ref __parser_std. - */ -_LIBCPP_PACKED_BYTE_FOR_AIX -class _LIBCPP_TYPE_VIS _Flags { -public: - enum class _LIBCPP_ENUM_VIS _Alignment : uint8_t { - /** - * No alignment is set in the format string. - * - * Zero-padding is ignored when an alignment is selected. - * The default alignment depends on the selected format-type. - */ - __default, - __left, - __center, - __right - }; - enum class _LIBCPP_ENUM_VIS _Sign : uint8_t { - /** - * No sign is set in the format string. - * - * The sign isn't allowed for certain format-types. By using this value - * it's possible to detect whether or not the user explicitly set the sign - * flag. For formatting purposes it behaves the same as @ref __minus. 
- */ - __default, - __minus, - __plus, - __space - }; - - _Alignment __alignment : 2 {_Alignment::__default}; - _Sign __sign : 2 {_Sign::__default}; - uint8_t __alternate_form : 1 {false}; - uint8_t __zero_padding : 1 {false}; - uint8_t __locale_specific_form : 1 {false}; - - enum class _LIBCPP_ENUM_VIS _Type : uint8_t { - __default, - __string, - __binary_lower_case, - __binary_upper_case, - __octal, - __decimal, - __hexadecimal_lower_case, - __hexadecimal_upper_case, - __pointer, - __char, - __float_hexadecimal_lower_case, - __float_hexadecimal_upper_case, - __scientific_lower_case, - __scientific_upper_case, - __fixed_lower_case, - __fixed_upper_case, - __general_lower_case, - __general_upper_case - }; - - _Type __type{_Type::__default}; -}; -_LIBCPP_PACKED_BYTE_FOR_AIX_END - -namespace __detail { -template <class _CharT> -_LIBCPP_HIDE_FROM_ABI constexpr bool -__parse_alignment(_CharT __c, _Flags& __flags) noexcept { - switch (__c) { - case _CharT('<'): - __flags.__alignment = _Flags::_Alignment::__left; - return true; - - case _CharT('^'): - __flags.__alignment = _Flags::_Alignment::__center; - return true; - - case _CharT('>'): - __flags.__alignment = _Flags::_Alignment::__right; - return true; - } - return false; -} -} // namespace __detail - -template <class _CharT> -class _LIBCPP_TEMPLATE_VIS __parser_fill_align { -public: - // TODO FMT The standard doesn't specify this character is a Unicode - // character. Validate what fmt and MSVC have implemented. - _CharT __fill{_CharT(' ')}; - -protected: - _LIBCPP_HIDE_FROM_ABI constexpr const _CharT* - __parse(const _CharT* __begin, const _CharT* __end, _Flags& __flags) { - _LIBCPP_ASSERT(__begin != __end, - "When called with an empty input the function will cause " - "undefined behavior by evaluating data not in the input"); - if (__begin + 1 != __end) { - if (__detail::__parse_alignment(*(__begin + 1), __flags)) { - if (*__begin == _CharT('{') || *__begin == _CharT('}')) - __throw_format_error( - "The format-spec fill field contains an invalid character"); - __fill = *__begin; - return __begin + 2; - } - } - - if (__detail::__parse_alignment(*__begin, __flags)) - return __begin + 1; - - return __begin; - } -}; - -template <class _CharT> -_LIBCPP_HIDE_FROM_ABI constexpr const _CharT* -__parse_sign(const _CharT* __begin, _Flags& __flags) noexcept { - switch (*__begin) { - case _CharT('-'): - __flags.__sign = _Flags::_Sign::__minus; - break; - case _CharT('+'): - __flags.__sign = _Flags::_Sign::__plus; - break; - case _CharT(' '): - __flags.__sign = _Flags::_Sign::__space; - break; - default: - return __begin; - } - return __begin + 1; -} - -template <class _CharT> -_LIBCPP_HIDE_FROM_ABI constexpr const _CharT* -__parse_alternate_form(const _CharT* __begin, _Flags& __flags) noexcept { - if (*__begin == _CharT('#')) { - __flags.__alternate_form = true; - ++__begin; - } - - return __begin; -} - -template <class _CharT> -_LIBCPP_HIDE_FROM_ABI constexpr const _CharT* -__parse_zero_padding(const _CharT* __begin, _Flags& __flags) noexcept { - if (*__begin == _CharT('0')) { - __flags.__zero_padding = true; - ++__begin; - } - - return __begin; -} - template <class _CharT> _LIBCPP_HIDE_FROM_ABI constexpr __format::__parse_number_result< _CharT> __parse_arg_id(const _CharT* __begin, const _CharT* __end, auto& __parse_ctx) { @@ -226,7 +64,7 @@ __parse_arg_id(const _CharT* __begin, const _CharT* __end, auto& __parse_ctx) { template <class _Context> _LIBCPP_HIDE_FROM_ABI constexpr uint32_t -__substitute_arg_id(basic_format_arg<_Context> _Arg) { 
+__substitute_arg_id(basic_format_arg<_Context> __format_arg) { return visit_format_arg( [](auto __arg) -> uint32_t { using _Type = decltype(__arg); @@ -250,685 +88,9 @@ __substitute_arg_id(basic_format_arg<_Context> _Arg) { __throw_format_error("A format-spec arg-id replacement argument " "isn't an integral type"); }, - _Arg); -} - -class _LIBCPP_TYPE_VIS __parser_width { -public: - /** Contains a width or an arg-id. */ - uint32_t __width : 31 {0}; - /** Determines whether the value stored is a width or an arg-id. */ - uint32_t __width_as_arg : 1 {0}; - - /** - * Does the supplied width field contain an arg-id? - * - * If @c true the formatter needs to call @ref __substitute_width_arg_id. - */ - constexpr bool __width_needs_substitution() const noexcept { return __width_as_arg; } - -protected: - /** - * Does the supplied std-format-spec contain a width field? - * - * When the field isn't present there's no padding required. This can be used - * to optimize the formatting. - */ - constexpr bool __has_width_field() const noexcept { return __width_as_arg || __width; } - - template <class _CharT> - _LIBCPP_HIDE_FROM_ABI constexpr const _CharT* - __parse(const _CharT* __begin, const _CharT* __end, auto& __parse_ctx) { - if (*__begin == _CharT('0')) - __throw_format_error( - "A format-spec width field shouldn't have a leading zero"); - - if (*__begin == _CharT('{')) { - __format::__parse_number_result __r = - __parse_arg_id(++__begin, __end, __parse_ctx); - __width = __r.__value; - __width_as_arg = 1; - return __r.__ptr; - } - - if (*__begin < _CharT('0') || *__begin > _CharT('9')) - return __begin; - - __format::__parse_number_result __r = - __format::__parse_number(__begin, __end); - __width = __r.__value; - _LIBCPP_ASSERT(__width != 0, - "A zero value isn't allowed and should be impossible, " - "due to validations in this function"); - return __r.__ptr; - } - - _LIBCPP_HIDE_FROM_ABI constexpr void __substitute_width_arg_id(auto __arg) { - _LIBCPP_ASSERT(__width_as_arg == 1, - "Substitute width called when no substitution is required"); - - // The clearing of the flag isn't required but looks better when debugging - // the code. - __width_as_arg = 0; - __width = __substitute_arg_id(__arg); - if (__width == 0) - __throw_format_error( - "A format-spec width field replacement should have a positive value"); - } -}; - -class _LIBCPP_TYPE_VIS __parser_precision { -public: - /** Contains a precision or an arg-id. */ - uint32_t __precision : 31 {__format::__number_max}; - /** - * Determines whether the value stored is a precision or an arg-id. - * - * @note Since @ref __precision == @ref __format::__number_max is a valid - * value, the default value contains an arg-id of INT32_MAX. (This number of - * arguments isn't supported by compilers.) This is used to detect whether - * the std-format-spec contains a precision field. - */ - uint32_t __precision_as_arg : 1 {1}; - - /** - * Does the supplied precision field contain an arg-id? - * - * If @c true the formatter needs to call @ref __substitute_precision_arg_id. - */ - constexpr bool __precision_needs_substitution() const noexcept { - return __precision_as_arg && __precision != __format::__number_max; - } - -protected: - /** - * Does the supplied std-format-spec contain a precision field? - * - * When the field isn't present there's no truncating required. This can be - * used to optimize the formatting. - */ - constexpr bool __has_precision_field() const noexcept { - - return __precision_as_arg == 0 || // Contains a value? 
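The removed __parser_width stores either a literal width or an arg-id in 31 bits plus a one-bit discriminator, and __substitute_arg_id resolves the arg-id when formatting. At the user level this is the dynamic width feature; assuming a <format>-capable toolchain:

#include <format>
#include <iostream>

int main() {
  std::cout << std::format("[{:6}]", 42) << '\n';      // literal width: [    42]
  std::cout << std::format("[{:{}}]", 42, 6) << '\n';  // width taken from the next argument
}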
- __precision != __format::__number_max; // The arg-id is valid? - } - - template <class _CharT> - _LIBCPP_HIDE_FROM_ABI constexpr const _CharT* - __parse(const _CharT* __begin, const _CharT* __end, auto& __parse_ctx) { - if (*__begin != _CharT('.')) - return __begin; - - ++__begin; - if (__begin == __end) - __throw_format_error("End of input while parsing format-spec precision"); - - if (*__begin == _CharT('{')) { - __format::__parse_number_result __arg_id = - __parse_arg_id(++__begin, __end, __parse_ctx); - _LIBCPP_ASSERT(__arg_id.__value != __format::__number_max, - "Unsupported number of arguments, since this number of " - "arguments is used a special value"); - __precision = __arg_id.__value; - return __arg_id.__ptr; - } - - if (*__begin < _CharT('0') || *__begin > _CharT('9')) - __throw_format_error( - "The format-spec precision field doesn't contain a value or arg-id"); - - __format::__parse_number_result __r = - __format::__parse_number(__begin, __end); - __precision = __r.__value; - __precision_as_arg = 0; - return __r.__ptr; - } - - _LIBCPP_HIDE_FROM_ABI constexpr void __substitute_precision_arg_id( - auto __arg) { - _LIBCPP_ASSERT( - __precision_as_arg == 1 && __precision != __format::__number_max, - "Substitute precision called when no substitution is required"); - - // The clearing of the flag isn't required but looks better when debugging - // the code. - __precision_as_arg = 0; - __precision = __substitute_arg_id(__arg); - } -}; - -template <class _CharT> -_LIBCPP_HIDE_FROM_ABI constexpr const _CharT* -__parse_locale_specific_form(const _CharT* __begin, _Flags& __flags) noexcept { - if (*__begin == _CharT('L')) { - __flags.__locale_specific_form = true; - ++__begin; - } - - return __begin; -} - -template <class _CharT> -_LIBCPP_HIDE_FROM_ABI constexpr const _CharT* -__parse_type(const _CharT* __begin, _Flags& __flags) { - - // Determines the type. It does not validate whether the selected type is - // valid. Most formatters have optional fields that are only allowed for - // certain types. These parsers need to do validation after the type has - // been parsed. So its easier to implement the validation for all types in - // the specific parse function. 
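__parser_precision accepts '.' followed by digits or a nested replacement field, and uses __format::__number_max as the "no precision" sentinel since every smaller 31-bit value is otherwise legal. In user terms:

#include <format>
#include <iostream>

int main() {
  std::cout << std::format("{:.3f}", 3.14159) << '\n';      // "3.142": literal precision
  std::cout << std::format("{:.{}f}", 3.14159, 2) << '\n';  // "3.14": precision from an arg-id
}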
- switch (*__begin) { - case 'A': - __flags.__type = _Flags::_Type::__float_hexadecimal_upper_case; - break; - case 'B': - __flags.__type = _Flags::_Type::__binary_upper_case; - break; - case 'E': - __flags.__type = _Flags::_Type::__scientific_upper_case; - break; - case 'F': - __flags.__type = _Flags::_Type::__fixed_upper_case; - break; - case 'G': - __flags.__type = _Flags::_Type::__general_upper_case; - break; - case 'X': - __flags.__type = _Flags::_Type::__hexadecimal_upper_case; - break; - case 'a': - __flags.__type = _Flags::_Type::__float_hexadecimal_lower_case; - break; - case 'b': - __flags.__type = _Flags::_Type::__binary_lower_case; - break; - case 'c': - __flags.__type = _Flags::_Type::__char; - break; - case 'd': - __flags.__type = _Flags::_Type::__decimal; - break; - case 'e': - __flags.__type = _Flags::_Type::__scientific_lower_case; - break; - case 'f': - __flags.__type = _Flags::_Type::__fixed_lower_case; - break; - case 'g': - __flags.__type = _Flags::_Type::__general_lower_case; - break; - case 'o': - __flags.__type = _Flags::_Type::__octal; - break; - case 'p': - __flags.__type = _Flags::_Type::__pointer; - break; - case 's': - __flags.__type = _Flags::_Type::__string; - break; - case 'x': - __flags.__type = _Flags::_Type::__hexadecimal_lower_case; - break; - default: - return __begin; - } - return ++__begin; -} - -/** - * Process the parsed alignment and zero-padding state of arithmetic types. - * - * [format.string.std]/13 - * If the 0 character and an align option both appear, the 0 character is - * ignored. - * - * For the formatter a @ref __default alignment means zero-padding. - */ -_LIBCPP_HIDE_FROM_ABI constexpr void __process_arithmetic_alignment(_Flags& __flags) { - __flags.__zero_padding &= __flags.__alignment == _Flags::_Alignment::__default; - if (!__flags.__zero_padding && __flags.__alignment == _Flags::_Alignment::__default) - __flags.__alignment = _Flags::_Alignment::__right; + __format_arg); } -/** - * The parser for the std-format-spec. - * - * [format.string.std]/1 specifies the std-format-spec: - * fill-and-align sign # 0 width precision L type - * - * All these fields are optional. Whether these fields can be used depend on: - * - The type supplied to the format string. - * E.g. A string never uses the sign field so the field may not be set. - * This constrain is validated by the parsers in this file. - * - The supplied value for the optional type field. - * E.g. A int formatted as decimal uses the sign field. - * When formatted as a char the sign field may no longer be set. - * This constrain isn't validated by the parsers in this file. - * - * The base classes are ordered to minimize the amount of padding. - * - * This implements the parser for the string types. - */ -template <class _CharT> -class _LIBCPP_TEMPLATE_VIS __parser_string - : public __parser_width, // provides __width(|as_arg) - public __parser_precision, // provides __precision(|as_arg) - public __parser_fill_align<_CharT>, // provides __fill and uses __flags - public _Flags // provides __flags -{ -public: - using char_type = _CharT; - - _LIBCPP_HIDE_FROM_ABI constexpr __parser_string() { - this->__alignment = _Flags::_Alignment::__left; - } - - /** - * The low-level std-format-spec parse function. - * - * @pre __begin points at the beginning of the std-format-spec. This means - * directly after the ':'. - * @pre The std-format-spec parses the entire input, or the first unmatched - * character is a '}'. - * - * @returns The iterator pointing at the last parsed character. 
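__parse_type above only records the presentation character; whether that character is legal for the argument is checked later by each parser. The characters map to the familiar presentations:

#include <format>
#include <iostream>

int main() {
  // 'b'/'o'/'d'/'x'/'X' select the integer presentations parsed above.
  std::cout << std::format("{0:b} {0:o} {0:d} {0:x} {0:X}", 42) << '\n';
  // prints: 101010 52 42 2a 2A
  std::cout << std::format("{:c}", 65) << '\n';  // 'c': integer shown as a character, "A"
}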
- */ - _LIBCPP_HIDE_FROM_ABI constexpr auto parse(auto& __parse_ctx) - -> decltype(__parse_ctx.begin()) { - auto __it = __parse(__parse_ctx); - __process_display_type(); - return __it; - } - -private: - /** - * Parses the std-format-spec. - * - * @throws __throw_format_error When @a __parse_ctx contains an ill-formed - * std-format-spec. - * - * @returns An iterator to the end of input or point at the closing '}'. - */ - _LIBCPP_HIDE_FROM_ABI constexpr auto __parse(auto& __parse_ctx) - -> decltype(__parse_ctx.begin()) { - - auto __begin = __parse_ctx.begin(); - auto __end = __parse_ctx.end(); - if (__begin == __end) - return __begin; - - __begin = __parser_fill_align<_CharT>::__parse(__begin, __end, - static_cast<_Flags&>(*this)); - if (__begin == __end) - return __begin; - - __begin = __parser_width::__parse(__begin, __end, __parse_ctx); - if (__begin == __end) - return __begin; - - __begin = __parser_precision::__parse(__begin, __end, __parse_ctx); - if (__begin == __end) - return __begin; - - __begin = __parse_type(__begin, static_cast<_Flags&>(*this)); - - if (__begin != __end && *__begin != _CharT('}')) - __throw_format_error( - "The format-spec should consume the input or end with a '}'"); - - return __begin; - } - - /** Processes the parsed std-format-spec based on the parsed display type. */ - _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type() { - switch (this->__type) { - case _Flags::_Type::__default: - case _Flags::_Type::__string: - break; - - default: - __throw_format_error("The format-spec type has a type not supported for " - "a string argument"); - } - } -}; - -/** - * The parser for the std-format-spec. - * - * This implements the parser for the integral types. This includes the - * character type and boolean type. - * - * See @ref __parser_string. - */ -template <class _CharT> -class _LIBCPP_TEMPLATE_VIS __parser_integral - : public __parser_width, // provides __width(|as_arg) - public __parser_fill_align<_CharT>, // provides __fill and uses __flags - public _Flags // provides __flags -{ -public: - using char_type = _CharT; - -protected: - /** - * The low-level std-format-spec parse function. - * - * @pre __begin points at the beginning of the std-format-spec. This means - * directly after the ':'. - * @pre The std-format-spec parses the entire input, or the first unmatched - * character is a '}'. - * - * @returns The iterator pointing at the last parsed character. 
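__parser_string chains the field parsers in grammar order (fill-and-align, width, precision, type), defaults the alignment to left in its constructor, and afterwards rejects non-string presentation types. Observable behaviour, assuming a conforming <format>:

#include <format>
#include <iostream>

int main() {
  std::cout << std::format("[{:10}]", "hi") << '\n';      // [hi        ]: strings align left by default
  std::cout << std::format("[{:*^10.1}]", "hi") << '\n';  // [****h*****]: precision truncates, '^' centers
}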
- */ - _LIBCPP_HIDE_FROM_ABI constexpr auto __parse(auto& __parse_ctx) - -> decltype(__parse_ctx.begin()) { - auto __begin = __parse_ctx.begin(); - auto __end = __parse_ctx.end(); - if (__begin == __end) - return __begin; - - __begin = __parser_fill_align<_CharT>::__parse(__begin, __end, - static_cast<_Flags&>(*this)); - if (__begin == __end) - return __begin; - - __begin = __parse_sign(__begin, static_cast<_Flags&>(*this)); - if (__begin == __end) - return __begin; - - __begin = __parse_alternate_form(__begin, static_cast<_Flags&>(*this)); - if (__begin == __end) - return __begin; - - __begin = __parse_zero_padding(__begin, static_cast<_Flags&>(*this)); - if (__begin == __end) - return __begin; - - __begin = __parser_width::__parse(__begin, __end, __parse_ctx); - if (__begin == __end) - return __begin; - - __begin = - __parse_locale_specific_form(__begin, static_cast<_Flags&>(*this)); - if (__begin == __end) - return __begin; - - __begin = __parse_type(__begin, static_cast<_Flags&>(*this)); - - if (__begin != __end && *__begin != _CharT('}')) - __throw_format_error( - "The format-spec should consume the input or end with a '}'"); - - return __begin; - } - - /** Handles the post-parsing updates for the integer types. */ - _LIBCPP_HIDE_FROM_ABI constexpr void __handle_integer() noexcept { - __process_arithmetic_alignment(static_cast<_Flags&>(*this)); - } - - /** - * Handles the post-parsing updates for the character types. - * - * Sets the alignment and validates the format flags set for a character type. - * - * At the moment the validation for a character and a Boolean behave the - * same, but this may change in the future. - * Specifically at the moment the locale-specific form is allowed for the - * char output type, but it has no effect on the output. - */ - _LIBCPP_HIDE_FROM_ABI constexpr void __handle_char() { __handle_bool(); } - - /** - * Handles the post-parsing updates for the Boolean types. - * - * Sets the alignment and validates the format flags set for a Boolean type. - */ - _LIBCPP_HIDE_FROM_ABI constexpr void __handle_bool() { - if (this->__sign != _Flags::_Sign::__default) - __throw_format_error("A sign field isn't allowed in this format-spec"); - - if (this->__alternate_form) - __throw_format_error( - "An alternate form field isn't allowed in this format-spec"); - - if (this->__zero_padding) - __throw_format_error( - "A zero-padding field isn't allowed in this format-spec"); - - if (this->__alignment == _Flags::_Alignment::__default) - this->__alignment = _Flags::_Alignment::__left; - } -}; - -/** - * The parser for the std-format-spec. - * - * This implements the parser for the floating-point types. - * - * See @ref __parser_string. - */ -template <class _CharT> -class _LIBCPP_TEMPLATE_VIS __parser_floating_point - : public __parser_width, // provides __width(|as_arg) - public __parser_precision, // provides __precision(|as_arg) - public __parser_fill_align<_CharT>, // provides __fill and uses __flags - public _Flags // provides __flags -{ -public: - using char_type = _CharT; - - /** - * The low-level std-format-spec parse function. - * - * @pre __begin points at the beginning of the std-format-spec. This means - * directly after the ':'. - * @pre The std-format-spec parses the entire input, or the first unmatched - * character is a '}'. - * - * @returns The iterator pointing at the last parsed character. 
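__handle_bool above is why a sign, alternate form, or zero padding on a textual bool is an error, while the integer presentations remain available. For example (std::vformat is used so the error surfaces at run time rather than at compile-time format-string checking):

#include <format>
#include <iostream>

int main() {
  std::cout << std::format("{}", true) << '\n';    // "true": textual by default
  std::cout << std::format("{:d}", true) << '\n';  // "1": integer presentation
  try {
    bool b = true;
    (void)std::vformat("{:+}", std::make_format_args(b));  // sign rejected for textual bool
  } catch (const std::format_error& e) {
    std::cout << "format_error: " << e.what() << '\n';
  }
}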
- */ - _LIBCPP_HIDE_FROM_ABI constexpr auto parse(auto& __parse_ctx) - -> decltype(__parse_ctx.begin()) { - auto __it = __parse(__parse_ctx); - __process_arithmetic_alignment(static_cast<_Flags&>(*this)); - __process_display_type(); - return __it; - } -protected: - /** - * The low-level std-format-spec parse function. - * - * @pre __begin points at the beginning of the std-format-spec. This means - * directly after the ':'. - * @pre The std-format-spec parses the entire input, or the first unmatched - * character is a '}'. - * - * @returns The iterator pointing at the last parsed character. - */ - _LIBCPP_HIDE_FROM_ABI constexpr auto __parse(auto& __parse_ctx) - -> decltype(__parse_ctx.begin()) { - auto __begin = __parse_ctx.begin(); - auto __end = __parse_ctx.end(); - if (__begin == __end) - return __begin; - - __begin = __parser_fill_align<_CharT>::__parse(__begin, __end, - static_cast<_Flags&>(*this)); - if (__begin == __end) - return __begin; - - __begin = __parse_sign(__begin, static_cast<_Flags&>(*this)); - if (__begin == __end) - return __begin; - - __begin = __parse_alternate_form(__begin, static_cast<_Flags&>(*this)); - if (__begin == __end) - return __begin; - - __begin = __parse_zero_padding(__begin, static_cast<_Flags&>(*this)); - if (__begin == __end) - return __begin; - - __begin = __parser_width::__parse(__begin, __end, __parse_ctx); - if (__begin == __end) - return __begin; - - __begin = __parser_precision::__parse(__begin, __end, __parse_ctx); - if (__begin == __end) - return __begin; - - __begin = - __parse_locale_specific_form(__begin, static_cast<_Flags&>(*this)); - if (__begin == __end) - return __begin; - - __begin = __parse_type(__begin, static_cast<_Flags&>(*this)); - - if (__begin != __end && *__begin != _CharT('}')) - __throw_format_error( - "The format-spec should consume the input or end with a '}'"); - - return __begin; - } - - /** Processes the parsed std-format-spec based on the parsed display type. */ - _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type() { - switch (this->__type) { - case _Flags::_Type::__default: - // When no precision specified then it keeps default since that - // formatting differs from the other types. - if (this->__has_precision_field()) - this->__type = _Flags::_Type::__general_lower_case; - break; - case _Flags::_Type::__float_hexadecimal_lower_case: - case _Flags::_Type::__float_hexadecimal_upper_case: - // Precision specific behavior will be handled later. - break; - case _Flags::_Type::__scientific_lower_case: - case _Flags::_Type::__scientific_upper_case: - case _Flags::_Type::__fixed_lower_case: - case _Flags::_Type::__fixed_upper_case: - case _Flags::_Type::__general_lower_case: - case _Flags::_Type::__general_upper_case: - if (!this->__has_precision_field()) { - // Set the default precision for the call to to_chars. - this->__precision = 6; - this->__precision_as_arg = false; - } - break; - - default: - __throw_format_error("The format-spec type has a type not supported for " - "a floating-point argument"); - } - } -}; - -/** - * The parser for the std-format-spec. - * - * This implements the parser for the pointer types. - * - * See @ref __parser_string. 
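__process_display_type is where the to_chars defaults come from: the 'e', 'f', and 'g' presentations get precision 6 when none was given, while the default presentation stays shortest-round-trip unless an explicit precision forces the general form:

#include <format>
#include <iostream>

int main() {
  std::cout << std::format("{}", 0.5) << '\n';     // "0.5": shortest round-trip
  std::cout << std::format("{:f}", 0.5) << '\n';   // "0.500000": default precision 6
  std::cout << std::format("{:.3}", 0.5) << '\n';  // "0.5": a precision selects the general form
}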
- */ -template <class _CharT> -class _LIBCPP_TEMPLATE_VIS __parser_pointer : public __parser_width, // provides __width(|as_arg) - public __parser_fill_align<_CharT>, // provides __fill and uses __flags - public _Flags // provides __flags -{ -public: - using char_type = _CharT; - - _LIBCPP_HIDE_FROM_ABI constexpr __parser_pointer() { - // Implements LWG3612 Inconsistent pointer alignment in std::format. - // The issue's current status is "Tentatively Ready" and libc++ status is - // still experimental. - // - // TODO FMT Validate this with the final resolution of LWG3612. - this->__alignment = _Flags::_Alignment::__right; - } - - /** - * The low-level std-format-spec parse function. - * - * @pre __begin points at the beginning of the std-format-spec. This means - * directly after the ':'. - * @pre The std-format-spec parses the entire input, or the first unmatched - * character is a '}'. - * - * @returns The iterator pointing at the last parsed character. - */ - _LIBCPP_HIDE_FROM_ABI constexpr auto parse(auto& __parse_ctx) -> decltype(__parse_ctx.begin()) { - auto __it = __parse(__parse_ctx); - __process_display_type(); - return __it; - } - -protected: - /** - * The low-level std-format-spec parse function. - * - * @pre __begin points at the beginning of the std-format-spec. This means - * directly after the ':'. - * @pre The std-format-spec parses the entire input, or the first unmatched - * character is a '}'. - * - * @returns The iterator pointing at the last parsed character. - */ - _LIBCPP_HIDE_FROM_ABI constexpr auto __parse(auto& __parse_ctx) -> decltype(__parse_ctx.begin()) { - auto __begin = __parse_ctx.begin(); - auto __end = __parse_ctx.end(); - if (__begin == __end) - return __begin; - - __begin = __parser_fill_align<_CharT>::__parse(__begin, __end, static_cast<_Flags&>(*this)); - if (__begin == __end) - return __begin; - - // An integer presentation type isn't defined in the Standard. - // Since a pointer is formatted as an integer it can be argued it's an - // integer presentation type. However there are two LWG-issues asserting it - // isn't an integer presentation type: - // - LWG3612 Inconsistent pointer alignment in std::format - // - LWG3644 std::format does not define "integer presentation type" - // - // There's a paper to make additional clarifications on the status of - // formatting pointers and proposes additional fields to be valid. That - // paper hasn't been reviewed by the Committee yet. - // - P2510 Formatting pointers - // - // The current implementation assumes formatting pointers isn't covered by - // "integer presentation type". - // TODO FMT Apply the LWG-issues/papers after approval/rejection by the Committee. - - __begin = __parser_width::__parse(__begin, __end, __parse_ctx); - if (__begin == __end) - return __begin; - - __begin = __parse_type(__begin, static_cast<_Flags&>(*this)); - - if (__begin != __end && *__begin != _CharT('}')) - __throw_format_error("The format-spec should consume the input or end with a '}'"); - - return __begin; - } - - /** Processes the parsed std-format-spec based on the parsed display type. */ - _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type() { - switch (this->__type) { - case _Flags::_Type::__default: - this->__type = _Flags::_Type::__pointer; - break; - case _Flags::_Type::__pointer: - break; - default: - __throw_format_error("The format-spec type has a type not supported for a pointer argument"); - } - } -}; - /** Helper struct returned from @ref __get_string_alignment. 
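__parser_pointer defaults to right alignment, following the tentatively-ready resolution of LWG3612 noted in its constructor, and accepts only the default and 'p' presentations. The exact pointer text is implementation-defined; libc++ prints 0x-prefixed hex:

#include <format>
#include <iostream>

int main() {
  const void* p = nullptr;
  std::cout << std::format("[{:12}]", p) << '\n';   // [         0x0]: right-aligned by default
  std::cout << std::format("[{:<12}]", p) << '\n';  // [0x0         ]: explicit left alignment
}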
*/ template <class _CharT> struct _LIBCPP_TEMPLATE_VIS __string_alignment { @@ -1406,6 +568,13 @@ inline constexpr __fields __fields_integral{ .__zero_padding_ = true, .__locale_specific_form_ = true, .__type_ = true}; +inline constexpr __fields __fields_floating_point{ + .__sign_ = true, + .__alternate_form_ = true, + .__zero_padding_ = true, + .__precision_ = true, + .__locale_specific_form_ = true, + .__type_ = true}; inline constexpr __fields __fields_string{.__precision_ = true, .__type_ = true}; inline constexpr __fields __fields_pointer{.__type_ = true}; @@ -1872,17 +1041,9 @@ _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_char(__parser<_CharT } template <class _CharT> -_LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_integer(__parser<_CharT>& __parser) { - if (__parser.__alignment_ == __alignment::__default) - __parser.__alignment_ = __alignment::__right; -} - -template <class _CharT> _LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_bool(__parser<_CharT>& __parser) { switch (__parser.__type_) { case __format_spec::__type::__default: - __parser.__type_ = __format_spec::__type::__string; - [[fallthrough]]; case __format_spec::__type::__string: __format_spec::__process_display_type_bool_string(__parser); break; @@ -1893,7 +1054,6 @@ _LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_bool(__parser<_CharT>& __p case __format_spec::__type::__decimal: case __format_spec::__type::__hexadecimal_lower_case: case __format_spec::__type::__hexadecimal_upper_case: - __process_display_type_integer(__parser); break; default: @@ -1905,8 +1065,6 @@ template <class _CharT> _LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_char(__parser<_CharT>& __parser) { switch (__parser.__type_) { case __format_spec::__type::__default: - __parser.__type_ = __format_spec::__type::__char; - [[fallthrough]]; case __format_spec::__type::__char: __format_spec::__process_display_type_char(__parser); break; @@ -1917,7 +1075,6 @@ _LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_char(__parser<_CharT>& __p case __format_spec::__type::__decimal: case __format_spec::__type::__hexadecimal_lower_case: case __format_spec::__type::__hexadecimal_upper_case: - __format_spec::__process_display_type_integer(__parser); break; default: @@ -1929,15 +1086,12 @@ template <class _CharT> _LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_integer(__parser<_CharT>& __parser) { switch (__parser.__type_) { case __format_spec::__type::__default: - __parser.__type_ = __format_spec::__type::__decimal; - [[fallthrough]]; case __format_spec::__type::__binary_lower_case: case __format_spec::__type::__binary_upper_case: case __format_spec::__type::__octal: case __format_spec::__type::__decimal: case __format_spec::__type::__hexadecimal_lower_case: case __format_spec::__type::__hexadecimal_upper_case: - __format_spec::__process_display_type_integer(__parser); break; case __format_spec::__type::__char: @@ -1949,6 +1103,35 @@ _LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_integer(__parser<_CharT>& } } +template <class _CharT> +_LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_floating_point(__parser<_CharT>& __parser) { + switch (__parser.__type_) { + case __format_spec::__type::__default: + // When no precision specified then it keeps default since that + // formatting differs from the other types. 
+ if (__parser.__precision_as_arg_ || __parser.__precision_ != -1) + __parser.__type_ = __format_spec::__type::__general_lower_case; + break; + case __format_spec::__type::__hexfloat_lower_case: + case __format_spec::__type::__hexfloat_upper_case: + // Precision specific behavior will be handled later. + break; + case __format_spec::__type::__scientific_lower_case: + case __format_spec::__type::__scientific_upper_case: + case __format_spec::__type::__fixed_lower_case: + case __format_spec::__type::__fixed_upper_case: + case __format_spec::__type::__general_lower_case: + case __format_spec::__type::__general_upper_case: + if (!__parser.__precision_as_arg_ && __parser.__precision_ == -1) + // Set the default precision for the call to to_chars. + __parser.__precision_ = 6; + break; + + default: + std::__throw_format_error("The format-spec type has a type not supported for a floating-point argument"); + } +} + _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_pointer(__format_spec::__type __type) { switch (__type) { case __format_spec::__type::__default: diff --git a/libcxx/include/__functional/default_searcher.h b/libcxx/include/__functional/default_searcher.h index 05fb23d7c3c4..8e37082b6bed 100644 --- a/libcxx/include/__functional/default_searcher.h +++ b/libcxx/include/__functional/default_searcher.h @@ -12,6 +12,7 @@ #include <__algorithm/search.h> #include <__config> +#include <__functional/identity.h> #include <__functional/operations.h> #include <__iterator/iterator_traits.h> #include <__utility/pair.h> @@ -38,16 +39,15 @@ public: pair<_ForwardIterator2, _ForwardIterator2> operator () (_ForwardIterator2 __f, _ForwardIterator2 __l) const { - return _VSTD::__search(__f, __l, __first_, __last_, __pred_, - typename iterator_traits<_ForwardIterator>::iterator_category(), - typename iterator_traits<_ForwardIterator2>::iterator_category()); + auto __proj = __identity(); + return std::__search_impl(__f, __l, __first_, __last_, __pred_, __proj, __proj); } private: _ForwardIterator __first_; _ForwardIterator __last_; _BinaryPredicate __pred_; - }; +}; #endif // _LIBCPP_STD_VER > 14 diff --git a/libcxx/include/__functional/function.h b/libcxx/include/__functional/function.h index 312443b67c3b..db3af6e24101 100644 --- a/libcxx/include/__functional/function.h +++ b/libcxx/include/__functional/function.h @@ -390,9 +390,9 @@ template <class _Rp, class... 
_ArgTypes> class __value_func<_Rp(_ArgTypes...)> typedef __base<_Rp(_ArgTypes...)> __func; __func* __f_; - _LIBCPP_NO_CFI static __func* __as_base(void* p) + _LIBCPP_NO_CFI static __func* __as_base(void* __p) { - return reinterpret_cast<__func*>(p); + return reinterpret_cast<__func*>(__p); } public: diff --git a/libcxx/include/__hash_table b/libcxx/include/__hash_table index 20223014432f..6123a310ad63 100644 --- a/libcxx/include/__hash_table +++ b/libcxx/include/__hash_table @@ -178,16 +178,14 @@ struct __hash_key_value_types<__hash_value_type<_Key, _Tp> > { template <class _Up> _LIBCPP_INLINE_VISIBILITY - static typename enable_if<__is_same_uncvref<_Up, __node_value_type>::value, - __container_value_type const&>::type + static __enable_if_t<__is_same_uncvref<_Up, __node_value_type>::value, __container_value_type const&> __get_value(_Up& __t) { return __t.__get_value(); } template <class _Up> _LIBCPP_INLINE_VISIBILITY - static typename enable_if<__is_same_uncvref<_Up, __container_value_type>::value, - __container_value_type const&>::type + static __enable_if_t<__is_same_uncvref<_Up, __container_value_type>::value, __container_value_type const&> __get_value(_Up& __t) { return __t; } @@ -1049,10 +1047,8 @@ public: template <class _First, class _Second> _LIBCPP_INLINE_VISIBILITY - typename enable_if< - __can_extract_map_key<_First, key_type, __container_value_type>::value, - pair<iterator, bool> - >::type __emplace_unique(_First&& __f, _Second&& __s) { + __enable_if_t<__can_extract_map_key<_First, key_type, __container_value_type>::value, pair<iterator, bool> > + __emplace_unique(_First&& __f, _Second&& __s) { return __emplace_unique_key_args(__f, _VSTD::forward<_First>(__f), _VSTD::forward<_Second>(__s)); } @@ -1096,9 +1092,7 @@ public: return __emplace_unique_key_args(_NodeTypes::__get_key(__x), _VSTD::move(__x)); } - template <class _Pp, class = typename enable_if< - !__is_same_uncvref<_Pp, __container_value_type>::value - >::type> + template <class _Pp, class = __enable_if_t<!__is_same_uncvref<_Pp, __container_value_type>::value> > _LIBCPP_INLINE_VISIBILITY pair<iterator, bool> __insert_unique(_Pp&& __x) { return __emplace_unique(_VSTD::forward<_Pp>(__x)); @@ -1152,9 +1146,16 @@ public: #endif void clear() _NOEXCEPT; - void rehash(size_type __n); - _LIBCPP_INLINE_VISIBILITY void reserve(size_type __n) - {rehash(static_cast<size_type>(ceil(__n / max_load_factor())));} + _LIBCPP_INLINE_VISIBILITY void __rehash_unique(size_type __n) { __rehash<true>(__n); } + _LIBCPP_INLINE_VISIBILITY void __rehash_multi(size_type __n) { __rehash<false>(__n); } + _LIBCPP_INLINE_VISIBILITY void __reserve_unique(size_type __n) + { + __rehash_unique(static_cast<size_type>(ceil(__n / max_load_factor()))); + } + _LIBCPP_INLINE_VISIBILITY void __reserve_multi(size_type __n) + { + __rehash_multi(static_cast<size_type>(ceil(__n / max_load_factor()))); + } _LIBCPP_INLINE_VISIBILITY size_type bucket_count() const _NOEXCEPT @@ -1291,7 +1292,8 @@ public: #endif // _LIBCPP_ENABLE_DEBUG_MODE private: - void __rehash(size_type __n); + template <bool _UniqueKeys> void __rehash(size_type __n); + template <bool _UniqueKeys> void __do_rehash(size_type __n); template <class ..._Args> __node_holder __construct_node(_Args&& ...__args); @@ -1796,7 +1798,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_insert_unique_prepare( } if (size()+1 > __bc * max_load_factor() || __bc == 0) { - rehash(_VSTD::max<size_type>(2 * __bc + !__is_hash_power2(__bc), + __rehash_unique(_VSTD::max<size_type>(2 * __bc + !__is_hash_power2(__bc), 
size_type(ceil(float(size() + 1) / max_load_factor())))); } return nullptr; @@ -1868,7 +1870,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_insert_multi_prepare( size_type __bc = bucket_count(); if (size()+1 > __bc * max_load_factor() || __bc == 0) { - rehash(_VSTD::max<size_type>(2 * __bc + !__is_hash_power2(__bc), + __rehash_multi(_VSTD::max<size_type>(2 * __bc + !__is_hash_power2(__bc), size_type(ceil(float(size() + 1) / max_load_factor())))); __bc = bucket_count(); } @@ -1962,7 +1964,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_insert_multi( size_type __bc = bucket_count(); if (size()+1 > __bc * max_load_factor() || __bc == 0) { - rehash(_VSTD::max<size_type>(2 * __bc + !__is_hash_power2(__bc), + __rehash_multi(_VSTD::max<size_type>(2 * __bc + !__is_hash_power2(__bc), size_type(ceil(float(size() + 1) / max_load_factor())))); __bc = bucket_count(); } @@ -2010,7 +2012,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__emplace_unique_key_args(_Key const& __node_holder __h = __construct_node_hash(__hash, _VSTD::forward<_Args>(__args)...); if (size()+1 > __bc * max_load_factor() || __bc == 0) { - rehash(_VSTD::max<size_type>(2 * __bc + !__is_hash_power2(__bc), + __rehash_unique(_VSTD::max<size_type>(2 * __bc + !__is_hash_power2(__bc), size_type(ceil(float(size() + 1) / max_load_factor())))); __bc = bucket_count(); __chash = __constrain_hash(__hash, __bc); @@ -2213,8 +2215,9 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_handle_merge_multi( #endif // _LIBCPP_STD_VER > 14 template <class _Tp, class _Hash, class _Equal, class _Alloc> +template <bool _UniqueKeys> void -__hash_table<_Tp, _Hash, _Equal, _Alloc>::rehash(size_type __n) +__hash_table<_Tp, _Hash, _Equal, _Alloc>::__rehash(size_type __n) _LIBCPP_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK { if (__n == 1) @@ -2223,7 +2226,7 @@ _LIBCPP_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK __n = __next_prime(__n); size_type __bc = bucket_count(); if (__n > __bc) - __rehash(__n); + __do_rehash<_UniqueKeys>(__n); else if (__n < __bc) { __n = _VSTD::max<size_type> @@ -2233,13 +2236,14 @@ _LIBCPP_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK __next_prime(size_t(ceil(float(size()) / max_load_factor()))) ); if (__n < __bc) - __rehash(__n); + __do_rehash<_UniqueKeys>(__n); } } template <class _Tp, class _Hash, class _Equal, class _Alloc> +template <bool _UniqueKeys> void -__hash_table<_Tp, _Hash, _Equal, _Alloc>::__rehash(size_type __nbc) +__hash_table<_Tp, _Hash, _Equal, _Alloc>::__do_rehash(size_type __nbc) { std::__debug_db_invalidate_all(this); __pointer_allocator& __npa = __bucket_list_.get_deleter().__alloc(); @@ -2274,11 +2278,14 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__rehash(size_type __nbc) else { __next_pointer __np = __cp; - for (; __np->__next_ != nullptr && - key_eq()(__cp->__upcast()->__value_, - __np->__next_->__upcast()->__value_); - __np = __np->__next_) - ; + if _LIBCPP_CONSTEXPR_AFTER_CXX14 (!_UniqueKeys) + { + for (; __np->__next_ != nullptr && + key_eq()(__cp->__upcast()->__value_, + __np->__next_->__upcast()->__value_); + __np = __np->__next_) + ; + } __pp->__next_ = __np->__next_; __np->__next_ = __bucket_list_[__chash]->__next_; __bucket_list_[__chash]->__next_ = __cp; diff --git a/libcxx/include/__iterator/back_insert_iterator.h b/libcxx/include/__iterator/back_insert_iterator.h index 7bbf5b09e0e5..e9f9f2abec2a 100644 --- a/libcxx/include/__iterator/back_insert_iterator.h +++ b/libcxx/include/__iterator/back_insert_iterator.h @@ -46,11 +46,11 @@ public: typedef _Container container_type; _LIBCPP_HIDE_FROM_ABI 
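The __hash_table hunks above split rehash into __rehash_unique and __rehash_multi so that only the multi flavour pays for the scan that keeps equal keys adjacent while relinking nodes; with unique keys that loop can never advance, and if constexpr now compiles it out. Nothing changes at the container level:

#include <unordered_map>
#include <unordered_set>

int main() {
  std::unordered_map<int, int> m{{1, 10}, {2, 20}};
  m.rehash(64);   // unique keys: __rehash_unique -> __do_rehash<true>, no adjacency scan

  std::unordered_multiset<int> ms{1, 1, 2};
  ms.rehash(64);  // duplicates allowed: __rehash_multi -> __do_rehash<false>,
                  // which still keeps the two 1s adjacent in their bucket chain
}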
_LIBCPP_CONSTEXPR_AFTER_CXX17 explicit back_insert_iterator(_Container& __x) : container(_VSTD::addressof(__x)) {} - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 back_insert_iterator& operator=(const typename _Container::value_type& __value_) - {container->push_back(__value_); return *this;} + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 back_insert_iterator& operator=(const typename _Container::value_type& __value) + {container->push_back(__value); return *this;} #ifndef _LIBCPP_CXX03_LANG - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 back_insert_iterator& operator=(typename _Container::value_type&& __value_) - {container->push_back(_VSTD::move(__value_)); return *this;} + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 back_insert_iterator& operator=(typename _Container::value_type&& __value) + {container->push_back(_VSTD::move(__value)); return *this;} #endif // _LIBCPP_CXX03_LANG _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 back_insert_iterator& operator*() {return *this;} _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 back_insert_iterator& operator++() {return *this;} diff --git a/libcxx/include/__iterator/front_insert_iterator.h b/libcxx/include/__iterator/front_insert_iterator.h index 69b2d32d077a..9c8ec0028298 100644 --- a/libcxx/include/__iterator/front_insert_iterator.h +++ b/libcxx/include/__iterator/front_insert_iterator.h @@ -46,11 +46,11 @@ public: typedef _Container container_type; _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 explicit front_insert_iterator(_Container& __x) : container(_VSTD::addressof(__x)) {} - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 front_insert_iterator& operator=(const typename _Container::value_type& __value_) - {container->push_front(__value_); return *this;} + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 front_insert_iterator& operator=(const typename _Container::value_type& __value) + {container->push_front(__value); return *this;} #ifndef _LIBCPP_CXX03_LANG - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 front_insert_iterator& operator=(typename _Container::value_type&& __value_) - {container->push_front(_VSTD::move(__value_)); return *this;} + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 front_insert_iterator& operator=(typename _Container::value_type&& __value) + {container->push_front(_VSTD::move(__value)); return *this;} #endif // _LIBCPP_CXX03_LANG _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 front_insert_iterator& operator*() {return *this;} _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 front_insert_iterator& operator++() {return *this;} diff --git a/libcxx/include/__iterator/insert_iterator.h b/libcxx/include/__iterator/insert_iterator.h index 8b313f2a85bb..b35d8bf16af9 100644 --- a/libcxx/include/__iterator/insert_iterator.h +++ b/libcxx/include/__iterator/insert_iterator.h @@ -57,11 +57,11 @@ public: _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 insert_iterator(_Container& __x, __insert_iterator_iter_t<_Container> __i) : container(_VSTD::addressof(__x)), iter(__i) {} - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 insert_iterator& operator=(const typename _Container::value_type& __value_) - {iter = container->insert(iter, __value_); ++iter; return *this;} + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 insert_iterator& operator=(const typename _Container::value_type& __value) + {iter = container->insert(iter, __value); ++iter; return *this;} #ifndef _LIBCPP_CXX03_LANG - _LIBCPP_INLINE_VISIBILITY 
_LIBCPP_CONSTEXPR_AFTER_CXX17 insert_iterator& operator=(typename _Container::value_type&& __value_) - {iter = container->insert(iter, _VSTD::move(__value_)); ++iter; return *this;} + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 insert_iterator& operator=(typename _Container::value_type&& __value) + {iter = container->insert(iter, _VSTD::move(__value)); ++iter; return *this;} #endif // _LIBCPP_CXX03_LANG _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 insert_iterator& operator*() {return *this;} _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 insert_iterator& operator++() {return *this;} diff --git a/libcxx/include/__iterator/iterator_traits.h b/libcxx/include/__iterator/iterator_traits.h index c3a5b7e0dd22..63525e230add 100644 --- a/libcxx/include/__iterator/iterator_traits.h +++ b/libcxx/include/__iterator/iterator_traits.h @@ -475,6 +475,18 @@ struct __is_exactly_cpp17_input_iterator __has_iterator_category_convertible_to<_Tp, input_iterator_tag>::value && !__has_iterator_category_convertible_to<_Tp, forward_iterator_tag>::value> {}; +template <class _Tp> +struct __is_exactly_cpp17_forward_iterator + : public integral_constant<bool, + __has_iterator_category_convertible_to<_Tp, forward_iterator_tag>::value && + !__has_iterator_category_convertible_to<_Tp, bidirectional_iterator_tag>::value> {}; + +template <class _Tp> +struct __is_exactly_cpp17_bidirectional_iterator + : public integral_constant<bool, + __has_iterator_category_convertible_to<_Tp, bidirectional_iterator_tag>::value && + !__has_iterator_category_convertible_to<_Tp, random_access_iterator_tag>::value> {}; + template<class _InputIterator> using __iter_value_type = typename iterator_traits<_InputIterator>::value_type; diff --git a/libcxx/include/__iterator/ostream_iterator.h b/libcxx/include/__iterator/ostream_iterator.h index 76ae4614939f..d16f5a26ebaa 100644 --- a/libcxx/include/__iterator/ostream_iterator.h +++ b/libcxx/include/__iterator/ostream_iterator.h @@ -53,9 +53,9 @@ public: : __out_stream_(_VSTD::addressof(__s)), __delim_(nullptr) {} _LIBCPP_INLINE_VISIBILITY ostream_iterator(ostream_type& __s, const _CharT* __delimiter) _NOEXCEPT : __out_stream_(_VSTD::addressof(__s)), __delim_(__delimiter) {} - _LIBCPP_INLINE_VISIBILITY ostream_iterator& operator=(const _Tp& __value_) + _LIBCPP_INLINE_VISIBILITY ostream_iterator& operator=(const _Tp& __value) { - *__out_stream_ << __value_; + *__out_stream_ << __value; if (__delim_) *__out_stream_ << __delim_; return *this; diff --git a/libcxx/include/__iterator/reverse_iterator.h b/libcxx/include/__iterator/reverse_iterator.h index 89bda19effef..a915609dbe33 100644 --- a/libcxx/include/__iterator/reverse_iterator.h +++ b/libcxx/include/__iterator/reverse_iterator.h @@ -15,15 +15,20 @@ #include <__compare/three_way_comparable.h> #include <__concepts/convertible_to.h> #include <__config> +#include <__iterator/advance.h> #include <__iterator/concepts.h> #include <__iterator/incrementable_traits.h> #include <__iterator/iter_move.h> #include <__iterator/iter_swap.h> #include <__iterator/iterator.h> #include <__iterator/iterator_traits.h> +#include <__iterator/next.h> #include <__iterator/prev.h> #include <__iterator/readable_traits.h> #include <__memory/addressof.h> +#include <__ranges/access.h> +#include <__ranges/concepts.h> +#include <__ranges/subrange.h> #include <__utility/move.h> #include <type_traits> @@ -136,7 +141,7 @@ public: #if _LIBCPP_STD_VER > 17 _LIBCPP_INLINE_VISIBILITY constexpr pointer operator->() const - requires is_pointer_v<_Iter> || 
requires(const _Iter i) { i.operator->(); } + requires is_pointer_v<_Iter> || requires(const _Iter __i) { __i.operator->(); } { if constexpr (is_pointer_v<_Iter>) { return std::prev(current); @@ -365,6 +370,16 @@ struct __rewrap_iter_impl<_ReverseWrapper<_OrigIter>, _UnwrappedIter> { } }; +#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) +template <ranges::bidirectional_range _Range> +_LIBCPP_HIDE_FROM_ABI constexpr ranges:: + subrange<reverse_iterator<ranges::iterator_t<_Range>>, reverse_iterator<ranges::iterator_t<_Range>>> + __reverse_range(_Range&& __range) { + auto __first = ranges::begin(__range); + return {std::make_reverse_iterator(ranges::next(__first, ranges::end(__range))), std::make_reverse_iterator(__first)}; +} +#endif + _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP___ITERATOR_REVERSE_ITERATOR_H diff --git a/libcxx/include/__mutex_base b/libcxx/include/__mutex_base index da056b6d1423..ac0d090b7d19 100644 --- a/libcxx/include/__mutex_base +++ b/libcxx/include/__mutex_base @@ -338,11 +338,7 @@ private: template <class _Rep, class _Period> inline _LIBCPP_INLINE_VISIBILITY -typename enable_if -< - is_floating_point<_Rep>::value, - chrono::nanoseconds ->::type +__enable_if_t<is_floating_point<_Rep>::value, chrono::nanoseconds> __safe_nanosecond_cast(chrono::duration<_Rep, _Period> __d) { using namespace chrono; @@ -365,11 +361,7 @@ __safe_nanosecond_cast(chrono::duration<_Rep, _Period> __d) template <class _Rep, class _Period> inline _LIBCPP_INLINE_VISIBILITY -typename enable_if -< - !is_floating_point<_Rep>::value, - chrono::nanoseconds ->::type +__enable_if_t<!is_floating_point<_Rep>::value, chrono::nanoseconds> __safe_nanosecond_cast(chrono::duration<_Rep, _Period> __d) { using namespace chrono; diff --git a/libcxx/include/__numeric/iota.h b/libcxx/include/__numeric/iota.h index b30e0e0a5484..b7127a11cb75 100644 --- a/libcxx/include/__numeric/iota.h +++ b/libcxx/include/__numeric/iota.h @@ -21,10 +21,10 @@ _LIBCPP_BEGIN_NAMESPACE_STD template <class _ForwardIterator, class _Tp> _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 void -iota(_ForwardIterator __first, _ForwardIterator __last, _Tp __value_) +iota(_ForwardIterator __first, _ForwardIterator __last, _Tp __value) { - for (; __first != __last; ++__first, (void) ++__value_) - *__first = __value_; + for (; __first != __last; ++__first, (void) ++__value) + *__first = __value; } _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__random/piecewise_constant_distribution.h b/libcxx/include/__random/piecewise_constant_distribution.h index a33ab0720062..9c9e14b16d6e 100644 --- a/libcxx/include/__random/piecewise_constant_distribution.h +++ b/libcxx/include/__random/piecewise_constant_distribution.h @@ -43,8 +43,8 @@ public: param_type(); template<class _InputIteratorB, class _InputIteratorW> - param_type(_InputIteratorB __fB, _InputIteratorB __lB, - _InputIteratorW __fW); + param_type(_InputIteratorB __f_b, _InputIteratorB __l_b, + _InputIteratorW __f_w); #ifndef _LIBCPP_CXX03_LANG template<class _UnaryOperation> param_type(initializer_list<result_type> __bl, _UnaryOperation __fw); @@ -94,10 +94,10 @@ public: piecewise_constant_distribution() {} template<class _InputIteratorB, class _InputIteratorW> _LIBCPP_INLINE_VISIBILITY - piecewise_constant_distribution(_InputIteratorB __fB, - _InputIteratorB __lB, - _InputIteratorW __fW) - : __p_(__fB, __lB, __fW) {} + piecewise_constant_distribution(_InputIteratorB __f_b, + _InputIteratorB __l_b, + _InputIteratorW __f_w) + : __p_(__f_b, __l_b, __f_w) {} #ifndef 
_LIBCPP_CXX03_LANG template<class _UnaryOperation> @@ -215,8 +215,8 @@ piecewise_constant_distribution<_RealType>::param_type::param_type() template<class _RealType> template<class _InputIteratorB, class _InputIteratorW> piecewise_constant_distribution<_RealType>::param_type::param_type( - _InputIteratorB __fB, _InputIteratorB __lB, _InputIteratorW __fW) - : __b_(__fB, __lB) + _InputIteratorB __f_b, _InputIteratorB __l_b, _InputIteratorW __f_w) + : __b_(__f_b, __l_b) { if (__b_.size() < 2) { @@ -229,8 +229,8 @@ piecewise_constant_distribution<_RealType>::param_type::param_type( else { __densities_.reserve(__b_.size() - 1); - for (size_t __i = 0; __i < __b_.size() - 1; ++__i, ++__fW) - __densities_.push_back(*__fW); + for (size_t __i = 0; __i < __b_.size() - 1; ++__i, ++__f_w) + __densities_.push_back(*__f_w); __init(); } } diff --git a/libcxx/include/__random/piecewise_linear_distribution.h b/libcxx/include/__random/piecewise_linear_distribution.h index e69ce9444072..05f00cef06ef 100644 --- a/libcxx/include/__random/piecewise_linear_distribution.h +++ b/libcxx/include/__random/piecewise_linear_distribution.h @@ -43,8 +43,8 @@ public: param_type(); template<class _InputIteratorB, class _InputIteratorW> - param_type(_InputIteratorB __fB, _InputIteratorB __lB, - _InputIteratorW __fW); + param_type(_InputIteratorB __f_b, _InputIteratorB __l_b, + _InputIteratorW __f_w); #ifndef _LIBCPP_CXX03_LANG template<class _UnaryOperation> param_type(initializer_list<result_type> __bl, _UnaryOperation __fw); @@ -94,10 +94,10 @@ public: piecewise_linear_distribution() {} template<class _InputIteratorB, class _InputIteratorW> _LIBCPP_INLINE_VISIBILITY - piecewise_linear_distribution(_InputIteratorB __fB, - _InputIteratorB __lB, - _InputIteratorW __fW) - : __p_(__fB, __lB, __fW) {} + piecewise_linear_distribution(_InputIteratorB __f_b, + _InputIteratorB __l_b, + _InputIteratorW __f_w) + : __p_(__f_b, __l_b, __f_w) {} #ifndef _LIBCPP_CXX03_LANG template<class _UnaryOperation> @@ -219,8 +219,8 @@ piecewise_linear_distribution<_RealType>::param_type::param_type() template<class _RealType> template<class _InputIteratorB, class _InputIteratorW> piecewise_linear_distribution<_RealType>::param_type::param_type( - _InputIteratorB __fB, _InputIteratorB __lB, _InputIteratorW __fW) - : __b_(__fB, __lB) + _InputIteratorB __f_b, _InputIteratorB __l_b, _InputIteratorW __f_w) + : __b_(__f_b, __l_b) { if (__b_.size() < 2) { @@ -233,8 +233,8 @@ piecewise_linear_distribution<_RealType>::param_type::param_type( else { __densities_.reserve(__b_.size()); - for (size_t __i = 0; __i < __b_.size(); ++__i, ++__fW) - __densities_.push_back(*__fW); + for (size_t __i = 0; __i < __b_.size(); ++__i, ++__f_w) + __densities_.push_back(*__f_w); __init(); } } diff --git a/libcxx/include/__ranges/zip_view.h b/libcxx/include/__ranges/zip_view.h index 560452aa7c69..a8035bc79e12 100644 --- a/libcxx/include/__ranges/zip_view.h +++ b/libcxx/include/__ranges/zip_view.h @@ -488,10 +488,10 @@ struct __fn { _LIBCPP_HIDE_FROM_ABI constexpr auto operator()() const noexcept { return empty_view<tuple<>>{}; } template <class... _Ranges> - _LIBCPP_HIDE_FROM_ABI constexpr auto operator()(_Ranges&&... rs) const - noexcept(noexcept(zip_view<all_t<_Ranges&&>...>(std::forward<_Ranges>(rs)...))) - -> decltype(zip_view<all_t<_Ranges&&>...>(std::forward<_Ranges>(rs)...)) { - return zip_view<all_t<_Ranges>...>(std::forward<_Ranges>(rs)...); + _LIBCPP_HIDE_FROM_ABI constexpr auto operator()(_Ranges&&... 
__rs) const + noexcept(noexcept(zip_view<all_t<_Ranges&&>...>(std::forward<_Ranges>(__rs)...))) + -> decltype(zip_view<all_t<_Ranges&&>...>(std::forward<_Ranges>(__rs)...)) { + return zip_view<all_t<_Ranges>...>(std::forward<_Ranges>(__rs)...); } }; diff --git a/libcxx/include/__split_buffer b/libcxx/include/__split_buffer index e484e70440c9..7409b51b1f96 100644 --- a/libcxx/include/__split_buffer +++ b/libcxx/include/__split_buffer @@ -118,19 +118,10 @@ public: void __construct_at_end(size_type __n); void __construct_at_end(size_type __n, const_reference __x); template <class _InputIter> - typename enable_if - < - __is_cpp17_input_iterator<_InputIter>::value && - !__is_cpp17_forward_iterator<_InputIter>::value, - void - >::type + __enable_if_t<__is_exactly_cpp17_input_iterator<_InputIter>::value> __construct_at_end(_InputIter __first, _InputIter __last); template <class _ForwardIterator> - typename enable_if - < - __is_cpp17_forward_iterator<_ForwardIterator>::value, - void - >::type + __enable_if_t<__is_cpp17_forward_iterator<_ForwardIterator>::value> __construct_at_end(_ForwardIterator __first, _ForwardIterator __last); _LIBCPP_INLINE_VISIBILITY void __destruct_at_begin(pointer __new_begin) @@ -239,12 +230,7 @@ __split_buffer<_Tp, _Allocator>::__construct_at_end(size_type __n, const_referen template <class _Tp, class _Allocator> template <class _InputIter> -typename enable_if -< - __is_cpp17_input_iterator<_InputIter>::value && - !__is_cpp17_forward_iterator<_InputIter>::value, - void ->::type +__enable_if_t<__is_exactly_cpp17_input_iterator<_InputIter>::value> __split_buffer<_Tp, _Allocator>::__construct_at_end(_InputIter __first, _InputIter __last) { __alloc_rr& __a = this->__alloc(); @@ -267,11 +253,7 @@ __split_buffer<_Tp, _Allocator>::__construct_at_end(_InputIter __first, _InputIt template <class _Tp, class _Allocator> template <class _ForwardIterator> -typename enable_if -< - __is_cpp17_forward_iterator<_ForwardIterator>::value, - void ->::type +__enable_if_t<__is_cpp17_forward_iterator<_ForwardIterator>::value> __split_buffer<_Tp, _Allocator>::__construct_at_end(_ForwardIterator __first, _ForwardIterator __last) { _ConstructTransaction __tx(&this->__end_, _VSTD::distance(__first, __last)); diff --git a/libcxx/include/__string/char_traits.h b/libcxx/include/__string/char_traits.h index 457a771b94cf..18ad67b28e16 100644 --- a/libcxx/include/__string/char_traits.h +++ b/libcxx/include/__string/char_traits.h @@ -802,9 +802,7 @@ __str_rfind(const _CharT *__p, _SizeT __sz, __pos += __n; else __pos = __sz; - const _CharT* __r = _VSTD::__find_end( - __p, __p + __pos, __s, __s + __n, _Traits::eq, - random_access_iterator_tag(), random_access_iterator_tag()); + const _CharT* __r = std::__find_end_classic(__p, __p + __pos, __s, __s + __n, _Traits::eq); if (__n > 0 && __r == __p + __pos) return __npos; return static_cast<_SizeT>(__r - __p); diff --git a/libcxx/include/__threading_support b/libcxx/include/__threading_support index 8f1efb7854b7..a7f0da972a8d 100644 --- a/libcxx/include/__threading_support +++ b/libcxx/include/__threading_support @@ -201,15 +201,15 @@ int __libcpp_condvar_destroy(__libcpp_condvar_t* __cv); // Execute once _LIBCPP_THREAD_ABI_VISIBILITY -int __libcpp_execute_once(__libcpp_exec_once_flag *flag, - void (*init_routine)()); +int __libcpp_execute_once(__libcpp_exec_once_flag *__flag, + void (*__init_routine)()); // Thread id _LIBCPP_THREAD_ABI_VISIBILITY -bool __libcpp_thread_id_equal(__libcpp_thread_id t1, __libcpp_thread_id t2); +bool 
__libcpp_thread_id_equal(__libcpp_thread_id __t1, __libcpp_thread_id __t2); _LIBCPP_THREAD_ABI_VISIBILITY -bool __libcpp_thread_id_less(__libcpp_thread_id t1, __libcpp_thread_id t2); +bool __libcpp_thread_id_less(__libcpp_thread_id __t1, __libcpp_thread_id __t2); // Thread _LIBCPP_THREAD_ABI_VISIBILITY @@ -347,22 +347,22 @@ int __libcpp_condvar_destroy(__libcpp_condvar_t *__cv) } // Execute once -int __libcpp_execute_once(__libcpp_exec_once_flag *flag, - void (*init_routine)()) { - return pthread_once(flag, init_routine); +int __libcpp_execute_once(__libcpp_exec_once_flag *__flag, + void (*__init_routine)()) { + return pthread_once(__flag, __init_routine); } // Thread id // Returns non-zero if the thread ids are equal, otherwise 0 -bool __libcpp_thread_id_equal(__libcpp_thread_id t1, __libcpp_thread_id t2) +bool __libcpp_thread_id_equal(__libcpp_thread_id __t1, __libcpp_thread_id __t2) { - return t1 == t2; + return __t1 == __t2; } // Returns non-zero if t1 < t2, otherwise 0 -bool __libcpp_thread_id_less(__libcpp_thread_id t1, __libcpp_thread_id t2) +bool __libcpp_thread_id_less(__libcpp_thread_id __t1, __libcpp_thread_id __t2) { - return t1 < t2; + return __t1 < __t2; } // Thread diff --git a/libcxx/include/__tree b/libcxx/include/__tree index e5dd1f4d45ea..8d8449706871 100644 --- a/libcxx/include/__tree +++ b/libcxx/include/__tree @@ -597,8 +597,7 @@ struct __tree_key_value_types<__value_type<_Key, _Tp> > { template <class _Up> _LIBCPP_INLINE_VISIBILITY - static typename enable_if<__is_same_uncvref<_Up, __container_value_type>::value, - key_type const&>::type + static __enable_if_t<__is_same_uncvref<_Up, __container_value_type>::value, key_type const&> __get_key(_Up& __t) { return __t.first; } @@ -611,8 +610,7 @@ struct __tree_key_value_types<__value_type<_Key, _Tp> > { template <class _Up> _LIBCPP_INLINE_VISIBILITY - static typename enable_if<__is_same_uncvref<_Up, __container_value_type>::value, - __container_value_type const&>::type + static __enable_if_t<__is_same_uncvref<_Up, __container_value_type>::value, __container_value_type const&> __get_value(_Up& __t) { return __t; } @@ -1175,10 +1173,8 @@ public: template <class _First, class _Second> _LIBCPP_INLINE_VISIBILITY - typename enable_if< - __can_extract_map_key<_First, key_type, __container_value_type>::value, - pair<iterator, bool> - >::type __emplace_unique(_First&& __f, _Second&& __s) { + __enable_if_t<__can_extract_map_key<_First, key_type, __container_value_type>::value, pair<iterator, bool> > + __emplace_unique(_First&& __f, _Second&& __s) { return __emplace_unique_key_args(__f, _VSTD::forward<_First>(__f), _VSTD::forward<_Second>(__s)); } @@ -1219,10 +1215,8 @@ public: template <class _First, class _Second> _LIBCPP_INLINE_VISIBILITY - typename enable_if< - __can_extract_map_key<_First, key_type, __container_value_type>::value, - iterator - >::type __emplace_hint_unique(const_iterator __p, _First&& __f, _Second&& __s) { + __enable_if_t<__can_extract_map_key<_First, key_type, __container_value_type>::value, iterator> + __emplace_hint_unique(const_iterator __p, _First&& __f, _Second&& __s) { return __emplace_hint_unique_key_args(__p, __f, _VSTD::forward<_First>(__f), _VSTD::forward<_Second>(__s)).first; @@ -1275,21 +1269,15 @@ public: return __emplace_hint_unique_key_args(__p, _NodeTypes::__get_key(__v), _VSTD::move(__v)).first; } - template <class _Vp, class = typename enable_if< - !is_same<typename __unconstref<_Vp>::type, - __container_value_type - >::value - >::type> + template <class _Vp, + class = 
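Several hunks in this stretch (the hash table, __tree, __mutex_base, __split_buffer, and __tuple) replace the verbose typename enable_if<...>::type spelling with the internal __enable_if_t alias. The two are equivalent; roughly (the reserved name is kept here only to mirror the library):

#include <type_traits>

// Essentially C++14 std::enable_if_t, spelled as the libc++-internal alias.
template <bool _Bp, class _Tp = void>
using __enable_if_t = typename std::enable_if<_Bp, _Tp>::type;

// Before: typename enable_if<cond, R>::type f(...);
// After:  __enable_if_t<cond, R>            f(...);
template <class T>
__enable_if_t<std::is_integral<T>::value, T> twice(T v) { return v + v; }

int main() { return twice(0); }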
__enable_if_t<!is_same<typename __unconstref<_Vp>::type, __container_value_type>::value> > _LIBCPP_INLINE_VISIBILITY pair<iterator, bool> __insert_unique(_Vp&& __v) { return __emplace_unique(_VSTD::forward<_Vp>(__v)); } - template <class _Vp, class = typename enable_if< - !is_same<typename __unconstref<_Vp>::type, - __container_value_type - >::value - >::type> + template <class _Vp, + class = __enable_if_t<!is_same<typename __unconstref<_Vp>::type, __container_value_type>::value> > _LIBCPP_INLINE_VISIBILITY iterator __insert_unique(const_iterator __p, _Vp&& __v) { return __emplace_hint_unique(__p, _VSTD::forward<_Vp>(__v)); diff --git a/libcxx/include/__tuple b/libcxx/include/__tuple index 6d13bb24c579..f85036e7af1d 100644 --- a/libcxx/include/__tuple +++ b/libcxx/include/__tuple @@ -30,14 +30,14 @@ using __enable_if_tuple_size_imp = _Tp; template <class _Tp> struct _LIBCPP_TEMPLATE_VIS tuple_size<__enable_if_tuple_size_imp< const _Tp, - typename enable_if<!is_volatile<_Tp>::value>::type, + __enable_if_t<!is_volatile<_Tp>::value>, integral_constant<size_t, sizeof(tuple_size<_Tp>)>>> : public integral_constant<size_t, tuple_size<_Tp>::value> {}; template <class _Tp> struct _LIBCPP_TEMPLATE_VIS tuple_size<__enable_if_tuple_size_imp< volatile _Tp, - typename enable_if<!is_const<_Tp>::value>::type, + __enable_if_t<!is_const<_Tp>::value>, integral_constant<size_t, sizeof(tuple_size<_Tp>)>>> : public integral_constant<size_t, tuple_size<_Tp>::value> {}; @@ -393,7 +393,7 @@ struct __tuple_sfinae_base { template <template <class, class...> class _Trait, class ..._LArgs, class ..._RArgs> static auto __do_test(__tuple_types<_LArgs...>, __tuple_types<_RArgs...>) - -> __all<typename enable_if<_Trait<_LArgs, _RArgs>::value, bool>::type{true}...>; + -> __all<__enable_if_t<_Trait<_LArgs, _RArgs>::value, bool>{true}...>; template <template <class...> class> static auto __do_test(...) 
-> false_type; diff --git a/libcxx/include/__type_traits/extent.h b/libcxx/include/__type_traits/extent.h index 935ec4937c0a..0a4d84e05e23 100644 --- a/libcxx/include/__type_traits/extent.h +++ b/libcxx/include/__type_traits/extent.h @@ -19,7 +19,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if __has_keyword(__array_extent) +#if __has_builtin(__array_extent) template<class _Tp, size_t _Dim = 0> struct _LIBCPP_TEMPLATE_VIS extent @@ -30,7 +30,7 @@ template <class _Tp, unsigned _Ip = 0> inline constexpr size_t extent_v = __array_extent(_Tp, _Ip); #endif -#else // __has_keyword(__array_extent) +#else // __has_builtin(__array_extent) template <class _Tp, unsigned _Ip = 0> struct _LIBCPP_TEMPLATE_VIS extent : public integral_constant<size_t, 0> {}; @@ -48,7 +48,7 @@ template <class _Tp, unsigned _Ip = 0> inline constexpr size_t extent_v = extent<_Tp, _Ip>::value; #endif -#endif // __has_keyword(__array_extent) +#endif // __has_builtin(__array_extent) _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__type_traits/has_virtual_destructor.h b/libcxx/include/__type_traits/has_virtual_destructor.h index 33574373632e..1f0bd188b717 100644 --- a/libcxx/include/__type_traits/has_virtual_destructor.h +++ b/libcxx/include/__type_traits/has_virtual_destructor.h @@ -18,7 +18,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if __has_feature(has_virtual_destructor) || defined(_LIBCPP_COMPILER_GCC) +#if __has_builtin(__has_virtual_destructor) template <class _Tp> struct _LIBCPP_TEMPLATE_VIS has_virtual_destructor : public integral_constant<bool, __has_virtual_destructor(_Tp)> {}; diff --git a/libcxx/include/__type_traits/is_array.h b/libcxx/include/__type_traits/is_array.h index 766d2a203028..bc105908982c 100644 --- a/libcxx/include/__type_traits/is_array.h +++ b/libcxx/include/__type_traits/is_array.h @@ -21,7 +21,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD // TODO: Clang incorrectly reports that __is_array is true for T[0]. // Re-enable the branch once https://llvm.org/PR54705 is fixed. 
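The __tree and __tuple hunks above (and the array hunk further down) all perform the same mechanical substitution: the verbose SFINAE spelling typename enable_if<Cond, T>::type becomes the internal alias __enable_if_t<Cond, T>. A minimal sketch of the pattern, assuming the obvious alias-template shape for the helper (the real definition lives in libc++'s <type_traits> internals):

#include <type_traits>

// Assumed shape of the helper; libc++ spells it __enable_if_t.
template <bool _Cond, class _Tp = void>
using enable_if_t_sketch = typename std::enable_if<_Cond, _Tp>::type;

// Before: the spelling this patch removes.
template <class _Tp>
typename std::enable_if<std::is_integral<_Tp>::value, _Tp>::type
twice_old(_Tp __v) { return __v + __v; }

// After: the same constraint, one readable token.
template <class _Tp>
enable_if_t_sketch<std::is_integral<_Tp>::value, _Tp>
twice_new(_Tp __v) { return __v + __v; }

int main() { return twice_new(21) == twice_old(21) ? 0 : 1; }

Overload resolution is unchanged; the alias only flattens the dependent-type ceremony that otherwise has to be repeated at every constrained overload, as in the __emplace_unique and __insert_unique signatures above.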
-#if __has_keyword(__is_array) && 0 +#if __has_builtin(__is_array) && 0 template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_array : _BoolConstant<__is_array(_Tp)> { }; @@ -45,7 +45,7 @@ template <class _Tp> inline constexpr bool is_array_v = is_array<_Tp>::value; #endif -#endif // __has_keyword(__is_array) +#endif // __has_builtin(__is_array) _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__type_traits/is_assignable.h b/libcxx/include/__type_traits/is_assignable.h index edc864cae065..b8cb6df9df4a 100644 --- a/libcxx/include/__type_traits/is_assignable.h +++ b/libcxx/include/__type_traits/is_assignable.h @@ -20,7 +20,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD template<typename, typename _Tp> struct __select_2nd { typedef _LIBCPP_NODEBUG _Tp type; }; -#if __has_keyword(__is_assignable) +#if __has_builtin(__is_assignable) template<class _Tp, class _Up> struct _LIBCPP_TEMPLATE_VIS is_assignable : _BoolConstant<__is_assignable(_Tp, _Up)> { }; @@ -30,7 +30,7 @@ template <class _Tp, class _Arg> inline constexpr bool is_assignable_v = __is_assignable(_Tp, _Arg); #endif -#else // __has_keyword(__is_assignable) +#else // __has_builtin(__is_assignable) template <class _Tp, class _Arg> typename __select_2nd<decltype((declval<_Tp>() = declval<_Arg>())), true_type>::type @@ -59,7 +59,7 @@ template <class _Tp, class _Arg> inline constexpr bool is_assignable_v = is_assignable<_Tp, _Arg>::value; #endif -#endif // __has_keyword(__is_assignable) +#endif // __has_builtin(__is_assignable) _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__type_traits/is_compound.h b/libcxx/include/__type_traits/is_compound.h index 643edd78229a..1395ed8d417f 100644 --- a/libcxx/include/__type_traits/is_compound.h +++ b/libcxx/include/__type_traits/is_compound.h @@ -19,7 +19,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if __has_keyword(__is_compound) +#if __has_builtin(__is_compound) template<class _Tp> struct _LIBCPP_TEMPLATE_VIS is_compound : _BoolConstant<__is_compound(_Tp)> { }; @@ -29,7 +29,7 @@ template <class _Tp> inline constexpr bool is_compound_v = __is_compound(_Tp); #endif -#else // __has_keyword(__is_compound) +#else // __has_builtin(__is_compound) template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_compound : public integral_constant<bool, !is_fundamental<_Tp>::value> {}; @@ -39,7 +39,7 @@ template <class _Tp> inline constexpr bool is_compound_v = is_compound<_Tp>::value; #endif -#endif // __has_keyword(__is_compound) +#endif // __has_builtin(__is_compound) _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__type_traits/is_const.h b/libcxx/include/__type_traits/is_const.h index 5501832f560b..42b892c58d50 100644 --- a/libcxx/include/__type_traits/is_const.h +++ b/libcxx/include/__type_traits/is_const.h @@ -18,7 +18,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if __has_keyword(__is_const) +#if __has_builtin(__is_const) template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_const : _BoolConstant<__is_const(_Tp)> { }; @@ -38,7 +38,7 @@ template <class _Tp> inline constexpr bool is_const_v = is_const<_Tp>::value; #endif -#endif // __has_keyword(__is_const) +#endif // __has_builtin(__is_const) _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__type_traits/is_convertible.h b/libcxx/include/__type_traits/is_convertible.h index 884e808e2ae4..7e49cd4e6a31 100644 --- a/libcxx/include/__type_traits/is_convertible.h +++ b/libcxx/include/__type_traits/is_convertible.h @@ -24,12 +24,12 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if __has_feature(is_convertible_to) && !defined(_LIBCPP_USE_IS_CONVERTIBLE_FALLBACK) +#if __has_builtin(__is_convertible_to) 
&& !defined(_LIBCPP_USE_IS_CONVERTIBLE_FALLBACK) template <class _T1, class _T2> struct _LIBCPP_TEMPLATE_VIS is_convertible : public integral_constant<bool, __is_convertible_to(_T1, _T2)> {}; -#else // __has_feature(is_convertible_to) +#else // __has_builtin(__is_convertible_to) && !defined(_LIBCPP_USE_IS_CONVERTIBLE_FALLBACK) namespace __is_convertible_imp { @@ -96,7 +96,7 @@ template <class _T1, class _T2> struct _LIBCPP_TEMPLATE_VIS is_convertible static const size_t __complete_check2 = __is_convertible_check<_T2>::__v; }; -#endif // __has_feature(is_convertible_to) +#endif // __has_builtin(__is_convertible_to) && !defined(_LIBCPP_USE_IS_CONVERTIBLE_FALLBACK) #if _LIBCPP_STD_VER > 14 template <class _From, class _To> diff --git a/libcxx/include/__type_traits/is_destructible.h b/libcxx/include/__type_traits/is_destructible.h index 489451007873..5e9ac5579fdd 100644 --- a/libcxx/include/__type_traits/is_destructible.h +++ b/libcxx/include/__type_traits/is_destructible.h @@ -22,7 +22,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if __has_keyword(__is_destructible) +#if __has_builtin(__is_destructible) template<class _Tp> struct _LIBCPP_TEMPLATE_VIS is_destructible : _BoolConstant<__is_destructible(_Tp)> { }; @@ -32,7 +32,7 @@ template <class _Tp> inline constexpr bool is_destructible_v = __is_destructible(_Tp); #endif -#else // __has_keyword(__is_destructible) +#else // __has_builtin(__is_destructible) // if it's a reference, return true // if it's a function, return false @@ -95,7 +95,7 @@ template <class _Tp> inline constexpr bool is_destructible_v = is_destructible<_Tp>::value; #endif -#endif // __has_keyword(__is_destructible) +#endif // __has_builtin(__is_destructible) _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__type_traits/is_function.h b/libcxx/include/__type_traits/is_function.h index bce980c21b4e..53f34b39eb9a 100644 --- a/libcxx/include/__type_traits/is_function.h +++ b/libcxx/include/__type_traits/is_function.h @@ -20,7 +20,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if __has_keyword(__is_function) +#if __has_builtin(__is_function) template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_function : integral_constant<bool, __is_function(_Tp)> {}; @@ -31,7 +31,7 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_function : public integral_constant<bool, !(is_reference<_Tp>::value || is_const<const _Tp>::value)> {}; -#endif // __has_keyword(__is_function) +#endif // __has_builtin(__is_function) #if _LIBCPP_STD_VER > 14 template <class _Tp> diff --git a/libcxx/include/__type_traits/is_fundamental.h b/libcxx/include/__type_traits/is_fundamental.h index aaa7063eef9b..46f81a103583 100644 --- a/libcxx/include/__type_traits/is_fundamental.h +++ b/libcxx/include/__type_traits/is_fundamental.h @@ -20,7 +20,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if __has_keyword(__is_fundamental) +#if __has_builtin(__is_fundamental) template<class _Tp> struct _LIBCPP_TEMPLATE_VIS is_fundamental : _BoolConstant<__is_fundamental(_Tp)> { }; @@ -30,7 +30,7 @@ template <class _Tp> inline constexpr bool is_fundamental_v = __is_fundamental(_Tp); #endif -#else // __has_keyword(__is_fundamental) +#else // __has_builtin(__is_fundamental) template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_fundamental : public integral_constant<bool, is_void<_Tp>::value || @@ -42,7 +42,7 @@ template <class _Tp> inline constexpr bool is_fundamental_v = is_fundamental<_Tp>::value; #endif -#endif // __has_keyword(__is_fundamental) +#endif // __has_builtin(__is_fundamental) _LIBCPP_END_NAMESPACE_STD diff --git 
a/libcxx/include/__type_traits/is_integral.h b/libcxx/include/__type_traits/is_integral.h index 09378735d94e..c1c573a75e0a 100644 --- a/libcxx/include/__type_traits/is_integral.h +++ b/libcxx/include/__type_traits/is_integral.h @@ -45,7 +45,7 @@ template <> struct __libcpp_is_integral<__int128_t> { enum { va template <> struct __libcpp_is_integral<__uint128_t> { enum { value = 1 }; }; #endif -#if __has_keyword(__is_integral) +#if __has_builtin(__is_integral) template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_integral : _BoolConstant<__is_integral(_Tp)> { }; @@ -65,7 +65,7 @@ template <class _Tp> inline constexpr bool is_integral_v = is_integral<_Tp>::value; #endif -#endif // __has_keyword(__is_integral) +#endif // __has_builtin(__is_integral) _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__type_traits/is_member_function_pointer.h b/libcxx/include/__type_traits/is_member_function_pointer.h index f18f3ebc5a3a..2d2ff81e091a 100644 --- a/libcxx/include/__type_traits/is_member_function_pointer.h +++ b/libcxx/include/__type_traits/is_member_function_pointer.h @@ -36,7 +36,7 @@ template <class _Tp, class _Up> struct __libcpp_is_member_pointer<_Tp _Up::*> { }; }; -#if __has_keyword(__is_member_function_pointer) +#if __has_builtin(__is_member_function_pointer) template<class _Tp> struct _LIBCPP_TEMPLATE_VIS is_member_function_pointer @@ -47,7 +47,7 @@ template <class _Tp> inline constexpr bool is_member_function_pointer_v = __is_member_function_pointer(_Tp); #endif -#else // __has_keyword(__is_member_function_pointer) +#else // __has_builtin(__is_member_function_pointer) template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_member_function_pointer : public _BoolConstant< __libcpp_is_member_pointer<typename remove_cv<_Tp>::type>::__is_func > {}; @@ -57,7 +57,7 @@ template <class _Tp> inline constexpr bool is_member_function_pointer_v = is_member_function_pointer<_Tp>::value; #endif -#endif // __has_keyword(__is_member_function_pointer) +#endif // __has_builtin(__is_member_function_pointer) _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__type_traits/is_member_object_pointer.h b/libcxx/include/__type_traits/is_member_object_pointer.h index 41f81d529388..250a22c97440 100644 --- a/libcxx/include/__type_traits/is_member_object_pointer.h +++ b/libcxx/include/__type_traits/is_member_object_pointer.h @@ -18,7 +18,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if __has_keyword(__is_member_object_pointer) +#if __has_builtin(__is_member_object_pointer) template<class _Tp> struct _LIBCPP_TEMPLATE_VIS is_member_object_pointer @@ -29,7 +29,7 @@ template <class _Tp> inline constexpr bool is_member_object_pointer_v = __is_member_object_pointer(_Tp); #endif -#else // __has_keyword(__is_member_object_pointer) +#else // __has_builtin(__is_member_object_pointer) template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_member_object_pointer : public _BoolConstant< __libcpp_is_member_pointer<typename remove_cv<_Tp>::type>::__is_obj > {}; @@ -39,7 +39,7 @@ template <class _Tp> inline constexpr bool is_member_object_pointer_v = is_member_object_pointer<_Tp>::value; #endif -#endif // __has_keyword(__is_member_object_pointer) +#endif // __has_builtin(__is_member_object_pointer) _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__type_traits/is_member_pointer.h b/libcxx/include/__type_traits/is_member_pointer.h index 76595dfdab7f..448bcc23d55e 100644 --- a/libcxx/include/__type_traits/is_member_pointer.h +++ b/libcxx/include/__type_traits/is_member_pointer.h @@ -18,7 +18,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if 
__has_keyword(__is_member_pointer) +#if __has_builtin(__is_member_pointer) template<class _Tp> struct _LIBCPP_TEMPLATE_VIS is_member_pointer : _BoolConstant<__is_member_pointer(_Tp)> { }; @@ -28,7 +28,7 @@ template <class _Tp> inline constexpr bool is_member_pointer_v = __is_member_pointer(_Tp); #endif -#else // __has_keyword(__is_member_pointer) +#else // __has_builtin(__is_member_pointer) template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_member_pointer : public _BoolConstant< __libcpp_is_member_pointer<typename remove_cv<_Tp>::type>::__is_member > {}; @@ -38,7 +38,7 @@ template <class _Tp> inline constexpr bool is_member_pointer_v = is_member_pointer<_Tp>::value; #endif -#endif // __has_keyword(__is_member_pointer) +#endif // __has_builtin(__is_member_pointer) _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__type_traits/is_nothrow_assignable.h b/libcxx/include/__type_traits/is_nothrow_assignable.h index 18e0e70a561d..e3ce33ece895 100644 --- a/libcxx/include/__type_traits/is_nothrow_assignable.h +++ b/libcxx/include/__type_traits/is_nothrow_assignable.h @@ -19,7 +19,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if __has_keyword(__is_nothrow_assignable) +#if __has_builtin(__is_nothrow_assignable) template <class _Tp, class _Arg> struct _LIBCPP_TEMPLATE_VIS is_nothrow_assignable @@ -47,7 +47,7 @@ struct _LIBCPP_TEMPLATE_VIS is_nothrow_assignable { }; -#endif // _LIBCPP_HAS_NO_NOEXCEPT +#endif // __has_builtin(__is_nothrow_assignable) #if _LIBCPP_STD_VER > 14 template <class _Tp, class _Arg> diff --git a/libcxx/include/__type_traits/is_nothrow_constructible.h b/libcxx/include/__type_traits/is_nothrow_constructible.h index 1f25d61baa63..92d6e8343e03 100644 --- a/libcxx/include/__type_traits/is_nothrow_constructible.h +++ b/libcxx/include/__type_traits/is_nothrow_constructible.h @@ -19,7 +19,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if __has_keyword(__is_nothrow_constructible) +#if __has_builtin(__is_nothrow_constructible) template <class _Tp, class... 
_Args> struct _LIBCPP_TEMPLATE_VIS is_nothrow_constructible @@ -62,7 +62,7 @@ struct _LIBCPP_TEMPLATE_VIS is_nothrow_constructible<_Tp[_Ns]> { }; -#endif // _LIBCPP_HAS_NO_NOEXCEPT +#endif // __has_builtin(__is_nothrow_constructible) #if _LIBCPP_STD_VER > 14 diff --git a/libcxx/include/__type_traits/is_object.h b/libcxx/include/__type_traits/is_object.h index 0d8339d19492..943f1a736cd5 100644 --- a/libcxx/include/__type_traits/is_object.h +++ b/libcxx/include/__type_traits/is_object.h @@ -22,7 +22,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if __has_keyword(__is_object) +#if __has_builtin(__is_object) template<class _Tp> struct _LIBCPP_TEMPLATE_VIS is_object : _BoolConstant<__is_object(_Tp)> { }; @@ -32,7 +32,7 @@ template <class _Tp> inline constexpr bool is_object_v = __is_object(_Tp); #endif -#else // __has_keyword(__is_object) +#else // __has_builtin(__is_object) template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_object : public integral_constant<bool, is_scalar<_Tp>::value || @@ -45,7 +45,7 @@ template <class _Tp> inline constexpr bool is_object_v = is_object<_Tp>::value; #endif -#endif // __has_keyword(__is_object) +#endif // __has_builtin(__is_object) _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__type_traits/is_pod.h b/libcxx/include/__type_traits/is_pod.h index 4317182f8964..497060e00557 100644 --- a/libcxx/include/__type_traits/is_pod.h +++ b/libcxx/include/__type_traits/is_pod.h @@ -18,7 +18,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if __has_feature(is_pod) || defined(_LIBCPP_COMPILER_GCC) +#if __has_builtin(__is_pod) template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_pod : public integral_constant<bool, __is_pod(_Tp)> {}; @@ -31,7 +31,7 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_pod is_trivially_copy_assignable<_Tp>::value && is_trivially_destructible<_Tp>::value> {}; -#endif +#endif // __has_builtin(__is_pod) #if _LIBCPP_STD_VER > 14 template <class _Tp> diff --git a/libcxx/include/__type_traits/is_pointer.h b/libcxx/include/__type_traits/is_pointer.h index 2139b03282e7..63c82ae4715c 100644 --- a/libcxx/include/__type_traits/is_pointer.h +++ b/libcxx/include/__type_traits/is_pointer.h @@ -19,7 +19,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if __has_keyword(__is_pointer) +#if __has_builtin(__is_pointer) template<class _Tp> struct _LIBCPP_TEMPLATE_VIS is_pointer : _BoolConstant<__is_pointer(_Tp)> { }; @@ -29,7 +29,7 @@ template <class _Tp> inline constexpr bool is_pointer_v = __is_pointer(_Tp); #endif -#else // __has_keyword(__is_pointer) +#else // __has_builtin(__is_pointer) template <class _Tp> struct __libcpp_is_pointer : public false_type {}; template <class _Tp> struct __libcpp_is_pointer<_Tp*> : public true_type {}; @@ -50,7 +50,7 @@ template <class _Tp> inline constexpr bool is_pointer_v = is_pointer<_Tp>::value; #endif -#endif // __has_keyword(__is_pointer) +#endif // __has_builtin(__is_pointer) _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__type_traits/is_reference.h b/libcxx/include/__type_traits/is_reference.h index 8aa55c13ff83..27ca2ddb5a72 100644 --- a/libcxx/include/__type_traits/is_reference.h +++ b/libcxx/include/__type_traits/is_reference.h @@ -18,9 +18,9 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if __has_keyword(__is_lvalue_reference) && \ - __has_keyword(__is_rvalue_reference) && \ - __has_keyword(__is_reference) +#if __has_builtin(__is_lvalue_reference) && \ + __has_builtin(__is_rvalue_reference) && \ + __has_builtin(__is_reference) template<class _Tp> struct _LIBCPP_TEMPLATE_VIS is_lvalue_reference : _BoolConstant<__is_lvalue_reference(_Tp)> { }; @@ -40,7 
+40,7 @@ template <class _Tp> inline constexpr bool is_rvalue_reference_v = __is_rvalue_reference(_Tp); #endif -#else // __has_keyword(__is_lvalue_reference) && etc... +#else // __has_builtin(__is_lvalue_reference) && etc... template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_lvalue_reference : public false_type {}; template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_lvalue_reference<_Tp&> : public true_type {}; @@ -63,7 +63,7 @@ template <class _Tp> inline constexpr bool is_rvalue_reference_v = is_rvalue_reference<_Tp>::value; #endif -#endif // __has_keyword(__is_lvalue_reference) && etc... +#endif // __has_builtin(__is_lvalue_reference) && etc... _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__type_traits/is_scalar.h b/libcxx/include/__type_traits/is_scalar.h index 0ca34c7cf79b..ee856dbbfec7 100644 --- a/libcxx/include/__type_traits/is_scalar.h +++ b/libcxx/include/__type_traits/is_scalar.h @@ -22,7 +22,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if __has_keyword(__is_scalar) +#if __has_builtin(__is_scalar) template<class _Tp> struct _LIBCPP_TEMPLATE_VIS is_scalar : _BoolConstant<__is_scalar(_Tp)> { }; @@ -32,7 +32,7 @@ template <class _Tp> inline constexpr bool is_scalar_v = __is_scalar(_Tp); #endif -#else // __has_keyword(__is_scalar) +#else // __has_builtin(__is_scalar) template <class _Tp> struct __is_block : false_type {}; #if defined(_LIBCPP_HAS_EXTENSION_BLOCKS) @@ -54,7 +54,7 @@ template <class _Tp> inline constexpr bool is_scalar_v = is_scalar<_Tp>::value; #endif -#endif // __has_keyword(__is_scalar) +#endif // __has_builtin(__is_scalar) _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__type_traits/is_signed.h b/libcxx/include/__type_traits/is_signed.h index 241d6f551a4b..e9722c9b2192 100644 --- a/libcxx/include/__type_traits/is_signed.h +++ b/libcxx/include/__type_traits/is_signed.h @@ -18,7 +18,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if __has_keyword(__is_signed) +#if __has_builtin(__is_signed) template<class _Tp> struct _LIBCPP_TEMPLATE_VIS is_signed : _BoolConstant<__is_signed(_Tp)> { }; @@ -28,7 +28,7 @@ template <class _Tp> inline constexpr bool is_signed_v = __is_signed(_Tp); #endif -#else // __has_keyword(__is_signed) +#else // __has_builtin(__is_signed) template <class _Tp, bool = is_integral<_Tp>::value> struct __libcpp_is_signed_impl : public _BoolConstant<(_Tp(-1) < _Tp(0))> {}; @@ -48,7 +48,7 @@ template <class _Tp> inline constexpr bool is_signed_v = is_signed<_Tp>::value; #endif -#endif // __has_keyword(__is_signed) +#endif // __has_builtin(__is_signed) _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__type_traits/is_standard_layout.h b/libcxx/include/__type_traits/is_standard_layout.h index 375d08721e90..0d8b5f480f0c 100644 --- a/libcxx/include/__type_traits/is_standard_layout.h +++ b/libcxx/include/__type_traits/is_standard_layout.h @@ -19,7 +19,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_standard_layout -#if __has_feature(is_standard_layout) || defined(_LIBCPP_COMPILER_GCC) +#if __has_builtin(__is_standard_layout) : public integral_constant<bool, __is_standard_layout(_Tp)> #else : integral_constant<bool, is_scalar<typename remove_all_extents<_Tp>::type>::value> diff --git a/libcxx/include/__type_traits/is_trivial.h b/libcxx/include/__type_traits/is_trivial.h index 011963c3d0e6..73c2093d4082 100644 --- a/libcxx/include/__type_traits/is_trivial.h +++ b/libcxx/include/__type_traits/is_trivial.h @@ -19,7 +19,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_trivial -#if 
__has_feature(is_trivial) || defined(_LIBCPP_COMPILER_GCC) +#if __has_builtin(__is_trivial) : public integral_constant<bool, __is_trivial(_Tp)> #else : integral_constant<bool, is_trivially_copyable<_Tp>::value && diff --git a/libcxx/include/__type_traits/is_trivially_destructible.h b/libcxx/include/__type_traits/is_trivially_destructible.h index 81181cdf841d..3376c3eeff43 100644 --- a/libcxx/include/__type_traits/is_trivially_destructible.h +++ b/libcxx/include/__type_traits/is_trivially_destructible.h @@ -18,12 +18,12 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if __has_keyword(__is_trivially_destructible) +#if __has_builtin(__is_trivially_destructible) template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_trivially_destructible : public integral_constant<bool, __is_trivially_destructible(_Tp)> {}; -#elif __has_feature(has_trivial_destructor) || defined(_LIBCPP_COMPILER_GCC) +#elif __has_builtin(__has_trivial_destructor) template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_trivially_destructible : public integral_constant<bool, is_destructible<_Tp>::value && __has_trivial_destructor(_Tp)> {}; @@ -40,7 +40,7 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_trivially_destructible template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_trivially_destructible<_Tp[]> : public false_type {}; -#endif +#endif // __has_builtin(__is_trivially_destructible) #if _LIBCPP_STD_VER > 14 template <class _Tp> diff --git a/libcxx/include/__type_traits/is_unsigned.h b/libcxx/include/__type_traits/is_unsigned.h index bb279fdb729d..17cd909d5478 100644 --- a/libcxx/include/__type_traits/is_unsigned.h +++ b/libcxx/include/__type_traits/is_unsigned.h @@ -21,7 +21,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD // Before AppleClang 14, __is_unsigned returned true for enums with signed underlying type. 
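These trait headers also show the other recurring substitution in this import: libc++'s private __has_keyword(X) probe (historically defined as !__is_identifier(X), a Clang-only trick) and the older __has_feature/_LIBCPP_COMPILER_GCC conjunctions all collapse into the vendor-standard __has_builtin(X), which current Clang and GCC both answer for type-trait builtins. A sketch of the resulting detection idiom, with illustrative names rather than libc++'s:

#include <type_traits>

#ifndef __has_builtin
#  define __has_builtin(__x) 0 // portability shim for compilers without the probe
#endif

#if __has_builtin(__is_enum)
// Fast path: defer to the compiler builtin.
template <class _Tp>
struct is_enum_sketch : std::integral_constant<bool, __is_enum(_Tp)> {};
#else
// Fallback path: a library implementation (std::is_enum stands in here).
template <class _Tp>
struct is_enum_sketch : std::is_enum<_Tp> {};
#endif

enum Color { red };
static_assert(is_enum_sketch<Color>::value, "builtin and fallback must agree");
static_assert(!is_enum_sketch<int>::value, "int is not an enumeration");
int main() {}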
-#if __has_keyword(__is_unsigned) && !(defined(_LIBCPP_APPLE_CLANG_VER) && _LIBCPP_APPLE_CLANG_VER < 1400) +#if __has_builtin(__is_unsigned) && !(defined(_LIBCPP_APPLE_CLANG_VER) && _LIBCPP_APPLE_CLANG_VER < 1400) template<class _Tp> struct _LIBCPP_TEMPLATE_VIS is_unsigned : _BoolConstant<__is_unsigned(_Tp)> { }; @@ -31,7 +31,7 @@ template <class _Tp> inline constexpr bool is_unsigned_v = __is_unsigned(_Tp); #endif -#else // __has_keyword(__is_unsigned) +#else // __has_builtin(__is_unsigned) template <class _Tp, bool = is_integral<_Tp>::value> struct __libcpp_is_unsigned_impl : public _BoolConstant<(_Tp(0) < _Tp(-1))> {}; @@ -51,7 +51,7 @@ template <class _Tp> inline constexpr bool is_unsigned_v = is_unsigned<_Tp>::value; #endif -#endif // __has_keyword(__is_unsigned) +#endif // __has_builtin(__is_unsigned) _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__type_traits/is_void.h b/libcxx/include/__type_traits/is_void.h index 29e68cc529df..4cebf18633b2 100644 --- a/libcxx/include/__type_traits/is_void.h +++ b/libcxx/include/__type_traits/is_void.h @@ -18,7 +18,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if __has_keyword(__is_void) +#if __has_builtin(__is_void) template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_void : _BoolConstant<__is_void(_Tp)> { }; @@ -38,7 +38,7 @@ template <class _Tp> inline constexpr bool is_void_v = is_void<_Tp>::value; #endif -#endif // __has_keyword(__is_void) +#endif // __has_builtin(__is_void) _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__type_traits/is_volatile.h b/libcxx/include/__type_traits/is_volatile.h index 372703e7ced6..fb922679d62b 100644 --- a/libcxx/include/__type_traits/is_volatile.h +++ b/libcxx/include/__type_traits/is_volatile.h @@ -18,7 +18,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if __has_keyword(__is_volatile) +#if __has_builtin(__is_volatile) template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_volatile : _BoolConstant<__is_volatile(_Tp)> { }; @@ -38,7 +38,7 @@ template <class _Tp> inline constexpr bool is_volatile_v = is_volatile<_Tp>::value; #endif -#endif // __has_keyword(__is_volatile) +#endif // __has_builtin(__is_volatile) _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/algorithm b/libcxx/include/algorithm index 0154dbc39c08..f616a031960e 100644 --- a/libcxx/include/algorithm +++ b/libcxx/include/algorithm @@ -287,6 +287,50 @@ namespace ranges { indirect_unary_predicate<projected<iterator_t<R>, Proj>> Pred> constexpr bool ranges::is_partitioned(R&& r, Pred pred, Proj proj = {}); // since C++20 + template<random_access_iterator I, sentinel_for<I> S, class Comp = ranges::less, + class Proj = identity> + requires sortable<I, Comp, Proj> + constexpr I + ranges::push_heap(I first, S last, Comp comp = {}, Proj proj = {}); // since C++20 + + template<random_access_range R, class Comp = ranges::less, class Proj = identity> + requires sortable<iterator_t<R>, Comp, Proj> + constexpr borrowed_iterator_t<R> + ranges::push_heap(R&& r, Comp comp = {}, Proj proj = {}); // since C++20 + + template<random_access_iterator I, sentinel_for<I> S, class Comp = ranges::less, + class Proj = identity> + requires sortable<I, Comp, Proj> + constexpr I + ranges::pop_heap(I first, S last, Comp comp = {}, Proj proj = {}); // since C++20 + + template<random_access_range R, class Comp = ranges::less, class Proj = identity> + requires sortable<iterator_t<R>, Comp, Proj> + constexpr borrowed_iterator_t<R> + ranges::pop_heap(R&& r, Comp comp = {}, Proj proj = {}); // since C++20 + + template<random_access_iterator I, sentinel_for<I> S, class Comp = ranges::less, + 
class Proj = identity> + requires sortable<I, Comp, Proj> + constexpr I + ranges::make_heap(I first, S last, Comp comp = {}, Proj proj = {}); // since C++20 + + template<random_access_range R, class Comp = ranges::less, class Proj = identity> + requires sortable<iterator_t<R>, Comp, Proj> + constexpr borrowed_iterator_t<R> + ranges::make_heap(R&& r, Comp comp = {}, Proj proj = {}); // since C++20 + + template<random_access_iterator I, sentinel_for<I> S, class Comp = ranges::less, + class Proj = identity> + requires sortable<I, Comp, Proj> + constexpr I + ranges::sort_heap(I first, S last, Comp comp = {}, Proj proj = {}); // since C++20 + + template<random_access_range R, class Comp = ranges::less, class Proj = identity> + requires sortable<iterator_t<R>, Comp, Proj> + constexpr borrowed_iterator_t<R> + ranges::sort_heap(R&& r, Comp comp = {}, Proj proj = {}); // since C++20 + template<bidirectional_iterator I, sentinel_for<I> S> requires permutable<I> constexpr I ranges::reverse(I first, S last); // since C++20 @@ -379,6 +423,17 @@ namespace ranges { constexpr borrowed_iterator_t<R> ranges::is_sorted_until(R&& r, Comp comp = {}, Proj proj = {}); // since C++20 + template<random_access_iterator I, sentinel_for<I> S, class Comp = ranges::less, + class Proj = identity> + requires sortable<I, Comp, Proj> + constexpr I + ranges::nth_element(I first, I nth, S last, Comp comp = {}, Proj proj = {}); // since C++20 + + template<random_access_range R, class Comp = ranges::less, class Proj = identity> + requires sortable<iterator_t<R>, Comp, Proj> + constexpr borrowed_iterator_t<R> + ranges::nth_element(R&& r, iterator_t<R> nth, Comp comp = {}, Proj proj = {}); // since C++20 + template<forward_iterator I, sentinel_for<I> S, class T, class Proj = identity, indirect_strict_weak_order<const T*, projected<I, Proj>> Comp = ranges::less> constexpr I upper_bound(I first, S last, const T& value, Comp comp = {}, Proj proj = {}); // since C++20 @@ -493,7 +548,170 @@ namespace ranges { constexpr ranges::move_result<borrowed_iterator_t<R>, O> ranges::move(R&& r, O result); // since C++20 + template<class I1, class I2, class O> + using merge_result = in_in_out_result<I1, I2, O>; // since C++20 + + template<input_iterator I1, sentinel_for<I1> S1, input_iterator I2, sentinel_for<I2> S2, + weakly_incrementable O, class Comp = ranges::less, class Proj1 = identity, + class Proj2 = identity> + requires mergeable<I1, I2, O, Comp, Proj1, Proj2> + constexpr merge_result<I1, I2, O> + merge(I1 first1, S1 last1, I2 first2, S2 last2, O result, + Comp comp = {}, Proj1 proj1 = {}, Proj2 proj2 = {}); // since C++20 + + template<input_range R1, input_range R2, weakly_incrementable O, class Comp = ranges::less, + class Proj1 = identity, class Proj2 = identity> + requires mergeable<iterator_t<R1>, iterator_t<R2>, O, Comp, Proj1, Proj2> + constexpr merge_result<borrowed_iterator_t<R1>, borrowed_iterator_t<R2>, O> + merge(R1&& r1, R2&& r2, O result, + Comp comp = {}, Proj1 proj1 = {}, Proj2 proj2 = {}); // since C++20 + + template<permutable I, sentinel_for<I> S, class T, class Proj = identity> + requires indirect_binary_predicate<ranges::equal_to, projected<I, Proj>, const T*> + constexpr subrange<I> ranges::remove(I first, S last, const T& value, Proj proj = {}); // since C++20 + + template<forward_range R, class T, class Proj = identity> + requires permutable<iterator_t<R>> && + indirect_binary_predicate<ranges::equal_to, projected<iterator_t<R>, Proj>, const T*> + constexpr borrowed_subrange_t<R> + ranges::remove(R&& r, const T& 
value, Proj proj = {}); // since C++20 + + template<permutable I, sentinel_for<I> S, class Proj = identity, + indirect_unary_predicate<projected<I, Proj>> Pred> + constexpr subrange<I> ranges::remove_if(I first, S last, Pred pred, Proj proj = {}); // since C++20 + + template<forward_range R, class Proj = identity, + indirect_unary_predicate<projected<iterator_t<R>, Proj>> Pred> + requires permutable<iterator_t<R>> + constexpr borrowed_subrange_t<R> + ranges::remove_if(R&& r, Pred pred, Proj proj = {}); // since C++20 + + template<class I, class O> + using set_difference_result = in_out_result<I, O>; // since C++20 + template<input_iterator I1, sentinel_for<I1> S1, input_iterator I2, sentinel_for<I2> S2, + weakly_incrementable O, class Comp = ranges::less, + class Proj1 = identity, class Proj2 = identity> + requires mergeable<I1, I2, O, Comp, Proj1, Proj2> + constexpr set_difference_result<I1, O> + set_difference(I1 first1, S1 last1, I2 first2, S2 last2, O result, + Comp comp = {}, Proj1 proj1 = {}, Proj2 proj2 = {}); // since C++20 + + template<input_range R1, input_range R2, weakly_incrementable O, + class Comp = ranges::less, class Proj1 = identity, class Proj2 = identity> + requires mergeable<iterator_t<R1>, iterator_t<R2>, O, Comp, Proj1, Proj2> + constexpr set_difference_result<borrowed_iterator_t<R1>, O> + set_difference(R1&& r1, R2&& r2, O result, + Comp comp = {}, Proj1 proj1 = {}, Proj2 proj2 = {}); // since C++20 + + template<class I1, class I2, class O> + using set_intersection_result = in_in_out_result<I1, I2, O>; // since C++20 + + template<input_iterator I1, sentinel_for<I1> S1, input_iterator I2, sentinel_for<I2> S2, + weakly_incrementable O, class Comp = ranges::less, + class Proj1 = identity, class Proj2 = identity> + requires mergeable<I1, I2, O, Comp, Proj1, Proj2> + constexpr set_intersection_result<I1, I2, O> + set_intersection(I1 first1, S1 last1, I2 first2, S2 last2, O result, + Comp comp = {}, Proj1 proj1 = {}, Proj2 proj2 = {}); // since C++20 + + template<input_range R1, input_range R2, weakly_incrementable O, class Comp = ranges::less, + class Proj1 = identity, class Proj2 = identity> + requires mergeable<iterator_t<R1>, iterator_t<R2>, O, Comp, Proj1, Proj2> + constexpr set_intersection_result<borrowed_iterator_t<R1>, borrowed_iterator_t<R2>, O> + set_intersection(R1&& r1, R2&& r2, O result, + Comp comp = {}, Proj1 proj1 = {}, Proj2 proj2 = {}); // since C++20 + + template <class _InIter, class _OutIter> + using reverse_copy_result = in_out_result<_InIter, _OutIter>; // since C++20 + + template<bidirectional_iterator I, sentinel_for<I> S, weakly_incrementable O> + requires indirectly_copyable<I, O> + constexpr ranges::reverse_copy_result<I, O> + ranges::reverse_copy(I first, S last, O result); // since C++20 + + template<bidirectional_range R, weakly_incrementable O> + requires indirectly_copyable<iterator_t<R>, O> + constexpr ranges::reverse_copy_result<borrowed_iterator_t<R>, O> + ranges::reverse_copy(R&& r, O result); // since C++20 + + template <class _InIter, class _OutIter> + using rotate_copy_result = in_out_result<_InIter, _OutIter>; // since C++20 + + template<forward_iterator I, sentinel_for<I> S, weakly_incrementable O> + requires indirectly_copyable<I, O> + constexpr ranges::rotate_copy_result<I, O> + ranges::rotate_copy(I first, I middle, S last, O result); // since C++20 + + template<forward_range R, weakly_incrementable O> + requires indirectly_copyable<iterator_t<R>, O> + constexpr 
ranges::rotate_copy_result<borrowed_iterator_t<R>, O> + ranges::rotate_copy(R&& r, iterator_t<R> middle, O result); // since C++20 + + template<forward_iterator I1, sentinel_for<I1> S1, forward_iterator I2, + sentinel_for<I2> S2, class Pred = ranges::equal_to, + class Proj1 = identity, class Proj2 = identity> + requires indirectly_comparable<I1, I2, Pred, Proj1, Proj2> + constexpr subrange<I1> + ranges::search(I1 first1, S1 last1, I2 first2, S2 last2, Pred pred = {}, + Proj1 proj1 = {}, Proj2 proj2 = {}); // since C++20 + + template<forward_range R1, forward_range R2, class Pred = ranges::equal_to, + class Proj1 = identity, class Proj2 = identity> + requires indirectly_comparable<iterator_t<R1>, iterator_t<R2>, Pred, Proj1, Proj2> + constexpr borrowed_subrange_t<R1> + ranges::search(R1&& r1, R2&& r2, Pred pred = {}, + Proj1 proj1 = {}, Proj2 proj2 = {}); // since C++20 + + template<forward_iterator I, sentinel_for<I> S, class T, + class Pred = ranges::equal_to, class Proj = identity> + requires indirectly_comparable<I, const T*, Pred, Proj> + constexpr subrange<I> + ranges::search_n(I first, S last, iter_difference_t<I> count, + const T& value, Pred pred = {}, Proj proj = {}); // since C++20 + + template<forward_range R, class T, class Pred = ranges::equal_to, + class Proj = identity> + requires indirectly_comparable<iterator_t<R>, const T*, Pred, Proj> + constexpr borrowed_subrange_t<R> + ranges::search_n(R&& r, range_difference_t<R> count, + const T& value, Pred pred = {}, Proj proj = {}); // since C++20 + + template<forward_iterator I1, sentinel_for<I1> S1, forward_iterator I2, sentinel_for<I2> S2, + class Pred = ranges::equal_to, class Proj1 = identity, class Proj2 = identity> + requires indirectly_comparable<I1, I2, Pred, Proj1, Proj2> + constexpr subrange<I1> + ranges::find_end(I1 first1, S1 last1, I2 first2, S2 last2, Pred pred = {}, + Proj1 proj1 = {}, Proj2 proj2 = {}); // since C++20 + + template<forward_range R1, forward_range R2, + class Pred = ranges::equal_to, class Proj1 = identity, class Proj2 = identity> + requires indirectly_comparable<iterator_t<R1>, iterator_t<R2>, Pred, Proj1, Proj2> + constexpr borrowed_subrange_t<R1> + ranges::find_end(R1&& r1, R2&& r2, Pred pred = {}, + Proj1 proj1 = {}, Proj2 proj2 = {}); // since C++20 + + template<class I1, class I2, class O> + using set_symmetric_difference_result = in_in_out_result<I1, I2, O>; // since C++20 + + template<input_iterator I1, sentinel_for<I1> S1, input_iterator I2, sentinel_for<I2> S2, + weakly_incrementable O, class Comp = ranges::less, + class Proj1 = identity, class Proj2 = identity> + requires mergeable<I1, I2, O, Comp, Proj1, Proj2> + constexpr set_symmetric_difference_result<I1, I2, O> + set_symmetric_difference(I1 first1, S1 last1, I2 first2, S2 last2, O result, + Comp comp = {}, Proj1 proj1 = {}, + Proj2 proj2 = {}); // since C++20 + + template<input_range R1, input_range R2, weakly_incrementable O, + class Comp = ranges::less, class Proj1 = identity, class Proj2 = identity> + requires mergeable<iterator_t<R1>, iterator_t<R2>, O, Comp, Proj1, Proj2> + constexpr set_symmetric_difference_result<borrowed_iterator_t<R1>, + borrowed_iterator_t<R2>, O> + set_symmetric_difference(R1&& r1, R2&& r2, O result, Comp comp = {}, + Proj1 proj1 = {}, Proj2 proj2 = {}); // since C++20 + } constexpr bool // constexpr in C++20 @@ -1237,6 +1455,7 @@ template <class BidirectionalIterator, class Compare> #include <__algorithm/ranges_fill.h> #include <__algorithm/ranges_fill_n.h> #include <__algorithm/ranges_find.h> +#include 
<__algorithm/ranges_find_end.h> #include <__algorithm/ranges_find_first_of.h> #include <__algorithm/ranges_find_if.h> #include <__algorithm/ranges_find_if_not.h> @@ -1247,8 +1466,10 @@ template <class BidirectionalIterator, class Compare> #include <__algorithm/ranges_is_sorted_until.h> #include <__algorithm/ranges_lexicographical_compare.h> #include <__algorithm/ranges_lower_bound.h> +#include <__algorithm/ranges_make_heap.h> #include <__algorithm/ranges_max.h> #include <__algorithm/ranges_max_element.h> +#include <__algorithm/ranges_merge.h> #include <__algorithm/ranges_min.h> #include <__algorithm/ranges_min_element.h> #include <__algorithm/ranges_minmax.h> @@ -1257,10 +1478,23 @@ template <class BidirectionalIterator, class Compare> #include <__algorithm/ranges_move.h> #include <__algorithm/ranges_move_backward.h> #include <__algorithm/ranges_none_of.h> +#include <__algorithm/ranges_nth_element.h> +#include <__algorithm/ranges_pop_heap.h> +#include <__algorithm/ranges_push_heap.h> +#include <__algorithm/ranges_remove.h> +#include <__algorithm/ranges_remove_if.h> #include <__algorithm/ranges_replace.h> #include <__algorithm/ranges_replace_if.h> #include <__algorithm/ranges_reverse.h> +#include <__algorithm/ranges_reverse_copy.h> +#include <__algorithm/ranges_rotate_copy.h> +#include <__algorithm/ranges_search.h> +#include <__algorithm/ranges_search_n.h> +#include <__algorithm/ranges_set_difference.h> +#include <__algorithm/ranges_set_intersection.h> +#include <__algorithm/ranges_set_symmetric_difference.h> #include <__algorithm/ranges_sort.h> +#include <__algorithm/ranges_sort_heap.h> #include <__algorithm/ranges_stable_sort.h> #include <__algorithm/ranges_swap_ranges.h> #include <__algorithm/ranges_transform.h> diff --git a/libcxx/include/any b/libcxx/include/any index 7e12034b45a7..66f7488e5441 100644 --- a/libcxx/include/any +++ b/libcxx/include/any @@ -271,7 +271,7 @@ public: is_copy_constructible<_Tp>::value> > _LIBCPP_INLINE_VISIBILITY - _Tp& emplace(_Args&&... 
args); + _Tp& emplace(_Args&&...); template <class _ValueType, class _Up, class ..._Args, class _Tp = decay_t<_ValueType>, diff --git a/libcxx/include/array b/libcxx/include/array index e96c3d813339..867dd6b1bcc6 100644 --- a/libcxx/include/array +++ b/libcxx/include/array @@ -438,12 +438,7 @@ operator>=(const array<_Tp, _Size>& __x, const array<_Tp, _Size>& __y) template <class _Tp, size_t _Size> inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 -typename enable_if -< - _Size == 0 || - __is_swappable<_Tp>::value, - void ->::type +__enable_if_t<_Size == 0 || __is_swappable<_Tp>::value, void> swap(array<_Tp, _Size>& __x, array<_Tp, _Size>& __y) _NOEXCEPT_(noexcept(__x.swap(__y))) { diff --git a/libcxx/include/atomic b/libcxx/include/atomic index 0c6d3079c96a..92da4820e928 100644 --- a/libcxx/include/atomic +++ b/libcxx/include/atomic @@ -906,8 +906,8 @@ struct __cxx_atomic_base_impl { #else __cxx_atomic_base_impl() _NOEXCEPT : __a_value() {} #endif // _LIBCPP_CXX03_LANG - _LIBCPP_CONSTEXPR explicit __cxx_atomic_base_impl(_Tp value) _NOEXCEPT - : __a_value(value) {} + _LIBCPP_CONSTEXPR explicit __cxx_atomic_base_impl(_Tp __value) _NOEXCEPT + : __a_value(__value) {} _LIBCPP_DISABLE_EXTENSION_WARNING _Atomic(_Tp) __a_value; }; @@ -1451,8 +1451,8 @@ struct __cxx_atomic_impl : public _Base { "std::atomic<T> requires that 'T' be a trivially copyable type"); _LIBCPP_INLINE_VISIBILITY __cxx_atomic_impl() _NOEXCEPT = default; - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR explicit __cxx_atomic_impl(_Tp value) _NOEXCEPT - : _Base(value) {} + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR explicit __cxx_atomic_impl(_Tp __value) _NOEXCEPT + : _Base(__value) {} }; #if defined(__linux__) || (defined(_AIX) && !defined(__64BIT__)) @@ -2696,7 +2696,6 @@ typedef atomic<__libcpp_unsigned_lock_free> atomic_unsigned_lock_free; #if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_DISABLE_DEPRECATION_WARNINGS) # if defined(_LIBCPP_CLANG_VER) && _LIBCPP_CLANG_VER >= 1400 -# pragma clang deprecated(ATOMIC_FLAG_INIT) # pragma clang deprecated(ATOMIC_VAR_INIT) # endif #endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_DISABLE_DEPRECATION_WARNINGS) diff --git a/libcxx/include/barrier b/libcxx/include/barrier index 9d91d255df9a..00518035283f 100644 --- a/libcxx/include/barrier +++ b/libcxx/include/barrier @@ -130,10 +130,10 @@ public: { } [[nodiscard]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY - arrival_token arrive(ptrdiff_t update) + arrival_token arrive(ptrdiff_t __update) { auto const __old_phase = __phase_.load(memory_order_relaxed); - for(; update; --update) + for(; __update; --__update) if(__arrive_barrier_algorithm_base(__base_.get(), __old_phase)) { __completion_(); __expected_ += __expected_adjustment_.load(memory_order_relaxed); @@ -300,9 +300,9 @@ public: barrier& operator=(barrier const&) = delete; [[nodiscard]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY - arrival_token arrive(ptrdiff_t update = 1) + arrival_token arrive(ptrdiff_t __update = 1) { - return __b.arrive(update); + return __b.arrive(__update); } _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY void wait(arrival_token&& __phase) const diff --git a/libcxx/include/charconv b/libcxx/include/charconv index 6a63e5fe9057..9f474ae711f3 100644 --- a/libcxx/include/charconv +++ b/libcxx/include/charconv @@ -117,32 +117,22 @@ from_chars_result from_chars(const char*, const char*, bool, int = 10) = delete; namespace __itoa { - template <typename _Tp, typename = void> -struct _LIBCPP_HIDDEN __traits_base -{ - using type = uint64_t; - 
- static _LIBCPP_HIDE_FROM_ABI int __width(_Tp __v) - { - auto __t = (64 - std::__libcpp_clz(static_cast<type>(__v | 1))) * 1233 >> 12; - return __t - (__v < __table<>::__pow10_64[__t]) + 1; - } - - static _LIBCPP_HIDE_FROM_ABI char* __convert(char* __p, _Tp __v) - { - return __itoa::__base_10_u64(__p, __v); - } - - static _LIBCPP_HIDE_FROM_ABI decltype(__table<>::__pow10_64)& __pow() { return __table<>::__pow10_64; } -}; +struct _LIBCPP_HIDDEN __traits_base; template <typename _Tp> -struct _LIBCPP_HIDDEN - __traits_base<_Tp, decltype(void(uint32_t{declval<_Tp>()}))> +struct _LIBCPP_HIDDEN __traits_base<_Tp, __enable_if_t<sizeof(_Tp) <= sizeof(uint32_t)>> { using type = uint32_t; + /// The width estimation using a log10 algorithm. + /// + /// The algorithm is based on + /// http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10 + /// Instead of using IntegerLogBase2 it uses __libcpp_clz. Since that + /// function requires its input to have at least one bit set the value of + /// zero is set to one. This means the first element of the lookup table is + /// zero. static _LIBCPP_HIDE_FROM_ABI int __width(_Tp __v) { auto __t = (32 - std::__libcpp_clz(static_cast<type>(__v | 1))) * 1233 >> 12; @@ -158,6 +148,61 @@ struct _LIBCPP_HIDDEN }; template <typename _Tp> +struct _LIBCPP_HIDDEN + __traits_base<_Tp, __enable_if_t<sizeof(_Tp) == sizeof(uint64_t)>> { + using type = uint64_t; + + /// The width estimation using a log10 algorithm. + /// + /// The algorithm is based on + /// http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10 + /// Instead of using IntegerLogBase2 it uses __libcpp_clz. Since that + /// function requires its input to have at least one bit set the value of + /// zero is set to one. This means the first element of the lookup table is + /// zero. + static _LIBCPP_HIDE_FROM_ABI int __width(_Tp __v) { + auto __t = (64 - std::__libcpp_clz(static_cast<type>(__v | 1))) * 1233 >> 12; + return __t - (__v < __table<>::__pow10_64[__t]) + 1; + } + + static _LIBCPP_HIDE_FROM_ABI char* __convert(char* __p, _Tp __v) { return __itoa::__base_10_u64(__p, __v); } + + static _LIBCPP_HIDE_FROM_ABI decltype(__table<>::__pow10_64)& __pow() { return __table<>::__pow10_64; } +}; + + +# ifndef _LIBCPP_HAS_NO_INT128 +template <typename _Tp> +struct _LIBCPP_HIDDEN + __traits_base<_Tp, __enable_if_t<sizeof(_Tp) == sizeof(__uint128_t)> > { + using type = __uint128_t; + + /// The width estimation using a log10 algorithm. + /// + /// The algorithm is based on + /// http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10 + /// Instead of using IntegerLogBase2 it uses __libcpp_clz. Since that + /// function requires its input to have at least one bit set the value of + /// zero is set to one. This means the first element of the lookup table is + /// zero. + static _LIBCPP_HIDE_FROM_ABI int __width(_Tp __v) { + _LIBCPP_ASSERT(__v > numeric_limits<uint64_t>::max(), "The optimizations for this algorithm fail when this isn't true."); + // There's always a bit set in the upper 64-bits. + auto __t = (128 - std::__libcpp_clz(static_cast<uint64_t>(__v >> 64))) * 1233 >> 12; + _LIBCPP_ASSERT(__t >= __table<>::__pow10_128_offset, "Index out of bounds"); + // __t is adjusted since the lookup table misses the lower entries. + return __t - (__v < __table<>::__pow10_128[__t - __table<>::__pow10_128_offset]) + 1; + } + + static _LIBCPP_HIDE_FROM_ABI char* __convert(char* __p, _Tp __v) { return __itoa::__base_10_u128(__p, __v); } + + // TODO FMT This pow function should get an index. 
+ // By moving this to its own header it can be reused by the pow function in to_chars_base_10. + static _LIBCPP_HIDE_FROM_ABI decltype(__table<>::__pow10_128)& __pow() { return __table<>::__pow10_128; } +}; +#endif + +template <typename _Tp> inline _LIBCPP_HIDE_FROM_ABI bool __mul_overflowed(unsigned char __a, _Tp __b, unsigned char& __r) { @@ -271,6 +316,28 @@ __to_chars_itoa(char* __first, char* __last, _Tp __value, false_type) return {__last, errc::value_too_large}; } +# ifndef _LIBCPP_HAS_NO_INT128 +template <> +inline _LIBCPP_HIDE_FROM_ABI to_chars_result +__to_chars_itoa(char* __first, char* __last, __uint128_t __value, false_type) +{ + // When the value fits in 64-bits use the 64-bit code path. This reduces + // the number of expensive calculations on 128-bit values. + // + // NOTE the 128-bit code path requires this optimization. + if(__value <= numeric_limits<uint64_t>::max()) + return __to_chars_itoa(__first, __last, static_cast<uint64_t>(__value), false_type()); + + using __tx = __itoa::__traits<__uint128_t>; + auto __diff = __last - __first; + + if (__tx::digits <= __diff || __tx::__width(__value) <= __diff) + return {__tx::__convert(__first, __value), errc(0)}; + else + return {__last, errc::value_too_large}; +} +#endif + template <typename _Tp> inline _LIBCPP_HIDE_FROM_ABI to_chars_result __to_chars_integral(char* __first, char* __last, _Tp __value, int __base, @@ -493,7 +560,6 @@ to_chars(char* __first, char* __last, _Tp __value) { using _Type = __make_32_64_or_128_bit_t<_Tp>; static_assert(!is_same<_Type, void>::value, "unsupported integral type used in to_chars"); - static_assert(sizeof(_Tp) <= sizeof(int64_t), "128-bit integral support isn't available yet in to_chars"); return std::__to_chars_itoa(__first, __last, static_cast<_Type>(__value), is_signed<_Tp>()); } @@ -504,7 +570,6 @@ to_chars(char* __first, char* __last, _Tp __value, int __base) _LIBCPP_ASSERT(2 <= __base && __base <= 36, "base not in [2, 36]"); using _Type = __make_32_64_or_128_bit_t<_Tp>; - static_assert(sizeof(_Tp) <= sizeof(int64_t), "128-bit integral support isn't available yet in to_chars"); return std::__to_chars_integral(__first, __last, static_cast<_Type>(__value), __base, is_signed<_Tp>()); } @@ -623,11 +688,11 @@ __from_chars_atoi(const char* __first, const char* __last, _Tp& __value) return __subject_seq_combinator( __first, __last, __value, - [](const char* _First, const char* _Last, + [](const char* __f, const char* __l, _Tp& __val) -> from_chars_result { __output_type __a, __b; - auto __p = __tx::__read(_First, _Last, __a, __b); - if (__p == _Last || !__in_pattern(*__p)) + auto __p = __tx::__read(__f, __l, __a, __b); + if (__p == __l || !__in_pattern(*__p)) { __output_type __m = numeric_limits<_Tp>::max(); if (__m >= __a && __m - __a >= __b) @@ -659,22 +724,22 @@ __from_chars_integral(const char* __first, const char* __last, _Tp& __value, return __subject_seq_combinator( __first, __last, __value, [](const char* __p, const char* __lastp, _Tp& __val, - int _Base) -> from_chars_result { + int __b) -> from_chars_result { using __tl = numeric_limits<_Tp>; - auto __digits = __tl::digits / log2f(float(_Base)); - _Tp __a = __in_pattern(*__p++, _Base).__val, __b = 0; + auto __digits = __tl::digits / log2f(float(__b)); + _Tp __x = __in_pattern(*__p++, __b).__val, __y = 0; for (int __i = 1; __p != __lastp; ++__i, ++__p) { - if (auto __c = __in_pattern(*__p, _Base)) + if (auto __c = __in_pattern(*__p, __b)) { if (__i < __digits - 1) - __a = __a * _Base + __c.__val; + __x = __x * __b + __c.__val; else { 
- if (!__itoa::__mul_overflowed(__a, _Base, __a)) + if (!__itoa::__mul_overflowed(__x, __b, __x)) ++__p; - __b = __c.__val; + __y = __c.__val; break; } } @@ -682,11 +747,11 @@ __from_chars_integral(const char* __first, const char* __last, _Tp& __value, break; } - if (__p == __lastp || !__in_pattern(*__p, _Base)) + if (__p == __lastp || !__in_pattern(*__p, __b)) { - if (__tl::max() - __a >= __b) + if (__tl::max() - __x >= __y) { - __val = __a + __b; + __val = __x + __y; return {__p, {}}; } } diff --git a/libcxx/include/chrono b/libcxx/include/chrono index 9185d74c09c4..2af5fbcc5165 100644 --- a/libcxx/include/chrono +++ b/libcxx/include/chrono @@ -13,6 +13,8 @@ /* chrono synopsis +#include <compare> // C++20 + namespace std { namespace chrono @@ -325,11 +327,7 @@ struct last_spec; class day; constexpr bool operator==(const day& x, const day& y) noexcept; -constexpr bool operator!=(const day& x, const day& y) noexcept; -constexpr bool operator< (const day& x, const day& y) noexcept; -constexpr bool operator> (const day& x, const day& y) noexcept; -constexpr bool operator<=(const day& x, const day& y) noexcept; -constexpr bool operator>=(const day& x, const day& y) noexcept; +constexpr strong_ordering operator<=>(const day& x, const day& y) noexcept; constexpr day operator+(const day& x, const days& y) noexcept; constexpr day operator+(const days& x, const day& y) noexcept; constexpr day operator-(const day& x, const days& y) noexcept; @@ -715,9 +713,11 @@ constexpr chrono::year operator ""y(unsigned lo #include <__chrono/year_month_day.h> #include <__chrono/year_month_weekday.h> #include <__config> -#include <compare> #include <version> +// standard-mandated includes +#include <compare> + #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header #endif diff --git a/libcxx/include/cmath b/libcxx/include/cmath index 2d22151684e0..4d81eed339d8 100644 --- a/libcxx/include/cmath +++ b/libcxx/include/cmath @@ -530,9 +530,9 @@ using ::tgammal _LIBCPP_USING_IF_EXISTS; using ::truncl _LIBCPP_USING_IF_EXISTS; #if _LIBCPP_STD_VER > 14 -inline _LIBCPP_INLINE_VISIBILITY float hypot( float x, float y, float z ) { return sqrt(x*x + y*y + z*z); } -inline _LIBCPP_INLINE_VISIBILITY double hypot( double x, double y, double z ) { return sqrt(x*x + y*y + z*z); } -inline _LIBCPP_INLINE_VISIBILITY long double hypot( long double x, long double y, long double z ) { return sqrt(x*x + y*y + z*z); } +inline _LIBCPP_INLINE_VISIBILITY float hypot( float __x, float __y, float __z ) { return sqrt(__x*__x + __y*__y + __z*__z); } +inline _LIBCPP_INLINE_VISIBILITY double hypot( double __x, double __y, double __z ) { return sqrt(__x*__x + __y*__y + __z*__z); } +inline _LIBCPP_INLINE_VISIBILITY long double hypot( long double __x, long double __y, long double __z ) { return sqrt(__x*__x + __y*__y + __z*__z); } template <class _A1, class _A2, class _A3> inline _LIBCPP_INLINE_VISIBILITY diff --git a/libcxx/include/codecvt b/libcxx/include/codecvt index 3e5110a008e4..9f18a7b1a989 100644 --- a/libcxx/include/codecvt +++ b/libcxx/include/codecvt @@ -92,10 +92,10 @@ public: _LIBCPP_SUPPRESS_DEPRECATED_PUSH _LIBCPP_INLINE_VISIBILITY - explicit __codecvt_utf8(size_t __refs, unsigned long _Maxcode, - codecvt_mode _Mode) - : codecvt<wchar_t, char, mbstate_t>(__refs), _Maxcode_(_Maxcode), - _Mode_(_Mode) {} + explicit __codecvt_utf8(size_t __refs, unsigned long __maxcode, + codecvt_mode __mode) + : codecvt<wchar_t, char, mbstate_t>(__refs), _Maxcode_(__maxcode), + _Mode_(__mode) {} _LIBCPP_SUPPRESS_DEPRECATED_POP 
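A unifying thread through the barrier, atomic, cmath, charconv, and codecvt hunks is the renaming of parameters to double-underscore spellings: update becomes __update, x becomes __x, _First/_Base become __f/__b, and _Maxcode/_Mode become __maxcode/__mode. For plain names this closes a real conformance hole, since such names live in the user's macro namespace; for the already-reserved _Maxcode-style spellings it reads as a consistency cleanup toward libc++'s uniform __lowercase convention (an inference from the patch, not a stated rationale). An illustrative translation unit showing the hazard:

#include <cstddef>

#define update bomb // legal user code: 'update' is not a reserved name

// Pre-patch shape, kept as a comment because it would no longer preprocess:
//   arrival_token arrive(ptrdiff_t update = 1); // expands to: ptrdiff_t bomb = 1

// Post-patch shape: user code may not define macros containing a double
// underscore, so __update cannot be clobbered.
struct barrier_sketch {
  int arrive(std::ptrdiff_t __update = 1) { return __update != 0; }
};

int main() { return barrier_sketch().arrive(0); }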
protected: virtual result @@ -130,10 +130,10 @@ public: typedef mbstate_t state_type; _LIBCPP_INLINE_VISIBILITY - explicit __codecvt_utf8(size_t __refs, unsigned long _Maxcode, - codecvt_mode _Mode) - : codecvt<char16_t, char, mbstate_t>(__refs), _Maxcode_(_Maxcode), - _Mode_(_Mode) {} + explicit __codecvt_utf8(size_t __refs, unsigned long __maxcode, + codecvt_mode __mode) + : codecvt<char16_t, char, mbstate_t>(__refs), _Maxcode_(__maxcode), + _Mode_(__mode) {} _LIBCPP_SUPPRESS_DEPRECATED_POP protected: @@ -168,10 +168,10 @@ public: typedef mbstate_t state_type; _LIBCPP_INLINE_VISIBILITY - explicit __codecvt_utf8(size_t __refs, unsigned long _Maxcode, - codecvt_mode _Mode) - : codecvt<char32_t, char, mbstate_t>(__refs), _Maxcode_(_Maxcode), - _Mode_(_Mode) {} + explicit __codecvt_utf8(size_t __refs, unsigned long __maxcode, + codecvt_mode __mode) + : codecvt<char32_t, char, mbstate_t>(__refs), _Maxcode_(__maxcode), + _Mode_(__mode) {} _LIBCPP_SUPPRESS_DEPRECATED_POP protected: @@ -229,10 +229,10 @@ public: _LIBCPP_SUPPRESS_DEPRECATED_PUSH _LIBCPP_INLINE_VISIBILITY - explicit __codecvt_utf16(size_t __refs, unsigned long _Maxcode, - codecvt_mode _Mode) - : codecvt<wchar_t, char, mbstate_t>(__refs), _Maxcode_(_Maxcode), - _Mode_(_Mode) {} + explicit __codecvt_utf16(size_t __refs, unsigned long __maxcode, + codecvt_mode __mode) + : codecvt<wchar_t, char, mbstate_t>(__refs), _Maxcode_(__maxcode), + _Mode_(__mode) {} _LIBCPP_SUPPRESS_DEPRECATED_POP protected: virtual result @@ -268,10 +268,10 @@ public: _LIBCPP_SUPPRESS_DEPRECATED_PUSH _LIBCPP_INLINE_VISIBILITY - explicit __codecvt_utf16(size_t __refs, unsigned long _Maxcode, - codecvt_mode _Mode) - : codecvt<wchar_t, char, mbstate_t>(__refs), _Maxcode_(_Maxcode), - _Mode_(_Mode) {} + explicit __codecvt_utf16(size_t __refs, unsigned long __maxcode, + codecvt_mode __mode) + : codecvt<wchar_t, char, mbstate_t>(__refs), _Maxcode_(__maxcode), + _Mode_(__mode) {} _LIBCPP_SUPPRESS_DEPRECATED_POP protected: virtual result @@ -306,10 +306,10 @@ public: typedef mbstate_t state_type; _LIBCPP_INLINE_VISIBILITY - explicit __codecvt_utf16(size_t __refs, unsigned long _Maxcode, - codecvt_mode _Mode) - : codecvt<char16_t, char, mbstate_t>(__refs), _Maxcode_(_Maxcode), - _Mode_(_Mode) {} + explicit __codecvt_utf16(size_t __refs, unsigned long __maxcode, + codecvt_mode __mode) + : codecvt<char16_t, char, mbstate_t>(__refs), _Maxcode_(__maxcode), + _Mode_(__mode) {} _LIBCPP_SUPPRESS_DEPRECATED_POP protected: @@ -344,10 +344,10 @@ public: typedef mbstate_t state_type; _LIBCPP_INLINE_VISIBILITY - explicit __codecvt_utf16(size_t __refs, unsigned long _Maxcode, - codecvt_mode _Mode) - : codecvt<char16_t, char, mbstate_t>(__refs), _Maxcode_(_Maxcode), - _Mode_(_Mode) {} + explicit __codecvt_utf16(size_t __refs, unsigned long __maxcode, + codecvt_mode __mode) + : codecvt<char16_t, char, mbstate_t>(__refs), _Maxcode_(__maxcode), + _Mode_(__mode) {} _LIBCPP_SUPPRESS_DEPRECATED_POP protected: @@ -382,10 +382,10 @@ public: typedef mbstate_t state_type; _LIBCPP_INLINE_VISIBILITY - explicit __codecvt_utf16(size_t __refs, unsigned long _Maxcode, - codecvt_mode _Mode) - : codecvt<char32_t, char, mbstate_t>(__refs), _Maxcode_(_Maxcode), - _Mode_(_Mode) {} + explicit __codecvt_utf16(size_t __refs, unsigned long __maxcode, + codecvt_mode __mode) + : codecvt<char32_t, char, mbstate_t>(__refs), _Maxcode_(__maxcode), + _Mode_(__mode) {} _LIBCPP_SUPPRESS_DEPRECATED_POP protected: @@ -420,10 +420,10 @@ public: typedef mbstate_t state_type; _LIBCPP_INLINE_VISIBILITY - explicit 
__codecvt_utf16(size_t __refs, unsigned long _Maxcode, - codecvt_mode _Mode) - : codecvt<char32_t, char, mbstate_t>(__refs), _Maxcode_(_Maxcode), - _Mode_(_Mode) {} + explicit __codecvt_utf16(size_t __refs, unsigned long __maxcode, + codecvt_mode __mode) + : codecvt<char32_t, char, mbstate_t>(__refs), _Maxcode_(__maxcode), + _Mode_(__mode) {} _LIBCPP_SUPPRESS_DEPRECATED_POP protected: @@ -481,10 +481,10 @@ public: _LIBCPP_SUPPRESS_DEPRECATED_PUSH _LIBCPP_INLINE_VISIBILITY - explicit __codecvt_utf8_utf16(size_t __refs, unsigned long _Maxcode, - codecvt_mode _Mode) - : codecvt<wchar_t, char, mbstate_t>(__refs), _Maxcode_(_Maxcode), - _Mode_(_Mode) {} + explicit __codecvt_utf8_utf16(size_t __refs, unsigned long __maxcode, + codecvt_mode __mode) + : codecvt<wchar_t, char, mbstate_t>(__refs), _Maxcode_(__maxcode), + _Mode_(__mode) {} _LIBCPP_SUPPRESS_DEPRECATED_POP protected: virtual result @@ -519,10 +519,10 @@ public: typedef mbstate_t state_type; _LIBCPP_INLINE_VISIBILITY - explicit __codecvt_utf8_utf16(size_t __refs, unsigned long _Maxcode, - codecvt_mode _Mode) - : codecvt<char32_t, char, mbstate_t>(__refs), _Maxcode_(_Maxcode), - _Mode_(_Mode) {} + explicit __codecvt_utf8_utf16(size_t __refs, unsigned long __maxcode, + codecvt_mode __mode) + : codecvt<char32_t, char, mbstate_t>(__refs), _Maxcode_(__maxcode), + _Mode_(__mode) {} _LIBCPP_SUPPRESS_DEPRECATED_POP protected: @@ -557,10 +557,10 @@ public: typedef mbstate_t state_type; _LIBCPP_INLINE_VISIBILITY - explicit __codecvt_utf8_utf16(size_t __refs, unsigned long _Maxcode, - codecvt_mode _Mode) - : codecvt<char16_t, char, mbstate_t>(__refs), _Maxcode_(_Maxcode), - _Mode_(_Mode) {} + explicit __codecvt_utf8_utf16(size_t __refs, unsigned long __maxcode, + codecvt_mode __mode) + : codecvt<char16_t, char, mbstate_t>(__refs), _Maxcode_(__maxcode), + _Mode_(__mode) {} _LIBCPP_SUPPRESS_DEPRECATED_POP protected: diff --git a/libcxx/include/condition_variable b/libcxx/include/condition_variable index dfcb7160565b..92088f3e1b22 100644 --- a/libcxx/include/condition_variable +++ b/libcxx/include/condition_variable @@ -261,7 +261,7 @@ condition_variable_any::wait_for(_Lock& __lock, } _LIBCPP_FUNC_VIS -void notify_all_at_thread_exit(condition_variable& cond, unique_lock<mutex> lk); +void notify_all_at_thread_exit(condition_variable&, unique_lock<mutex>); _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/deque b/libcxx/include/deque index d8f48f07954c..0d7eb9aa9316 100644 --- a/libcxx/include/deque +++ b/libcxx/include/deque @@ -1464,12 +1464,10 @@ public: iterator insert(const_iterator __p, size_type __n, const value_type& __v); template <class _InputIter> iterator insert(const_iterator __p, _InputIter __f, _InputIter __l, - typename enable_if<__is_cpp17_input_iterator<_InputIter>::value - &&!__is_cpp17_forward_iterator<_InputIter>::value>::type* = 0); + typename enable_if<__is_exactly_cpp17_input_iterator<_InputIter>::value>::type* = 0); template <class _ForwardIterator> iterator insert(const_iterator __p, _ForwardIterator __f, _ForwardIterator __l, - typename enable_if<__is_cpp17_forward_iterator<_ForwardIterator>::value - &&!__is_cpp17_bidirectional_iterator<_ForwardIterator>::value>::type* = 0); + typename enable_if<__is_exactly_cpp17_forward_iterator<_ForwardIterator>::value>::type* = 0); template <class _BiIter> iterator insert(const_iterator __p, _BiIter __f, _BiIter __l, typename enable_if<__is_cpp17_bidirectional_iterator<_BiIter>::value>::type* = 0); @@ -1556,8 +1554,7 @@ public: template <class _InpIter> void __append(_InpIter __f, 
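The <codecvt> churn above is pure renaming: _Maxcode and _Mode become __maxcode and __mode. libc++'s convention reserves _Ugly spellings for template parameters and uses __ugly names everywhere else, which also hardens headers against macro definitions. A hypothetical sketch of the failure mode this guards against; set_limit and the Maxcode macro are illustrative names only:

    #define Maxcode 0x10FFFF                // imagine a user or vendor macro

    // void set_limit(unsigned long Maxcode);   // macro expansion would
    //                                          // mangle this declaration
    void set_limit(unsigned long __maxcode) {}  // users may not legally
                                                // define __ugly names as macros

    int main() { set_limit(Maxcode); }

The reserved _Maxcode spelling is technically off-limits to user macros as well; the double-underscore form is primarily about project-wide consistency.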
_InpIter __l, - typename enable_if<__is_cpp17_input_iterator<_InpIter>::value && - !__is_cpp17_forward_iterator<_InpIter>::value>::type* = 0); + typename enable_if<__is_exactly_cpp17_input_iterator<_InpIter>::value>::type* = 0); template <class _ForIter> void __append(_ForIter __f, _ForIter __l, typename enable_if<__is_cpp17_forward_iterator<_ForIter>::value>::type* = 0); @@ -2266,8 +2263,7 @@ template <class _Tp, class _Allocator> template <class _InputIter> typename deque<_Tp, _Allocator>::iterator deque<_Tp, _Allocator>::insert(const_iterator __p, _InputIter __f, _InputIter __l, - typename enable_if<__is_cpp17_input_iterator<_InputIter>::value - &&!__is_cpp17_forward_iterator<_InputIter>::value>::type*) + typename enable_if<__is_exactly_cpp17_input_iterator<_InputIter>::value>::type*) { __split_buffer<value_type, allocator_type&> __buf(__base::__alloc()); __buf.__construct_at_end(__f, __l); @@ -2279,8 +2275,7 @@ template <class _Tp, class _Allocator> template <class _ForwardIterator> typename deque<_Tp, _Allocator>::iterator deque<_Tp, _Allocator>::insert(const_iterator __p, _ForwardIterator __f, _ForwardIterator __l, - typename enable_if<__is_cpp17_forward_iterator<_ForwardIterator>::value - &&!__is_cpp17_bidirectional_iterator<_ForwardIterator>::value>::type*) + typename enable_if<__is_exactly_cpp17_forward_iterator<_ForwardIterator>::value>::type*) { size_type __n = _VSTD::distance(__f, __l); __split_buffer<value_type, allocator_type&> __buf(__n, 0, __base::__alloc()); @@ -2362,8 +2357,7 @@ template <class _Tp, class _Allocator> template <class _InpIter> void deque<_Tp, _Allocator>::__append(_InpIter __f, _InpIter __l, - typename enable_if<__is_cpp17_input_iterator<_InpIter>::value && - !__is_cpp17_forward_iterator<_InpIter>::value>::type*) + typename enable_if<__is_exactly_cpp17_input_iterator<_InpIter>::value>::type*) { for (; __f != __l; ++__f) #ifdef _LIBCPP_CXX03_LANG diff --git a/libcxx/include/exception b/libcxx/include/exception index 412e02af3822..7b514e5dca54 100644 --- a/libcxx/include/exception +++ b/libcxx/include/exception @@ -216,7 +216,7 @@ _LIBCPP_FUNC_VIS void swap(exception_ptr&, exception_ptr&) _NOEXCEPT; _LIBCPP_FUNC_VIS exception_ptr __copy_exception_ptr(void *__except, const void* __ptr); _LIBCPP_FUNC_VIS exception_ptr current_exception() _NOEXCEPT; -_LIBCPP_NORETURN _LIBCPP_FUNC_VIS void rethrow_exception(exception_ptr p); +_LIBCPP_NORETURN _LIBCPP_FUNC_VIS void rethrow_exception(exception_ptr); // This is a built-in template function which automagically extracts the required // information. 
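In the <deque> changes above, the two-trait conjunctions (input-but-not-forward, forward-but-not-bidirectional) are folded into the single traits __is_exactly_cpp17_input_iterator and __is_exactly_cpp17_forward_iterator; the dispatch itself is unchanged. What the split buys at the call site, as a small example:

    #include <deque>
    #include <iterator>
    #include <sstream>
    #include <vector>

    int main() {
        std::deque<int> d{1, 2, 3};

        // Forward (here random-access) range: distance() is cheap, so the
        // forward overload sizes the insertion up front.
        std::vector<int> v{7, 8};
        d.insert(d.begin() + 1, v.begin(), v.end());

        // Single-pass input range: the "exactly input iterator" overload
        // must buffer elements as they arrive.
        std::istringstream in("4 5 6");
        d.insert(d.end(), std::istream_iterator<int>(in),
                 std::istream_iterator<int>());
    }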
@@ -310,7 +310,7 @@ template <class _Ep> inline _LIBCPP_INLINE_VISIBILITY void rethrow_if_nested(const _Ep& __e, - typename enable_if< __can_dynamic_cast<_Ep, nested_exception>::value>::type* = 0) + __enable_if_t< __can_dynamic_cast<_Ep, nested_exception>::value>* = 0) { const nested_exception* __nep = dynamic_cast<const nested_exception*>(_VSTD::addressof(__e)); if (__nep) @@ -321,7 +321,7 @@ template <class _Ep> inline _LIBCPP_INLINE_VISIBILITY void rethrow_if_nested(const _Ep&, - typename enable_if<!__can_dynamic_cast<_Ep, nested_exception>::value>::type* = 0) + __enable_if_t<!__can_dynamic_cast<_Ep, nested_exception>::value>* = 0) { } diff --git a/libcxx/include/experimental/functional b/libcxx/include/experimental/functional index 04f195d3cf24..12440744ca50 100644 --- a/libcxx/include/experimental/functional +++ b/libcxx/include/experimental/functional @@ -62,6 +62,7 @@ inline namespace fundamentals_v1 { #include <__assert> // all public C++ headers provide the assertion handler #include <__debug> +#include <__functional/identity.h> #include <__memory/uses_allocator.h> #include <array> #include <experimental/__config> @@ -104,9 +105,8 @@ public: pair<_ForwardIterator2, _ForwardIterator2> operator () (_ForwardIterator2 __f, _ForwardIterator2 __l) const { - return _VSTD::__search(__f, __l, __first_, __last_, __pred_, - typename iterator_traits<_ForwardIterator>::iterator_category(), - typename iterator_traits<_ForwardIterator2>::iterator_category()); + auto __proj = __identity(); + return std::__search_impl(__f, __l, __first_, __last_, __pred_, __proj, __proj); } private: diff --git a/libcxx/include/experimental/simd b/libcxx/include/experimental/simd index 9b55cc009986..f77ce59bb269 100644 --- a/libcxx/include/experimental/simd +++ b/libcxx/include/experimental/simd @@ -1242,32 +1242,32 @@ _Tp reduce(const simd<_Tp, _Abi>&, _BinaryOp = _BinaryOp()); template <class _MaskType, class _SimdType, class _BinaryOp> typename _SimdType::value_type reduce(const const_where_expression<_MaskType, _SimdType>&, - typename _SimdType::value_type neutral_element, _BinaryOp binary_op); + typename _SimdType::value_type __neutral_element, _BinaryOp); template <class _MaskType, class _SimdType> typename _SimdType::value_type reduce(const const_where_expression<_MaskType, _SimdType>&, - plus<typename _SimdType::value_type> binary_op = {}); + plus<typename _SimdType::value_type> = {}); template <class _MaskType, class _SimdType> typename _SimdType::value_type reduce(const const_where_expression<_MaskType, _SimdType>&, - multiplies<typename _SimdType::value_type> binary_op); + multiplies<typename _SimdType::value_type>); template <class _MaskType, class _SimdType> typename _SimdType::value_type reduce(const const_where_expression<_MaskType, _SimdType>&, - bit_and<typename _SimdType::value_type> binary_op); + bit_and<typename _SimdType::value_type>); template <class _MaskType, class _SimdType> typename _SimdType::value_type reduce(const const_where_expression<_MaskType, _SimdType>&, - bit_or<typename _SimdType::value_type> binary_op); + bit_or<typename _SimdType::value_type>); template <class _MaskType, class _SimdType> typename _SimdType::value_type reduce(const const_where_expression<_MaskType, _SimdType>&, - bit_xor<typename _SimdType::value_type> binary_op); + bit_xor<typename _SimdType::value_type>); template <class _Tp, class _Abi> _Tp hmin(const simd<_Tp, _Abi>&); diff --git a/libcxx/include/ext/hash_map b/libcxx/include/ext/hash_map index 8afe12f6319b..a581d5c550c4 100644 --- 
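The <exception> hunks above swap typename enable_if<...>::type for the internal alias __enable_if_t, keeping the two mutually exclusive rethrow_if_nested overloads: the argument either can or cannot be dynamic_cast to nested_exception. The machinery in action:

    #include <exception>
    #include <iostream>
    #include <stdexcept>

    int main() {
        try {
            try {
                throw std::runtime_error("inner");
            } catch (...) {
                std::throw_with_nested(std::runtime_error("outer"));
            }
        } catch (const std::exception& e) {
            std::cout << e.what() << '\n';            // outer
            try {
                std::rethrow_if_nested(e);            // dynamic_cast succeeds,
            } catch (const std::exception& nested) {  // so the nested exception
                std::cout << nested.what() << '\n';   // is rethrown: inner
            }
        }
    }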
a/libcxx/include/ext/hash_map +++ b/libcxx/include/ext/hash_map @@ -605,7 +605,7 @@ public: {return __table_.bucket_size(__n);} _LIBCPP_INLINE_VISIBILITY - void resize(size_type __n) {__table_.rehash(__n);} + void resize(size_type __n) {__table_.__rehash_unique(__n);} private: __node_holder __construct_node(const key_type& __k); @@ -616,7 +616,7 @@ hash_map<_Key, _Tp, _Hash, _Pred, _Alloc>::hash_map( size_type __n, const hasher& __hf, const key_equal& __eql) : __table_(__hf, __eql) { - __table_.rehash(__n); + __table_.__rehash_unique(__n); } template <class _Key, class _Tp, class _Hash, class _Pred, class _Alloc> @@ -625,7 +625,7 @@ hash_map<_Key, _Tp, _Hash, _Pred, _Alloc>::hash_map( const allocator_type& __a) : __table_(__hf, __eql, __a) { - __table_.rehash(__n); + __table_.__rehash_unique(__n); } template <class _Key, class _Tp, class _Hash, class _Pred, class _Alloc> @@ -643,7 +643,7 @@ hash_map<_Key, _Tp, _Hash, _Pred, _Alloc>::hash_map( const hasher& __hf, const key_equal& __eql) : __table_(__hf, __eql) { - __table_.rehash(__n); + __table_.__rehash_unique(__n); insert(__first, __last); } @@ -654,7 +654,7 @@ hash_map<_Key, _Tp, _Hash, _Pred, _Alloc>::hash_map( const hasher& __hf, const key_equal& __eql, const allocator_type& __a) : __table_(__hf, __eql, __a) { - __table_.rehash(__n); + __table_.__rehash_unique(__n); insert(__first, __last); } @@ -663,7 +663,7 @@ hash_map<_Key, _Tp, _Hash, _Pred, _Alloc>::hash_map( const hash_map& __u) : __table_(__u.__table_) { - __table_.rehash(__u.bucket_count()); + __table_.__rehash_unique(__u.bucket_count()); insert(__u.begin(), __u.end()); } @@ -874,7 +874,7 @@ public: {return __table_.bucket_size(__n);} _LIBCPP_INLINE_VISIBILITY - void resize(size_type __n) {__table_.rehash(__n);} + void resize(size_type __n) {__table_.__rehash_multi(__n);} }; template <class _Key, class _Tp, class _Hash, class _Pred, class _Alloc> @@ -882,7 +882,7 @@ hash_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>::hash_multimap( size_type __n, const hasher& __hf, const key_equal& __eql) : __table_(__hf, __eql) { - __table_.rehash(__n); + __table_.__rehash_multi(__n); } template <class _Key, class _Tp, class _Hash, class _Pred, class _Alloc> @@ -891,7 +891,7 @@ hash_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>::hash_multimap( const allocator_type& __a) : __table_(__hf, __eql, __a) { - __table_.rehash(__n); + __table_.__rehash_multi(__n); } template <class _Key, class _Tp, class _Hash, class _Pred, class _Alloc> @@ -909,7 +909,7 @@ hash_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>::hash_multimap( const hasher& __hf, const key_equal& __eql) : __table_(__hf, __eql) { - __table_.rehash(__n); + __table_.__rehash_multi(__n); insert(__first, __last); } @@ -920,7 +920,7 @@ hash_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>::hash_multimap( const hasher& __hf, const key_equal& __eql, const allocator_type& __a) : __table_(__hf, __eql, __a) { - __table_.rehash(__n); + __table_.__rehash_multi(__n); insert(__first, __last); } @@ -929,7 +929,7 @@ hash_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>::hash_multimap( const hash_multimap& __u) : __table_(__u.__table_) { - __table_.rehash(__u.bucket_count()); + __table_.__rehash_multi(__u.bucket_count()); insert(__u.begin(), __u.end()); } diff --git a/libcxx/include/ext/hash_set b/libcxx/include/ext/hash_set index 433c13f80bb2..5823b5fb9476 100644 --- a/libcxx/include/ext/hash_set +++ b/libcxx/include/ext/hash_set @@ -333,7 +333,7 @@ public: size_type elems_in_bucket(size_type __n) const {return __table_.bucket_size(__n);} _LIBCPP_INLINE_VISIBILITY - void 
resize(size_type __n) {__table_.rehash(__n);} + void resize(size_type __n) {__table_.__rehash_unique(__n);} }; template <class _Value, class _Hash, class _Pred, class _Alloc> @@ -341,7 +341,7 @@ hash_set<_Value, _Hash, _Pred, _Alloc>::hash_set(size_type __n, const hasher& __hf, const key_equal& __eql) : __table_(__hf, __eql) { - __table_.rehash(__n); + __table_.__rehash_unique(__n); } template <class _Value, class _Hash, class _Pred, class _Alloc> @@ -349,7 +349,7 @@ hash_set<_Value, _Hash, _Pred, _Alloc>::hash_set(size_type __n, const hasher& __hf, const key_equal& __eql, const allocator_type& __a) : __table_(__hf, __eql, __a) { - __table_.rehash(__n); + __table_.__rehash_unique(__n); } template <class _Value, class _Hash, class _Pred, class _Alloc> @@ -367,7 +367,7 @@ hash_set<_Value, _Hash, _Pred, _Alloc>::hash_set( const hasher& __hf, const key_equal& __eql) : __table_(__hf, __eql) { - __table_.rehash(__n); + __table_.__rehash_unique(__n); insert(__first, __last); } @@ -378,7 +378,7 @@ hash_set<_Value, _Hash, _Pred, _Alloc>::hash_set( const hasher& __hf, const key_equal& __eql, const allocator_type& __a) : __table_(__hf, __eql, __a) { - __table_.rehash(__n); + __table_.__rehash_unique(__n); insert(__first, __last); } @@ -387,7 +387,7 @@ hash_set<_Value, _Hash, _Pred, _Alloc>::hash_set( const hash_set& __u) : __table_(__u.__table_) { - __table_.rehash(__u.bucket_count()); + __table_.__rehash_unique(__u.bucket_count()); insert(__u.begin(), __u.end()); } @@ -553,7 +553,7 @@ public: size_type elems_in_bucket(size_type __n) const {return __table_.bucket_size(__n);} _LIBCPP_INLINE_VISIBILITY - void resize(size_type __n) {__table_.rehash(__n);} + void resize(size_type __n) {__table_.__rehash_multi(__n);} }; template <class _Value, class _Hash, class _Pred, class _Alloc> @@ -561,7 +561,7 @@ hash_multiset<_Value, _Hash, _Pred, _Alloc>::hash_multiset( size_type __n, const hasher& __hf, const key_equal& __eql) : __table_(__hf, __eql) { - __table_.rehash(__n); + __table_.__rehash_multi(__n); } template <class _Value, class _Hash, class _Pred, class _Alloc> @@ -570,7 +570,7 @@ hash_multiset<_Value, _Hash, _Pred, _Alloc>::hash_multiset( const allocator_type& __a) : __table_(__hf, __eql, __a) { - __table_.rehash(__n); + __table_.__rehash_multi(__n); } template <class _Value, class _Hash, class _Pred, class _Alloc> @@ -588,7 +588,7 @@ hash_multiset<_Value, _Hash, _Pred, _Alloc>::hash_multiset( const hasher& __hf, const key_equal& __eql) : __table_(__hf, __eql) { - __table_.rehash(__n); + __table_.__rehash_multi(__n); insert(__first, __last); } @@ -599,7 +599,7 @@ hash_multiset<_Value, _Hash, _Pred, _Alloc>::hash_multiset( const hasher& __hf, const key_equal& __eql, const allocator_type& __a) : __table_(__hf, __eql, __a) { - __table_.rehash(__n); + __table_.__rehash_multi(__n); insert(__first, __last); } @@ -608,7 +608,7 @@ hash_multiset<_Value, _Hash, _Pred, _Alloc>::hash_multiset( const hash_multiset& __u) : __table_(__u.__table_) { - __table_.rehash(__u.bucket_count()); + __table_.__rehash_multi(__u.bucket_count()); insert(__u.begin(), __u.end()); } diff --git a/libcxx/include/format b/libcxx/include/format index bf51edd91ece..60197d24523f 100644 --- a/libcxx/include/format +++ b/libcxx/include/format @@ -36,13 +36,13 @@ namespace std { // [format.functions], formatting functions template<class... Args> - string format(format-string<Args...> fmt, const Args&... args); + string format(format-string<Args...> fmt, Args&&... args); template<class... 
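The SGI-extension containers above stop calling the generic __table_.rehash() and instead pick __rehash_unique() or __rehash_multi() according to their key policy; the same split reappears for unordered_map further down. The observable contract of the standard containers is unchanged, for reference:

    #include <cassert>
    #include <unordered_map>

    int main() {
        std::unordered_map<int, int> m;   // unique keys: the __rehash_unique path
        m.max_load_factor(1.0f);

        m.reserve(100);                   // size the table for 100 elements
        assert(m.bucket_count() >= 100);

        m.rehash(256);                    // request at least 256 buckets
        assert(m.bucket_count() >= 256);
    }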
Args> - wstring format(wformat-string<Args...> fmt, const Args&... args); + wstring format(wformat-string<Args...> fmt, Args&&... args); template<class... Args> - string format(const locale& loc, format-string<Args...> fmt, const Args&... args); + string format(const locale& loc, format-string<Args...> fmt, Args&&... args); template<class... Args> - wstring format(const locale& loc, wformat-string<Args...> fmt, const Args&... args); + wstring format(const locale& loc, wformat-string<Args...> fmt, Args&&... args); string vformat(string_view fmt, format_args args); wstring vformat(wstring_view fmt, wformat_args args); @@ -50,13 +50,13 @@ namespace std { wstring vformat(const locale& loc, wstring_view fmt, wformat_args args); template<class Out, class... Args> - Out format_to(Out out, format-string<Args...> fmt, const Args&... args); + Out format_to(Out out, format-string<Args...> fmt, Args&&... args); template<class Out, class... Args> - Out format_to(Out out, wformat-string<Args...> fmt, const Args&... args); + Out format_to(Out out, wformat-string<Args...> fmt, Args&&... args); template<class Out, class... Args> - Out format_to(Out out, const locale& loc, format-string<Args...> fmt, const Args&... args); + Out format_to(Out out, const locale& loc, format-string<Args...> fmt, Args&&... args); template<class Out, class... Args> - Out format_to(Out out, const locale& loc, wformat-string<Args...> fmt, const Args&... args); + Out format_to(Out out, const locale& loc, wformat-string<Args...> fmt, Args&&... args); template<class Out> Out vformat_to(Out out, string_view fmt, format_args args); @@ -75,27 +75,27 @@ namespace std { }; template<class Out, class... Args> format_to_n_result<Out> format_to_n(Out out, iter_difference_t<Out> n, - format-string<Args...> fmt, const Args&... args); + format-string<Args...> fmt, Args&&... args); template<class Out, class... Args> format_to_n_result<Out> format_to_n(Out out, iter_difference_t<Out> n, - wformat-string<Args...> fmt, const Args&... args); + wformat-string<Args...> fmt, Args&&... args); template<class Out, class... Args> format_to_n_result<Out> format_to_n(Out out, iter_difference_t<Out> n, const locale& loc, format-string<Args...> fmt, - const Args&... args); + Args&&... args); template<class Out, class... Args> format_to_n_result<Out> format_to_n(Out out, iter_difference_t<Out> n, const locale& loc, wformat-string<Args...> fmt, - const Args&... args); + Args&&... args); template<class... Args> - size_t formatted_size(format-string<Args...> fmt, const Args&... args); + size_t formatted_size(format-string<Args...> fmt, Args&&... args); template<class... Args> - size_t formatted_size(wformat-string<Args...> fmt, const Args&... args); + size_t formatted_size(wformat-string<Args...> fmt, Args&&... args); template<class... Args> - size_t formatted_size(const locale& loc, format-string<Args...> fmt, const Args&... args); + size_t formatted_size(const locale& loc, format-string<Args...> fmt, Args&&... args); template<class... Args> - size_t formatted_size(const locale& loc, wformat-string<Args...> fmt, const Args&... args); + size_t formatted_size(const locale& loc, wformat-string<Args...> fmt, Args&&... args); // [format.formatter], formatter template<class T, class charT = char> struct formatter; @@ -117,10 +117,10 @@ namespace std { template<class Context = format_context, class... Args> format-arg-store<Context, Args...> - make_format_args(const Args&... args); + make_format_args(Args&&... args); template<class... 
Args> format-arg-store<wformat_context, Args...> - make_wformat_args(const Args&... args); + make_wformat_args(Args&&... args); // [format.error], class format_error class format_error; @@ -190,26 +190,15 @@ using format_args = basic_format_args<format_context>; using wformat_args = basic_format_args<wformat_context>; #endif -// TODO FMT This helper wrapper can probably be removed after P2418 has been -// implemented. -template <class _Context, class... _Args> -_LIBCPP_HIDE_FROM_ABI __format_arg_store<_Context, _Args...> -__make_format_args(_Args&&... __args) { - return _VSTD::__format_arg_store<_Context, _Args...>( - _VSTD::forward<_Args>(__args)...); -} - -// TODO FMT After P2418 specify the return type instead of using auto. template <class _Context = format_context, class... _Args> -_LIBCPP_HIDE_FROM_ABI auto make_format_args(const _Args&... __args) { - return _VSTD::__make_format_args<_Context>(__args...); +_LIBCPP_HIDE_FROM_ABI __format_arg_store<_Context, _Args...> make_format_args(_Args&&... __args) { + return _VSTD::__format_arg_store<_Context, _Args...>(__args...); } #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS -// TODO FMT After P2418 specify the return type instead of using auto. template <class... _Args> -_LIBCPP_HIDE_FROM_ABI auto make_wformat_args(const _Args&... __args) { - return _VSTD::make_format_args<wformat_context>(__args...); +_LIBCPP_HIDE_FROM_ABI __format_arg_store<wformat_context, _Args...> make_wformat_args(_Args&&... __args) { + return _VSTD::__format_arg_store<wformat_context, _Args...>(__args...); } #endif @@ -387,6 +376,7 @@ __handle_replacement_field(const _CharT* __begin, const _CharT* __end, __format::__parse_number_result __r = __format::__parse_arg_id(__begin, __end, __parse_ctx); + bool __parse = *__r.__ptr == _CharT(':'); switch (*__r.__ptr) { case _CharT(':'): // The arg-id has a format-specifier, advance the input to the format-spec. @@ -406,7 +396,7 @@ __handle_replacement_field(const _CharT* __begin, const _CharT* __end, if (__type == __arg_t::__handle) __ctx.__handle(__r.__value).__parse(__parse_ctx); else - __format::__compile_time_visit_format_arg(__parse_ctx, __ctx, __type); + __format::__compile_time_visit_format_arg(__parse_ctx, __ctx, __type); } else _VSTD::visit_format_arg( [&](auto __arg) { @@ -416,7 +406,8 @@ __handle_replacement_field(const _CharT* __begin, const _CharT* __end, __arg.format(__parse_ctx, __ctx); else { formatter<decltype(__arg), _CharT> __formatter; - __parse_ctx.advance_to(__formatter.parse(__parse_ctx)); + if (__parse) + __parse_ctx.advance_to(__formatter.parse(__parse_ctx)); __ctx.advance_to(__formatter.format(__arg, __ctx)); } }, @@ -563,7 +554,7 @@ vformat_to(_OutIt __out_it, wstring_view __fmt, wformat_args __args) { template <output_iterator<const char&> _OutIt, class... _Args> _LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT _OutIt -format_to(_OutIt __out_it, __format_string_t<_Args...> __fmt, const _Args&... __args) { +format_to(_OutIt __out_it, __format_string_t<_Args...> __fmt, _Args&&... __args) { return _VSTD::vformat_to(_VSTD::move(__out_it), __fmt.__str_, _VSTD::make_format_args(__args...)); } @@ -571,7 +562,7 @@ format_to(_OutIt __out_it, __format_string_t<_Args...> __fmt, const _Args&... __ #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS template <output_iterator<const wchar_t&> _OutIt, class... _Args> _LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT _OutIt -format_to(_OutIt __out_it, __wformat_string_t<_Args...> __fmt, const _Args&... 
__args) { +format_to(_OutIt __out_it, __wformat_string_t<_Args...> __fmt, _Args&&... __args) { return _VSTD::vformat_to(_VSTD::move(__out_it), __fmt.__str_, _VSTD::make_wformat_args(__args...)); } @@ -595,14 +586,14 @@ vformat(wstring_view __fmt, wformat_args __args) { template <class... _Args> _LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT string format(__format_string_t<_Args...> __fmt, - const _Args&... __args) { + _Args&&... __args) { return _VSTD::vformat(__fmt.__str_, _VSTD::make_format_args(__args...)); } #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS template <class... _Args> _LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT wstring -format(__wformat_string_t<_Args...> __fmt, const _Args&... __args) { +format(__wformat_string_t<_Args...> __fmt, _Args&&... __args) { return _VSTD::vformat(__fmt.__str_, _VSTD::make_wformat_args(__args...)); } #endif @@ -619,7 +610,7 @@ _LIBCPP_HIDE_FROM_ABI format_to_n_result<_OutIt> __vformat_to_n(_OutIt __out_it, template <output_iterator<const char&> _OutIt, class... _Args> _LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT format_to_n_result<_OutIt> -format_to_n(_OutIt __out_it, iter_difference_t<_OutIt> __n, __format_string_t<_Args...> __fmt, const _Args&... __args) { +format_to_n(_OutIt __out_it, iter_difference_t<_OutIt> __n, __format_string_t<_Args...> __fmt, _Args&&... __args) { return _VSTD::__vformat_to_n<format_context>(_VSTD::move(__out_it), __n, __fmt.__str_, _VSTD::make_format_args(__args...)); } @@ -627,7 +618,7 @@ format_to_n(_OutIt __out_it, iter_difference_t<_OutIt> __n, __format_string_t<_A template <output_iterator<const wchar_t&> _OutIt, class... _Args> _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT format_to_n_result<_OutIt> format_to_n(_OutIt __out_it, iter_difference_t<_OutIt> __n, __wformat_string_t<_Args...> __fmt, - const _Args&... __args) { + _Args&&... __args) { return _VSTD::__vformat_to_n<wformat_context>(_VSTD::move(__out_it), __n, __fmt.__str_, _VSTD::make_wformat_args(__args...)); } #endif @@ -642,14 +633,14 @@ _LIBCPP_HIDE_FROM_ABI size_t __vformatted_size(basic_string_view<_CharT> __fmt, template <class... _Args> _LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT size_t -formatted_size(__format_string_t<_Args...> __fmt, const _Args&... __args) { +formatted_size(__format_string_t<_Args...> __fmt, _Args&&... __args) { return _VSTD::__vformatted_size(__fmt.__str_, basic_format_args{_VSTD::make_format_args(__args...)}); } #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS template <class... _Args> _LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT size_t -formatted_size(__wformat_string_t<_Args...> __fmt, const _Args&... __args) { +formatted_size(__wformat_string_t<_Args...> __fmt, _Args&&... __args) { return _VSTD::__vformatted_size(__fmt.__str_, basic_format_args{_VSTD::make_wformat_args(__args...)}); } #endif @@ -694,7 +685,7 @@ _LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT _OutIt v template <output_iterator<const char&> _OutIt, class... _Args> _LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT _OutIt -format_to(_OutIt __out_it, locale __loc, __format_string_t<_Args...> __fmt, const _Args&... __args) { +format_to(_OutIt __out_it, locale __loc, __format_string_t<_Args...> __fmt, _Args&&... 
__args) { return _VSTD::vformat_to(_VSTD::move(__out_it), _VSTD::move(__loc), __fmt.__str_, _VSTD::make_format_args(__args...)); } @@ -702,7 +693,7 @@ format_to(_OutIt __out_it, locale __loc, __format_string_t<_Args...> __fmt, cons #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS template <output_iterator<const wchar_t&> _OutIt, class... _Args> _LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT _OutIt -format_to(_OutIt __out_it, locale __loc, __wformat_string_t<_Args...> __fmt, const _Args&... __args) { +format_to(_OutIt __out_it, locale __loc, __wformat_string_t<_Args...> __fmt, _Args&&... __args) { return _VSTD::vformat_to(_VSTD::move(__out_it), _VSTD::move(__loc), __fmt.__str_, _VSTD::make_wformat_args(__args...)); } @@ -729,7 +720,7 @@ vformat(locale __loc, wstring_view __fmt, wformat_args __args) { template <class... _Args> _LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT string format(locale __loc, __format_string_t<_Args...> __fmt, - const _Args&... __args) { + _Args&&... __args) { return _VSTD::vformat(_VSTD::move(__loc), __fmt.__str_, _VSTD::make_format_args(__args...)); } @@ -737,7 +728,7 @@ _LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT string f #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS template <class... _Args> _LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT wstring -format(locale __loc, __wformat_string_t<_Args...> __fmt, const _Args&... __args) { +format(locale __loc, __wformat_string_t<_Args...> __fmt, _Args&&... __args) { return _VSTD::vformat(_VSTD::move(__loc), __fmt.__str_, _VSTD::make_wformat_args(__args...)); } @@ -757,7 +748,7 @@ _LIBCPP_HIDE_FROM_ABI format_to_n_result<_OutIt> __vformat_to_n(_OutIt __out_it, template <output_iterator<const char&> _OutIt, class... _Args> _LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT format_to_n_result<_OutIt> format_to_n(_OutIt __out_it, iter_difference_t<_OutIt> __n, locale __loc, __format_string_t<_Args...> __fmt, - const _Args&... __args) { + _Args&&... __args) { return _VSTD::__vformat_to_n<format_context>(_VSTD::move(__out_it), __n, _VSTD::move(__loc), __fmt.__str_, _VSTD::make_format_args(__args...)); } @@ -766,7 +757,7 @@ format_to_n(_OutIt __out_it, iter_difference_t<_OutIt> __n, locale __loc, __form template <output_iterator<const wchar_t&> _OutIt, class... _Args> _LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT format_to_n_result<_OutIt> format_to_n(_OutIt __out_it, iter_difference_t<_OutIt> __n, locale __loc, __wformat_string_t<_Args...> __fmt, - const _Args&... __args) { + _Args&&... __args) { return _VSTD::__vformat_to_n<wformat_context>(_VSTD::move(__out_it), __n, _VSTD::move(__loc), __fmt.__str_, _VSTD::make_wformat_args(__args...)); } @@ -783,14 +774,14 @@ _LIBCPP_HIDE_FROM_ABI size_t __vformatted_size(locale __loc, basic_string_view<_ template <class... _Args> _LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT size_t -formatted_size(locale __loc, __format_string_t<_Args...> __fmt, const _Args&... __args) { +formatted_size(locale __loc, __format_string_t<_Args...> __fmt, _Args&&... __args) { return _VSTD::__vformatted_size(_VSTD::move(__loc), __fmt.__str_, basic_format_args{_VSTD::make_format_args(__args...)}); } #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS template <class... _Args> _LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT size_t -formatted_size(locale __loc, __wformat_string_t<_Args...> __fmt, const _Args&... 
__args) { +formatted_size(locale __loc, __wformat_string_t<_Args...> __fmt, _Args&&... __args) { return _VSTD::__vformatted_size(_VSTD::move(__loc), __fmt.__str_, basic_format_args{_VSTD::make_wformat_args(__args...)}); } #endif diff --git a/libcxx/include/forward_list b/libcxx/include/forward_list index 6a7272027351..aab3b8715d01 100644 --- a/libcxx/include/forward_list +++ b/libcxx/include/forward_list @@ -703,15 +703,11 @@ public: template <class _InputIterator> forward_list(_InputIterator __f, _InputIterator __l, - typename enable_if< - __is_cpp17_input_iterator<_InputIterator>::value - >::type* = nullptr); + __enable_if_t<__is_cpp17_input_iterator<_InputIterator>::value>* = nullptr); template <class _InputIterator> forward_list(_InputIterator __f, _InputIterator __l, const allocator_type& __a, - typename enable_if< - __is_cpp17_input_iterator<_InputIterator>::value - >::type* = nullptr); + __enable_if_t<__is_cpp17_input_iterator<_InputIterator>::value>* = nullptr); forward_list(const forward_list& __x); forward_list(const forward_list& __x, const __type_identity_t<allocator_type>& __a); @@ -743,11 +739,7 @@ public: // ~forward_list() = default; template <class _InputIterator> - typename enable_if - < - __is_cpp17_input_iterator<_InputIterator>::value, - void - >::type + __enable_if_t<__is_cpp17_input_iterator<_InputIterator>::value, void> assign(_InputIterator __f, _InputIterator __l); void assign(size_type __n, const value_type& __v); @@ -823,12 +815,8 @@ public: iterator insert_after(const_iterator __p, const value_type& __v); iterator insert_after(const_iterator __p, size_type __n, const value_type& __v); template <class _InputIterator> - _LIBCPP_INLINE_VISIBILITY - typename enable_if - < - __is_cpp17_input_iterator<_InputIterator>::value, - iterator - >::type + _LIBCPP_INLINE_VISIBILITY + __enable_if_t<__is_cpp17_input_iterator<_InputIterator>::value, iterator> insert_after(const_iterator __p, _InputIterator __f, _InputIterator __l); iterator erase_after(const_iterator __p); @@ -977,9 +965,7 @@ forward_list<_Tp, _Alloc>::forward_list(size_type __n, const value_type& __v) template <class _Tp, class _Alloc> template <class _InputIterator> forward_list<_Tp, _Alloc>::forward_list(_InputIterator __f, _InputIterator __l, - typename enable_if< - __is_cpp17_input_iterator<_InputIterator>::value - >::type*) + __enable_if_t<__is_cpp17_input_iterator<_InputIterator>::value>*) { insert_after(cbefore_begin(), __f, __l); } @@ -988,9 +974,7 @@ template <class _Tp, class _Alloc> template <class _InputIterator> forward_list<_Tp, _Alloc>::forward_list(_InputIterator __f, _InputIterator __l, const allocator_type& __a, - typename enable_if< - __is_cpp17_input_iterator<_InputIterator>::value - >::type*) + __enable_if_t<__is_cpp17_input_iterator<_InputIterator>::value>*) : base(__a) { insert_after(cbefore_begin(), __f, __l); @@ -1100,11 +1084,7 @@ forward_list<_Tp, _Alloc>::operator=(initializer_list<value_type> __il) template <class _Tp, class _Alloc> template <class _InputIterator> -typename enable_if -< - __is_cpp17_input_iterator<_InputIterator>::value, - void ->::type +__enable_if_t<__is_cpp17_input_iterator<_InputIterator>::value, void> forward_list<_Tp, _Alloc>::assign(_InputIterator __f, _InputIterator __l) { iterator __i = before_begin(); @@ -1296,11 +1276,7 @@ forward_list<_Tp, _Alloc>::insert_after(const_iterator __p, size_type __n, template <class _Tp, class _Alloc> template <class _InputIterator> -typename enable_if -< - __is_cpp17_input_iterator<_InputIterator>::value, - typename 
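The <format> rewrite above is P2418R2: every formatting entry point and make_format_args now take _Args&&... instead of const _Args&..., so arguments are perfectly forwarded and a formatter may take its argument by non-const reference. A sketch of what that enables; Ticket and its mutating formatter are illustrative, and this only compiles where <format> is available and P2418 is applied (as here):

    #include <format>
    #include <iostream>

    struct Ticket { int next = 0; };

    template <>
    struct std::formatter<Ticket> : std::formatter<int> {
        // Non-const argument: rejected by the old const _Args&... signatures.
        auto format(Ticket& t, std::format_context& ctx) const {
            return std::formatter<int>::format(t.next++, ctx);
        }
    };

    int main() {
        Ticket t;
        std::cout << std::format("{} {}\n", t, t);  // prints "0 1"
    }

Dropping the __make_format_args helper and its TODO comments is the cleanup the old code was waiting for: with P2418 in place the return type can be spelled directly instead of deduced with auto.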
forward_list<_Tp, _Alloc>::iterator ->::type +__enable_if_t<__is_cpp17_input_iterator<_InputIterator>::value, typename forward_list<_Tp, _Alloc>::iterator> forward_list<_Tp, _Alloc>::insert_after(const_iterator __p, _InputIterator __f, _InputIterator __l) { diff --git a/libcxx/include/future b/libcxx/include/future index f4a5b43eef08..cedab3608ad2 100644 --- a/libcxx/include/future +++ b/libcxx/include/future @@ -525,12 +525,12 @@ _LIBCPP_NORETURN inline _LIBCPP_INLINE_VISIBILITY #ifndef _LIBCPP_NO_EXCEPTIONS _LIBCPP_AVAILABILITY_FUTURE_ERROR #endif -void __throw_future_error(future_errc _Ev) +void __throw_future_error(future_errc __ev) { #ifndef _LIBCPP_NO_EXCEPTIONS - throw future_error(make_error_code(_Ev)); + throw future_error(make_error_code(__ev)); #else - ((void)_Ev); + ((void)__ev); _VSTD::abort(); #endif } @@ -1106,7 +1106,7 @@ future<_Rp>::future(__assoc_state<_Rp>* __state) struct __release_shared_count { - void operator()(__shared_count* p) {p->__release_shared();} + void operator()(__shared_count* __p) {__p->__release_shared();} }; template <class _Rp> diff --git a/libcxx/include/list b/libcxx/include/list index d75b15c060be..1db29d14b842 100644 --- a/libcxx/include/list +++ b/libcxx/include/list @@ -870,10 +870,10 @@ public: template <class _InpIter> list(_InpIter __f, _InpIter __l, - typename enable_if<__is_cpp17_input_iterator<_InpIter>::value>::type* = 0); + __enable_if_t<__is_cpp17_input_iterator<_InpIter>::value>* = 0); template <class _InpIter> list(_InpIter __f, _InpIter __l, const allocator_type& __a, - typename enable_if<__is_cpp17_input_iterator<_InpIter>::value>::type* = 0); + __enable_if_t<__is_cpp17_input_iterator<_InpIter>::value>* = 0); list(const list& __c); list(const list& __c, const __type_identity_t<allocator_type>& __a); @@ -905,7 +905,7 @@ public: template <class _InpIter> void assign(_InpIter __f, _InpIter __l, - typename enable_if<__is_cpp17_input_iterator<_InpIter>::value>::type* = 0); + __enable_if_t<__is_cpp17_input_iterator<_InpIter>::value>* = 0); void assign(size_type __n, const value_type& __x); _LIBCPP_INLINE_VISIBILITY @@ -1022,7 +1022,7 @@ public: iterator insert(const_iterator __p, size_type __n, const value_type& __x); template <class _InpIter> iterator insert(const_iterator __p, _InpIter __f, _InpIter __l, - typename enable_if<__is_cpp17_input_iterator<_InpIter>::value>::type* = 0); + __enable_if_t<__is_cpp17_input_iterator<_InpIter>::value>* = 0); _LIBCPP_INLINE_VISIBILITY void swap(list& __c) @@ -1220,7 +1220,7 @@ list<_Tp, _Alloc>::list(size_type __n, const value_type& __x) template <class _Tp, class _Alloc> template <class _InpIter> list<_Tp, _Alloc>::list(_InpIter __f, _InpIter __l, - typename enable_if<__is_cpp17_input_iterator<_InpIter>::value>::type*) + __enable_if_t<__is_cpp17_input_iterator<_InpIter>::value>*) { _VSTD::__debug_db_insert_c(this); for (; __f != __l; ++__f) @@ -1230,7 +1230,7 @@ list<_Tp, _Alloc>::list(_InpIter __f, _InpIter __l, template <class _Tp, class _Alloc> template <class _InpIter> list<_Tp, _Alloc>::list(_InpIter __f, _InpIter __l, const allocator_type& __a, - typename enable_if<__is_cpp17_input_iterator<_InpIter>::value>::type*) + __enable_if_t<__is_cpp17_input_iterator<_InpIter>::value>*) : base(__a) { _VSTD::__debug_db_insert_c(this); @@ -1355,7 +1355,7 @@ template <class _Tp, class _Alloc> template <class _InpIter> void list<_Tp, _Alloc>::assign(_InpIter __f, _InpIter __l, - typename enable_if<__is_cpp17_input_iterator<_InpIter>::value>::type*) + 
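In <future> above, __throw_future_error merely uglifies its parameter; it remains the single funnel through which all future_errc conditions are raised. Triggering one from user code:

    #include <future>
    #include <iostream>

    int main() {
        std::promise<int> p;
        auto f1 = p.get_future();
        try {
            auto f2 = p.get_future();    // future_already_retrieved
        } catch (const std::future_error& e) {
            std::cout << e.code().message() << '\n';
        }
    }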
__enable_if_t<__is_cpp17_input_iterator<_InpIter>::value>*) { iterator __i = begin(); iterator __e = end(); @@ -1460,7 +1460,7 @@ template <class _Tp, class _Alloc> template <class _InpIter> typename list<_Tp, _Alloc>::iterator list<_Tp, _Alloc>::insert(const_iterator __p, _InpIter __f, _InpIter __l, - typename enable_if<__is_cpp17_input_iterator<_InpIter>::value>::type*) + __enable_if_t<__is_cpp17_input_iterator<_InpIter>::value>*) { _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__p)) == this, "list::insert(iterator, range) called with an iterator not referring to this list"); diff --git a/libcxx/include/map b/libcxx/include/map index 106ed5259ed9..e1d5fa8a25d8 100644 --- a/libcxx/include/map +++ b/libcxx/include/map @@ -582,9 +582,9 @@ public: _NOEXCEPT_(is_nothrow_default_constructible<_Compare>::value) : _Compare() {} _LIBCPP_INLINE_VISIBILITY - __map_value_compare(_Compare c) + __map_value_compare(_Compare __c) _NOEXCEPT_(is_nothrow_copy_constructible<_Compare>::value) - : _Compare(c) {} + : _Compare(__c) {} _LIBCPP_INLINE_VISIBILITY const _Compare& key_comp() const _NOEXCEPT {return *this;} _LIBCPP_INLINE_VISIBILITY @@ -627,9 +627,9 @@ public: _NOEXCEPT_(is_nothrow_default_constructible<_Compare>::value) : comp() {} _LIBCPP_INLINE_VISIBILITY - __map_value_compare(_Compare c) + __map_value_compare(_Compare __c) _NOEXCEPT_(is_nothrow_copy_constructible<_Compare>::value) - : comp(c) {} + : comp(__c) {} _LIBCPP_INLINE_VISIBILITY const _Compare& key_comp() const _NOEXCEPT {return comp;} @@ -792,9 +792,7 @@ public: } template <class _ValueTp, - class = typename enable_if< - __is_same_uncvref<_ValueTp, value_type>::value - >::type + class = __enable_if_t<__is_same_uncvref<_ValueTp, value_type>::value> > _LIBCPP_INLINE_VISIBILITY __value_type& operator=(_ValueTp&& __v) @@ -992,7 +990,7 @@ public: protected: key_compare comp; - _LIBCPP_INLINE_VISIBILITY value_compare(key_compare c) : comp(c) {} + _LIBCPP_INLINE_VISIBILITY value_compare(key_compare __c) : comp(__c) {} public: _LIBCPP_INLINE_VISIBILITY bool operator()(const value_type& __x, const value_type& __y) const @@ -1230,13 +1228,13 @@ public: } template <class _Pp, - class = typename enable_if<is_constructible<value_type, _Pp>::value>::type> + class = __enable_if_t<is_constructible<value_type, _Pp>::value> > _LIBCPP_INLINE_VISIBILITY pair<iterator, bool> insert(_Pp&& __p) {return __tree_.__insert_unique(_VSTD::forward<_Pp>(__p));} template <class _Pp, - class = typename enable_if<is_constructible<value_type, _Pp>::value>::type> + class = __enable_if_t<is_constructible<value_type, _Pp>::value> > _LIBCPP_INLINE_VISIBILITY iterator insert(const_iterator __pos, _Pp&& __p) {return __tree_.__insert_unique(__pos.__i_, _VSTD::forward<_Pp>(__p));} @@ -1456,11 +1454,11 @@ public: #if _LIBCPP_STD_VER > 11 template <typename _K2> _LIBCPP_INLINE_VISIBILITY - typename enable_if<__is_transparent<_Compare, _K2>::value,iterator>::type + __enable_if_t<__is_transparent<_Compare, _K2>::value, iterator> find(const _K2& __k) {return __tree_.find(__k);} template <typename _K2> _LIBCPP_INLINE_VISIBILITY - typename enable_if<__is_transparent<_Compare, _K2>::value,const_iterator>::type + __enable_if_t<__is_transparent<_Compare, _K2>::value, const_iterator> find(const _K2& __k) const {return __tree_.find(__k);} #endif @@ -1470,7 +1468,7 @@ public: #if _LIBCPP_STD_VER > 11 template <typename _K2> _LIBCPP_INLINE_VISIBILITY - typename enable_if<__is_transparent<_Compare, _K2>::value,size_type>::type + 
__enable_if_t<__is_transparent<_Compare, _K2>::value, size_type> count(const _K2& __k) const {return __tree_.__count_multi(__k);} #endif @@ -1479,7 +1477,7 @@ public: bool contains(const key_type& __k) const {return find(__k) != end();} template <typename _K2> _LIBCPP_INLINE_VISIBILITY - typename enable_if<__is_transparent<_Compare, _K2>::value, bool>::type + __enable_if_t<__is_transparent<_Compare, _K2>::value, bool> contains(const _K2& __k) const { return find(__k) != end(); } #endif // _LIBCPP_STD_VER > 17 @@ -1492,12 +1490,12 @@ public: #if _LIBCPP_STD_VER > 11 template <typename _K2> _LIBCPP_INLINE_VISIBILITY - typename enable_if<__is_transparent<_Compare, _K2>::value,iterator>::type + __enable_if_t<__is_transparent<_Compare, _K2>::value, iterator> lower_bound(const _K2& __k) {return __tree_.lower_bound(__k);} template <typename _K2> _LIBCPP_INLINE_VISIBILITY - typename enable_if<__is_transparent<_Compare, _K2>::value,const_iterator>::type + __enable_if_t<__is_transparent<_Compare, _K2>::value, const_iterator> lower_bound(const _K2& __k) const {return __tree_.lower_bound(__k);} #endif @@ -1510,11 +1508,11 @@ public: #if _LIBCPP_STD_VER > 11 template <typename _K2> _LIBCPP_INLINE_VISIBILITY - typename enable_if<__is_transparent<_Compare, _K2>::value,iterator>::type + __enable_if_t<__is_transparent<_Compare, _K2>::value, iterator> upper_bound(const _K2& __k) {return __tree_.upper_bound(__k);} template <typename _K2> _LIBCPP_INLINE_VISIBILITY - typename enable_if<__is_transparent<_Compare, _K2>::value,const_iterator>::type + __enable_if_t<__is_transparent<_Compare, _K2>::value, const_iterator> upper_bound(const _K2& __k) const {return __tree_.upper_bound(__k);} #endif @@ -1527,11 +1525,11 @@ public: #if _LIBCPP_STD_VER > 11 template <typename _K2> _LIBCPP_INLINE_VISIBILITY - typename enable_if<__is_transparent<_Compare, _K2>::value,pair<iterator,iterator>>::type + __enable_if_t<__is_transparent<_Compare, _K2>::value, pair<iterator,iterator>> equal_range(const _K2& __k) {return __tree_.__equal_range_multi(__k);} template <typename _K2> _LIBCPP_INLINE_VISIBILITY - typename enable_if<__is_transparent<_Compare, _K2>::value,pair<const_iterator,const_iterator>>::type + __enable_if_t<__is_transparent<_Compare, _K2>::value, pair<const_iterator,const_iterator>> equal_range(const _K2& __k) const {return __tree_.__equal_range_multi(__k);} #endif @@ -1769,7 +1767,7 @@ public: key_compare comp; _LIBCPP_INLINE_VISIBILITY - value_compare(key_compare c) : comp(c) {} + value_compare(key_compare __c) : comp(__c) {} public: _LIBCPP_INLINE_VISIBILITY bool operator()(const value_type& __x, const value_type& __y) const @@ -2000,13 +1998,13 @@ public: } template <class _Pp, - class = typename enable_if<is_constructible<value_type, _Pp>::value>::type> + class = __enable_if_t<is_constructible<value_type, _Pp>::value>> _LIBCPP_INLINE_VISIBILITY iterator insert(_Pp&& __p) {return __tree_.__insert_multi(_VSTD::forward<_Pp>(__p));} template <class _Pp, - class = typename enable_if<is_constructible<value_type, _Pp>::value>::type> + class = __enable_if_t<is_constructible<value_type, _Pp>::value>> _LIBCPP_INLINE_VISIBILITY iterator insert(const_iterator __pos, _Pp&& __p) {return __tree_.__insert_multi(__pos.__i_, _VSTD::forward<_Pp>(__p));} @@ -2128,11 +2126,11 @@ public: #if _LIBCPP_STD_VER > 11 template <typename _K2> _LIBCPP_INLINE_VISIBILITY - typename enable_if<__is_transparent<_Compare, _K2>::value,iterator>::type + __enable_if_t<__is_transparent<_Compare, _K2>::value, iterator> find(const _K2& __k) {return 
__tree_.find(__k);} template <typename _K2> _LIBCPP_INLINE_VISIBILITY - typename enable_if<__is_transparent<_Compare, _K2>::value,const_iterator>::type + __enable_if_t<__is_transparent<_Compare, _K2>::value, const_iterator> find(const _K2& __k) const {return __tree_.find(__k);} #endif @@ -2142,7 +2140,7 @@ public: #if _LIBCPP_STD_VER > 11 template <typename _K2> _LIBCPP_INLINE_VISIBILITY - typename enable_if<__is_transparent<_Compare, _K2>::value,size_type>::type + __enable_if_t<__is_transparent<_Compare, _K2>::value, size_type> count(const _K2& __k) const {return __tree_.__count_multi(__k);} #endif @@ -2151,7 +2149,7 @@ public: bool contains(const key_type& __k) const {return find(__k) != end();} template <typename _K2> _LIBCPP_INLINE_VISIBILITY - typename enable_if<__is_transparent<_Compare, _K2>::value, bool>::type + __enable_if_t<__is_transparent<_Compare, _K2>::value, bool> contains(const _K2& __k) const { return find(__k) != end(); } #endif // _LIBCPP_STD_VER > 17 @@ -2164,12 +2162,12 @@ public: #if _LIBCPP_STD_VER > 11 template <typename _K2> _LIBCPP_INLINE_VISIBILITY - typename enable_if<__is_transparent<_Compare, _K2>::value,iterator>::type + __enable_if_t<__is_transparent<_Compare, _K2>::value, iterator> lower_bound(const _K2& __k) {return __tree_.lower_bound(__k);} template <typename _K2> _LIBCPP_INLINE_VISIBILITY - typename enable_if<__is_transparent<_Compare, _K2>::value,const_iterator>::type + __enable_if_t<__is_transparent<_Compare, _K2>::value, const_iterator> lower_bound(const _K2& __k) const {return __tree_.lower_bound(__k);} #endif @@ -2182,11 +2180,11 @@ public: #if _LIBCPP_STD_VER > 11 template <typename _K2> _LIBCPP_INLINE_VISIBILITY - typename enable_if<__is_transparent<_Compare, _K2>::value,iterator>::type + __enable_if_t<__is_transparent<_Compare, _K2>::value, iterator> upper_bound(const _K2& __k) {return __tree_.upper_bound(__k);} template <typename _K2> _LIBCPP_INLINE_VISIBILITY - typename enable_if<__is_transparent<_Compare, _K2>::value,const_iterator>::type + __enable_if_t<__is_transparent<_Compare, _K2>::value, const_iterator> upper_bound(const _K2& __k) const {return __tree_.upper_bound(__k);} #endif @@ -2199,11 +2197,11 @@ public: #if _LIBCPP_STD_VER > 11 template <typename _K2> _LIBCPP_INLINE_VISIBILITY - typename enable_if<__is_transparent<_Compare, _K2>::value,pair<iterator,iterator>>::type + __enable_if_t<__is_transparent<_Compare, _K2>::value, pair<iterator,iterator>> equal_range(const _K2& __k) {return __tree_.__equal_range_multi(__k);} template <typename _K2> _LIBCPP_INLINE_VISIBILITY - typename enable_if<__is_transparent<_Compare, _K2>::value,pair<const_iterator,const_iterator>>::type + __enable_if_t<__is_transparent<_Compare, _K2>::value, pair<const_iterator,const_iterator>> equal_range(const _K2& __k) const {return __tree_.__equal_range_multi(__k);} #endif diff --git a/libcxx/include/memory b/libcxx/include/memory index 299746022274..ec9f5773929f 100644 --- a/libcxx/include/memory +++ b/libcxx/include/memory @@ -1096,8 +1096,8 @@ struct __builtin_new_allocator { _LIBCPP_CONSTEXPR explicit __builtin_new_deleter(size_t __size, size_t __align) : __size_(__size), __align_(__align) {} - void operator()(void* p) const _NOEXCEPT { - _VSTD::__libcpp_deallocate(p, __size_, __align_); + void operator()(void* __p) const _NOEXCEPT { + _VSTD::__libcpp_deallocate(__p, __size_, __align_); } private: diff --git a/libcxx/include/module.modulemap.in b/libcxx/include/module.modulemap.in index 3e191dc0f86d..25e1c6e6d0e2 100644 --- a/libcxx/include/module.modulemap.in 
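The <map> hunks above re-spell the __is_transparent constraints with __enable_if_t; these are the guards behind C++14 heterogeneous lookup. What they gate, from the caller's side:

    #include <cassert>
    #include <map>
    #include <string>
    #include <string_view>

    int main() {
        // std::less<> is transparent, so find()/contains() accept any type
        // comparable with the key; no temporary std::string is built.
        std::map<std::string, int, std::less<>> m{{"alpha", 1}, {"beta", 2}};
        std::string_view key = "beta";
        assert(m.find(key) != m.end());
        assert(m.contains("alpha"));     // C++20, same constraint pattern
    }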
+++ b/libcxx/include/module.modulemap.in @@ -318,21 +318,31 @@ module std [system] { module ranges_count { private header "__algorithm/ranges_count.h" } module ranges_count_if { private header "__algorithm/ranges_count_if.h" } module ranges_equal { private header "__algorithm/ranges_equal.h" } + module ranges_equal_range { private header "__algorithm/ranges_equal_range.h" } module ranges_fill { private header "__algorithm/ranges_fill.h" } module ranges_fill_n { private header "__algorithm/ranges_fill_n.h" } module ranges_find { private header "__algorithm/ranges_find.h" } + module ranges_find_end { private header "__algorithm/ranges_find_end.h" } module ranges_find_first_of { private header "__algorithm/ranges_find_first_of.h" } module ranges_find_if { private header "__algorithm/ranges_find_if.h" } module ranges_find_if_not { private header "__algorithm/ranges_find_if_not.h" } module ranges_for_each { private header "__algorithm/ranges_for_each.h" } module ranges_for_each_n { private header "__algorithm/ranges_for_each_n.h" } + module ranges_generate { private header "__algorithm/ranges_generate.h" } + module ranges_generate_n { private header "__algorithm/ranges_generate_n.h" } + module ranges_includes { private header "__algorithm/ranges_includes.h" } + module ranges_inplace_merge { private header "__algorithm/ranges_inplace_merge.h" } + module ranges_is_heap { private header "__algorithm/ranges_is_heap.h" } + module ranges_is_heap_until { private header "__algorithm/ranges_is_heap_until.h" } module ranges_is_partitioned { private header "__algorithm/ranges_is_partitioned.h" } module ranges_is_sorted { private header "__algorithm/ranges_is_sorted.h" } module ranges_is_sorted_until { private header "__algorithm/ranges_is_sorted_until.h" } module ranges_lexicographical_compare { private header "__algorithm/ranges_lexicographical_compare.h" } module ranges_lower_bound { private header "__algorithm/ranges_lower_bound.h" } + module ranges_make_heap { private header "__algorithm/ranges_make_heap.h" } module ranges_max { private header "__algorithm/ranges_max.h" } module ranges_max_element { private header "__algorithm/ranges_max_element.h" } + module ranges_merge { private header "__algorithm/ranges_merge.h" } module ranges_min { private header "__algorithm/ranges_min.h" } module ranges_min_element { private header "__algorithm/ranges_min_element.h" } module ranges_minmax { private header "__algorithm/ranges_minmax.h" } @@ -341,13 +351,39 @@ module std [system] { module ranges_move { private header "__algorithm/ranges_move.h" } module ranges_move_backward { private header "__algorithm/ranges_move_backward.h" } module ranges_none_of { private header "__algorithm/ranges_none_of.h" } + module ranges_nth_element { private header "__algorithm/ranges_nth_element.h" } + module ranges_partial_sort_copy { private header "__algorithm/ranges_partial_sort_copy.h" } + module ranges_partition { private header "__algorithm/ranges_partition.h" } + module ranges_partition_copy { private header "__algorithm/ranges_partition_copy.h" } + module ranges_partition_point { private header "__algorithm/ranges_partition_point.h" } + module ranges_pop_heap { private header "__algorithm/ranges_pop_heap.h" } + module ranges_push_heap { private header "__algorithm/ranges_push_heap.h" } + module ranges_remove { private header "__algorithm/ranges_remove.h" } + module ranges_remove_copy { private header "__algorithm/ranges_remove_copy.h" } + module ranges_remove_copy_if { private header "__algorithm/ranges_remove_copy_if.h" } 
+ module ranges_remove_if { private header "__algorithm/ranges_remove_if.h" } module ranges_replace { private header "__algorithm/ranges_replace.h" } + module ranges_replace_copy { private header "__algorithm/ranges_replace_copy.h" } + module ranges_replace_copy_if { private header "__algorithm/ranges_replace_copy_if.h" } module ranges_replace_if { private header "__algorithm/ranges_replace_if.h" } module ranges_reverse { private header "__algorithm/ranges_reverse.h" } + module ranges_reverse_copy { private header "__algorithm/ranges_reverse_copy.h" } + module ranges_rotate_copy { private header "__algorithm/ranges_rotate_copy.h" } + module ranges_search { private header "__algorithm/ranges_search.h" } + module ranges_search_n { private header "__algorithm/ranges_search_n.h" } + module ranges_set_difference { private header "__algorithm/ranges_set_difference.h" } + module ranges_set_intersection { private header "__algorithm/ranges_set_intersection.h" } + module ranges_set_symmetric_difference { private header "__algorithm/ranges_set_symmetric_difference.h" } + module ranges_set_union { private header "__algorithm/ranges_set_union.h" } + module ranges_shuffle { private header "__algorithm/ranges_shuffle.h" } module ranges_sort { private header "__algorithm/ranges_sort.h" } + module ranges_sort_heap { private header "__algorithm/ranges_sort_heap.h" } + module ranges_stable_partition { private header "__algorithm/ranges_stable_partition.h" } module ranges_stable_sort { private header "__algorithm/ranges_stable_sort.h" } module ranges_swap_ranges { private header "__algorithm/ranges_swap_ranges.h" } module ranges_transform { private header "__algorithm/ranges_transform.h" } + module ranges_unique { private header "__algorithm/ranges_unique.h" } + module ranges_unique_copy { private header "__algorithm/ranges_unique_copy.h" } module ranges_upper_bound { private header "__algorithm/ranges_upper_bound.h" } module remove { private header "__algorithm/remove.h" } module remove_copy { private header "__algorithm/remove_copy.h" } @@ -747,6 +783,11 @@ module std [system] { header "latch" export * } + + module __debug_utils { + module randomize_range { private header "__debug_utils/randomize_range.h" } + } + module limits { header "limits" export * diff --git a/libcxx/include/regex b/libcxx/include/regex index a117c50f3984..850fe099df1e 100644 --- a/libcxx/include/regex +++ b/libcxx/include/regex @@ -1330,9 +1330,9 @@ regex_traits<_CharT>::isctype(char_type __c, char_class_type __m) const } inline _LIBCPP_INLINE_VISIBILITY -bool __is_07(unsigned char c) +bool __is_07(unsigned char __c) { - return (c & 0xF8u) == + return (__c & 0xF8u) == #if defined(__MVS__) && !defined(__NATIVE_ASCII_F) 0xF0; #else @@ -1341,9 +1341,9 @@ bool __is_07(unsigned char c) } inline _LIBCPP_INLINE_VISIBILITY -bool __is_89(unsigned char c) +bool __is_89(unsigned char __c) { - return (c & 0xFEu) == + return (__c & 0xFEu) == #if defined(__MVS__) && !defined(__NATIVE_ASCII_F) 0xF8; #else @@ -1352,12 +1352,12 @@ bool __is_89(unsigned char c) } inline _LIBCPP_INLINE_VISIBILITY -unsigned char __to_lower(unsigned char c) +unsigned char __to_lower(unsigned char __c) { #if defined(__MVS__) && !defined(__NATIVE_ASCII_F) return c & 0xBF; #else - return c | 0x20; + return __c | 0x20; #endif } @@ -2057,9 +2057,9 @@ __word_boundary<_CharT, _Traits>::__exec(__state& __s) const template <class _CharT> _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR -bool __is_eol(_CharT c) +bool __is_eol(_CharT __c) { - return c == '\r' || c == '\n'; + return __c 
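The module.modulemap.in additions above track libc++'s header granularization: each newly split <__algorithm/ranges_*.h> header gets its own private submodule so modular builds keep one-header-per-module granularity. The algorithms themselves are the C++20 ranges overloads, for instance the newly listed ranges_unique:

    #include <algorithm>
    #include <array>
    #include <cassert>

    int main() {
        std::array a{3, 1, 4, 1, 5};
        std::ranges::sort(a);                        // {1, 1, 3, 4, 5}
        auto [first, last] = std::ranges::unique(a); // drop adjacent duplicates
        assert(first - a.begin() == 4);              // {1, 3, 4, 5} kept
        assert(last == a.end());
    }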
== '\r' || __c == '\n'; } template <class _CharT> @@ -2748,12 +2748,7 @@ public: template <class _InputIterator> _LIBCPP_INLINE_VISIBILITY - typename enable_if - < - __is_cpp17_input_iterator <_InputIterator>::value && - !__is_cpp17_forward_iterator<_InputIterator>::value, - basic_regex& - >::type + typename enable_if<__is_exactly_cpp17_input_iterator<_InputIterator>::value, basic_regex&>::type assign(_InputIterator __first, _InputIterator __last, flag_type __f = regex_constants::ECMAScript) { @@ -2968,7 +2963,7 @@ private: __parse_awk_escape(_ForwardIterator __first, _ForwardIterator __last, basic_string<_CharT>* __str = nullptr); - bool __test_back_ref(_CharT c); + bool __test_back_ref(_CharT); _LIBCPP_INLINE_VISIBILITY void __push_l_anchor(); @@ -4787,9 +4782,9 @@ basic_regex<_CharT, _Traits>::__parse_egrep(_ForwardIterator __first, template <class _CharT, class _Traits> bool -basic_regex<_CharT, _Traits>::__test_back_ref(_CharT c) +basic_regex<_CharT, _Traits>::__test_back_ref(_CharT __c) { - unsigned __val = __traits_.value(c, 10); + unsigned __val = __traits_.value(__c, 10); if (__val >= 1 && __val <= 9) { if (__val > mark_count()) diff --git a/libcxx/include/scoped_allocator b/libcxx/include/scoped_allocator index b505aad9dcf7..cf82affba78f 100644 --- a/libcxx/include/scoped_allocator +++ b/libcxx/include/scoped_allocator @@ -219,10 +219,10 @@ protected: is_constructible<outer_allocator_type, _OuterA2>::value >::type> _LIBCPP_INLINE_VISIBILITY - __scoped_allocator_storage(_OuterA2&& __outerAlloc, - const _InnerAllocs& ...__innerAllocs) _NOEXCEPT - : outer_allocator_type(_VSTD::forward<_OuterA2>(__outerAlloc)), - __inner_(__innerAllocs...) {} + __scoped_allocator_storage(_OuterA2&& __outer_alloc, + const _InnerAllocs& ...__inner_allocs) _NOEXCEPT + : outer_allocator_type(_VSTD::forward<_OuterA2>(__outer_alloc)), + __inner_(__inner_allocs...) {} template <class _OuterA2, class = typename enable_if< @@ -300,8 +300,8 @@ protected: is_constructible<outer_allocator_type, _OuterA2>::value >::type> _LIBCPP_INLINE_VISIBILITY - __scoped_allocator_storage(_OuterA2&& __outerAlloc) _NOEXCEPT - : outer_allocator_type(_VSTD::forward<_OuterA2>(__outerAlloc)) {} + __scoped_allocator_storage(_OuterA2&& __outer_alloc) _NOEXCEPT + : outer_allocator_type(_VSTD::forward<_OuterA2>(__outer_alloc)) {} template <class _OuterA2, class = typename enable_if< @@ -444,9 +444,9 @@ public: is_constructible<outer_allocator_type, _OuterA2>::value >::type> _LIBCPP_INLINE_VISIBILITY - scoped_allocator_adaptor(_OuterA2&& __outerAlloc, - const _InnerAllocs& ...__innerAllocs) _NOEXCEPT - : base(_VSTD::forward<_OuterA2>(__outerAlloc), __innerAllocs...) {} + scoped_allocator_adaptor(_OuterA2&& __outer_alloc, + const _InnerAllocs& ...__inner_allocs) _NOEXCEPT + : base(_VSTD::forward<_OuterA2>(__outer_alloc), __inner_allocs...) 
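Besides the uglifications, the <regex> diff above folds basic_regex::assign's iterator constraint into __is_exactly_cpp17_input_iterator and drops the parameter name from the __test_back_ref declaration; __test_back_ref still validates a back-reference digit against mark_count(). The feature it implements:

    #include <cassert>
    #include <regex>

    int main() {
        std::regex re(R"((\w+) \1)");    // \1 must refer to an existing group
        assert(std::regex_match("hey hey", re));
        assert(!std::regex_match("hey you", re));
    }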
{} // scoped_allocator_adaptor(const scoped_allocator_adaptor& __other) = default; template <class _OuterA2, class = typename enable_if< diff --git a/libcxx/include/shared_mutex b/libcxx/include/shared_mutex index a089aa9fa817..f85cf6ec4c4f 100644 --- a/libcxx/include/shared_mutex +++ b/libcxx/include/shared_mutex @@ -400,9 +400,9 @@ public: void lock(); bool try_lock(); template <class Rep, class Period> - bool try_lock_for(const chrono::duration<Rep, Period>& rel_time); + bool try_lock_for(const chrono::duration<Rep, Period>& __rel_time); template <class Clock, class Duration> - bool try_lock_until(const chrono::time_point<Clock, Duration>& abs_time); + bool try_lock_until(const chrono::time_point<Clock, Duration>& __abs_time); void unlock(); // Setters diff --git a/libcxx/include/string_view b/libcxx/include/string_view index 28013e7cb08e..a84ed5019614 100644 --- a/libcxx/include/string_view +++ b/libcxx/include/string_view @@ -324,8 +324,8 @@ public: ranges::sized_range<_Range> && is_same_v<ranges::range_value_t<_Range>, _CharT> && !is_convertible_v<_Range, const _CharT*> && - (!requires(remove_cvref_t<_Range>& d) { - d.operator _VSTD::basic_string_view<_CharT, _Traits>(); + (!requires(remove_cvref_t<_Range>& __d) { + __d.operator _VSTD::basic_string_view<_CharT, _Traits>(); }) && (!requires { typename remove_reference_t<_Range>::traits_type; diff --git a/libcxx/include/system_error b/libcxx/include/system_error index 2db901847d83..3b705aa81ebc 100644 --- a/libcxx/include/system_error +++ b/libcxx/include/system_error @@ -236,7 +236,7 @@ class _LIBCPP_HIDDEN __do_message : public error_category { public: - virtual string message(int ev) const; + virtual string message(int __ev) const; }; _LIBCPP_FUNC_VIS const error_category& generic_category() _NOEXCEPT; @@ -482,7 +482,7 @@ private: }; _LIBCPP_NORETURN _LIBCPP_FUNC_VIS -void __throw_system_error(int ev, const char* what_arg); +void __throw_system_error(int __ev, const char* __what_arg); _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/tuple b/libcxx/include/tuple index 221eb23dd3f8..79527cca5853 100644 --- a/libcxx/include/tuple +++ b/libcxx/include/tuple @@ -482,10 +482,7 @@ struct _LIBCPP_DECLSPEC_EMPTY_BASES __tuple_impl<__tuple_indices<_Indx...>, _Tp. {} template <class _Tuple, - class = typename enable_if - < - __tuple_constructible<_Tuple, tuple<_Tp...> >::value - >::type + class = __enable_if_t<__tuple_constructible<_Tuple, tuple<_Tp...> >::value> > _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11 __tuple_impl(_Tuple&& __t) _NOEXCEPT_((__all<is_nothrow_constructible<_Tp, typename tuple_element<_Indx, @@ -495,10 +492,7 @@ struct _LIBCPP_DECLSPEC_EMPTY_BASES __tuple_impl<__tuple_indices<_Indx...>, _Tp. 
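The parameter renames running through these libc++ hunks (c to __c, outerAlloc to __outer_alloc, rel_time to __rel_time, ev to __ev, and so on) all serve one rule: library headers may only use identifiers reserved for the implementation, because user code is allowed to #define any non-reserved name before including a standard header. A minimal sketch of the failure mode the renames close off (all names here are invented for illustration):

// A user may legally define this macro before including any standard header:
#define c 1

// A header parameter named `c` would then expand to `char 1` and fail to
// parse, as in this (commented-out) unreserved spelling:
// inline bool is_eol(char c) { return c == '\r' || c == '\n'; }

// The reserved spelling cannot collide, since users may not #define __c:
inline bool is_eol(char __c) { return __c == '\r' || __c == '\n'; }

#undef c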
{} template <class _Alloc, class _Tuple, - class = typename enable_if - < - __tuple_constructible<_Tuple, tuple<_Tp...> >::value - >::type + class = __enable_if_t<__tuple_constructible<_Tuple, tuple<_Tp...> >::value> > _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11 __tuple_impl(allocator_arg_t, const _Alloc& __a, _Tuple&& __t) @@ -1336,11 +1330,7 @@ tuple(allocator_arg_t, _Alloc, tuple<_Tp...>) -> tuple<_Tp...>; template <class ..._Tp> inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 -typename enable_if -< - __all<__is_swappable<_Tp>::value...>::value, - void ->::type +__enable_if_t<__all<__is_swappable<_Tp>::value...>::value, void> swap(tuple<_Tp...>& __t, tuple<_Tp...>& __u) _NOEXCEPT_(__all<__is_nothrow_swappable<_Tp>::value...>::value) {__t.swap(__u);} diff --git a/libcxx/include/unordered_map b/libcxx/include/unordered_map index 72749e11e843..b2a12cb638d8 100644 --- a/libcxx/include/unordered_map +++ b/libcxx/include/unordered_map @@ -876,9 +876,7 @@ public: } template <class _ValueTp, - class = typename enable_if< - __is_same_uncvref<_ValueTp, value_type>::value - >::type + class = __enable_if_t<__is_same_uncvref<_ValueTp, value_type>::value> > _LIBCPP_INLINE_VISIBILITY __hash_value_type& operator=(_ValueTp&& __v) @@ -1237,13 +1235,13 @@ public: } template <class _Pp, - class = typename enable_if<is_constructible<value_type, _Pp>::value>::type> + class = __enable_if_t<is_constructible<value_type, _Pp>::value> > _LIBCPP_INLINE_VISIBILITY pair<iterator, bool> insert(_Pp&& __x) {return __table_.__insert_unique(_VSTD::forward<_Pp>(__x));} template <class _Pp, - class = typename enable_if<is_constructible<value_type, _Pp>::value>::type> + class = __enable_if_t<is_constructible<value_type, _Pp>::value> > _LIBCPP_INLINE_VISIBILITY iterator insert(const_iterator __p, _Pp&& __x) { @@ -1527,9 +1525,9 @@ public: _LIBCPP_INLINE_VISIBILITY void max_load_factor(float __mlf) {__table_.max_load_factor(__mlf);} _LIBCPP_INLINE_VISIBILITY - void rehash(size_type __n) {__table_.rehash(__n);} + void rehash(size_type __n) {__table_.__rehash_unique(__n);} _LIBCPP_INLINE_VISIBILITY - void reserve(size_type __n) {__table_.reserve(__n);} + void reserve(size_type __n) {__table_.__reserve_unique(__n);} #ifdef _LIBCPP_ENABLE_DEBUG_MODE @@ -1628,7 +1626,7 @@ unordered_map<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_map( : __table_(__hf, __eql) { _VSTD::__debug_db_insert_c(this); - __table_.rehash(__n); + __table_.__rehash_unique(__n); } template <class _Key, class _Tp, class _Hash, class _Pred, class _Alloc> @@ -1638,7 +1636,7 @@ unordered_map<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_map( : __table_(__hf, __eql, typename __table::allocator_type(__a)) { _VSTD::__debug_db_insert_c(this); - __table_.rehash(__n); + __table_.__rehash_unique(__n); } template <class _Key, class _Tp, class _Hash, class _Pred, class _Alloc> @@ -1667,7 +1665,7 @@ unordered_map<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_map( : __table_(__hf, __eql) { _VSTD::__debug_db_insert_c(this); - __table_.rehash(__n); + __table_.__rehash_unique(__n); insert(__first, __last); } @@ -1679,7 +1677,7 @@ unordered_map<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_map( : __table_(__hf, __eql, typename __table::allocator_type(__a)) { _VSTD::__debug_db_insert_c(this); - __table_.rehash(__n); + __table_.__rehash_unique(__n); insert(__first, __last); } @@ -1689,7 +1687,7 @@ unordered_map<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_map( : __table_(__u.__table_) { _VSTD::__debug_db_insert_c(this); - __table_.rehash(__u.bucket_count()); + 
__table_.__rehash_unique(__u.bucket_count()); insert(__u.begin(), __u.end()); } @@ -1699,7 +1697,7 @@ unordered_map<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_map( : __table_(__u.__table_, typename __table::allocator_type(__a)) { _VSTD::__debug_db_insert_c(this); - __table_.rehash(__u.bucket_count()); + __table_.__rehash_unique(__u.bucket_count()); insert(__u.begin(), __u.end()); } @@ -1749,7 +1747,7 @@ unordered_map<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_map( : __table_(__hf, __eql) { _VSTD::__debug_db_insert_c(this); - __table_.rehash(__n); + __table_.__rehash_unique(__n); insert(__il.begin(), __il.end()); } @@ -1760,7 +1758,7 @@ unordered_map<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_map( : __table_(__hf, __eql, typename __table::allocator_type(__a)) { _VSTD::__debug_db_insert_c(this); - __table_.rehash(__n); + __table_.__rehash_unique(__n); insert(__il.begin(), __il.end()); } @@ -2114,13 +2112,13 @@ public: {return __table_.__insert_multi(__p.__i_, _VSTD::move(__x));} template <class _Pp, - class = typename enable_if<is_constructible<value_type, _Pp>::value>::type> + class = __enable_if_t<is_constructible<value_type, _Pp>::value> > _LIBCPP_INLINE_VISIBILITY iterator insert(_Pp&& __x) {return __table_.__insert_multi(_VSTD::forward<_Pp>(__x));} template <class _Pp, - class = typename enable_if<is_constructible<value_type, _Pp>::value>::type> + class = __enable_if_t<is_constructible<value_type, _Pp>::value> > _LIBCPP_INLINE_VISIBILITY iterator insert(const_iterator __p, _Pp&& __x) {return __table_.__insert_multi(__p.__i_, _VSTD::forward<_Pp>(__x));} @@ -2303,9 +2301,9 @@ public: _LIBCPP_INLINE_VISIBILITY void max_load_factor(float __mlf) {__table_.max_load_factor(__mlf);} _LIBCPP_INLINE_VISIBILITY - void rehash(size_type __n) {__table_.rehash(__n);} + void rehash(size_type __n) {__table_.__rehash_multi(__n);} _LIBCPP_INLINE_VISIBILITY - void reserve(size_type __n) {__table_.reserve(__n);} + void reserve(size_type __n) {__table_.__reserve_multi(__n);} #ifdef _LIBCPP_ENABLE_DEBUG_MODE @@ -2400,7 +2398,7 @@ unordered_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_multimap( : __table_(__hf, __eql) { _VSTD::__debug_db_insert_c(this); - __table_.rehash(__n); + __table_.__rehash_multi(__n); } template <class _Key, class _Tp, class _Hash, class _Pred, class _Alloc> @@ -2410,7 +2408,7 @@ unordered_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_multimap( : __table_(__hf, __eql, typename __table::allocator_type(__a)) { _VSTD::__debug_db_insert_c(this); - __table_.rehash(__n); + __table_.__rehash_multi(__n); } template <class _Key, class _Tp, class _Hash, class _Pred, class _Alloc> @@ -2430,7 +2428,7 @@ unordered_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_multimap( : __table_(__hf, __eql) { _VSTD::__debug_db_insert_c(this); - __table_.rehash(__n); + __table_.__rehash_multi(__n); insert(__first, __last); } @@ -2442,7 +2440,7 @@ unordered_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_multimap( : __table_(__hf, __eql, typename __table::allocator_type(__a)) { _VSTD::__debug_db_insert_c(this); - __table_.rehash(__n); + __table_.__rehash_multi(__n); insert(__first, __last); } @@ -2461,7 +2459,7 @@ unordered_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_multimap( : __table_(__u.__table_) { _VSTD::__debug_db_insert_c(this); - __table_.rehash(__u.bucket_count()); + __table_.__rehash_multi(__u.bucket_count()); insert(__u.begin(), __u.end()); } @@ -2471,7 +2469,7 @@ unordered_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_multimap( : __table_(__u.__table_, 
typename __table::allocator_type(__a)) { _VSTD::__debug_db_insert_c(this); - __table_.rehash(__u.bucket_count()); + __table_.__rehash_multi(__u.bucket_count()); insert(__u.begin(), __u.end()); } @@ -2522,7 +2520,7 @@ unordered_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_multimap( : __table_(__hf, __eql) { _VSTD::__debug_db_insert_c(this); - __table_.rehash(__n); + __table_.__rehash_multi(__n); insert(__il.begin(), __il.end()); } @@ -2533,7 +2531,7 @@ unordered_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_multimap( : __table_(__hf, __eql, typename __table::allocator_type(__a)) { _VSTD::__debug_db_insert_c(this); - __table_.rehash(__n); + __table_.__rehash_multi(__n); insert(__il.begin(), __il.end()); } diff --git a/libcxx/include/unordered_set b/libcxx/include/unordered_set index 97aa935f187c..fc6e8e21c0cb 100644 --- a/libcxx/include/unordered_set +++ b/libcxx/include/unordered_set @@ -858,9 +858,9 @@ public: _LIBCPP_INLINE_VISIBILITY void max_load_factor(float __mlf) {__table_.max_load_factor(__mlf);} _LIBCPP_INLINE_VISIBILITY - void rehash(size_type __n) {__table_.rehash(__n);} + void rehash(size_type __n) {__table_.__rehash_unique(__n);} _LIBCPP_INLINE_VISIBILITY - void reserve(size_type __n) {__table_.reserve(__n);} + void reserve(size_type __n) {__table_.__reserve_unique(__n);} #ifdef _LIBCPP_ENABLE_DEBUG_MODE @@ -942,7 +942,7 @@ unordered_set<_Value, _Hash, _Pred, _Alloc>::unordered_set(size_type __n, : __table_(__hf, __eql) { _VSTD::__debug_db_insert_c(this); - __table_.rehash(__n); + __table_.__rehash_unique(__n); } template <class _Value, class _Hash, class _Pred, class _Alloc> @@ -951,7 +951,7 @@ unordered_set<_Value, _Hash, _Pred, _Alloc>::unordered_set(size_type __n, : __table_(__hf, __eql, __a) { _VSTD::__debug_db_insert_c(this); - __table_.rehash(__n); + __table_.__rehash_unique(__n); } template <class _Value, class _Hash, class _Pred, class _Alloc> @@ -971,7 +971,7 @@ unordered_set<_Value, _Hash, _Pred, _Alloc>::unordered_set( : __table_(__hf, __eql) { _VSTD::__debug_db_insert_c(this); - __table_.rehash(__n); + __table_.__rehash_unique(__n); insert(__first, __last); } @@ -983,7 +983,7 @@ unordered_set<_Value, _Hash, _Pred, _Alloc>::unordered_set( : __table_(__hf, __eql, __a) { _VSTD::__debug_db_insert_c(this); - __table_.rehash(__n); + __table_.__rehash_unique(__n); insert(__first, __last); } @@ -1002,7 +1002,7 @@ unordered_set<_Value, _Hash, _Pred, _Alloc>::unordered_set( : __table_(__u.__table_) { _VSTD::__debug_db_insert_c(this); - __table_.rehash(__u.bucket_count()); + __table_.__rehash_unique(__u.bucket_count()); insert(__u.begin(), __u.end()); } @@ -1012,7 +1012,7 @@ unordered_set<_Value, _Hash, _Pred, _Alloc>::unordered_set( : __table_(__u.__table_, __a) { _VSTD::__debug_db_insert_c(this); - __table_.rehash(__u.bucket_count()); + __table_.__rehash_unique(__u.bucket_count()); insert(__u.begin(), __u.end()); } @@ -1060,7 +1060,7 @@ unordered_set<_Value, _Hash, _Pred, _Alloc>::unordered_set( : __table_(__hf, __eql) { _VSTD::__debug_db_insert_c(this); - __table_.rehash(__n); + __table_.__rehash_unique(__n); insert(__il.begin(), __il.end()); } @@ -1071,7 +1071,7 @@ unordered_set<_Value, _Hash, _Pred, _Alloc>::unordered_set( : __table_(__hf, __eql, __a) { _VSTD::__debug_db_insert_c(this); - __table_.rehash(__n); + __table_.__rehash_unique(__n); insert(__il.begin(), __il.end()); } @@ -1496,9 +1496,9 @@ public: _LIBCPP_INLINE_VISIBILITY void max_load_factor(float __mlf) {__table_.max_load_factor(__mlf);} _LIBCPP_INLINE_VISIBILITY - void rehash(size_type __n) 
{__table_.rehash(__n);} + void rehash(size_type __n) {__table_.__rehash_multi(__n);} _LIBCPP_INLINE_VISIBILITY - void reserve(size_type __n) {__table_.reserve(__n);} + void reserve(size_type __n) {__table_.__reserve_multi(__n);} #ifdef _LIBCPP_ENABLE_DEBUG_MODE @@ -1578,7 +1578,7 @@ unordered_multiset<_Value, _Hash, _Pred, _Alloc>::unordered_multiset( : __table_(__hf, __eql) { _VSTD::__debug_db_insert_c(this); - __table_.rehash(__n); + __table_.__rehash_multi(__n); } template <class _Value, class _Hash, class _Pred, class _Alloc> @@ -1588,7 +1588,7 @@ unordered_multiset<_Value, _Hash, _Pred, _Alloc>::unordered_multiset( : __table_(__hf, __eql, __a) { _VSTD::__debug_db_insert_c(this); - __table_.rehash(__n); + __table_.__rehash_multi(__n); } template <class _Value, class _Hash, class _Pred, class _Alloc> @@ -1608,7 +1608,7 @@ unordered_multiset<_Value, _Hash, _Pred, _Alloc>::unordered_multiset( : __table_(__hf, __eql) { _VSTD::__debug_db_insert_c(this); - __table_.rehash(__n); + __table_.__rehash_multi(__n); insert(__first, __last); } @@ -1620,7 +1620,7 @@ unordered_multiset<_Value, _Hash, _Pred, _Alloc>::unordered_multiset( : __table_(__hf, __eql, __a) { _VSTD::__debug_db_insert_c(this); - __table_.rehash(__n); + __table_.__rehash_multi(__n); insert(__first, __last); } @@ -1639,7 +1639,7 @@ unordered_multiset<_Value, _Hash, _Pred, _Alloc>::unordered_multiset( : __table_(__u.__table_) { _VSTD::__debug_db_insert_c(this); - __table_.rehash(__u.bucket_count()); + __table_.__rehash_multi(__u.bucket_count()); insert(__u.begin(), __u.end()); } @@ -1649,7 +1649,7 @@ unordered_multiset<_Value, _Hash, _Pred, _Alloc>::unordered_multiset( : __table_(__u.__table_, __a) { _VSTD::__debug_db_insert_c(this); - __table_.rehash(__u.bucket_count()); + __table_.__rehash_multi(__u.bucket_count()); insert(__u.begin(), __u.end()); } @@ -1697,7 +1697,7 @@ unordered_multiset<_Value, _Hash, _Pred, _Alloc>::unordered_multiset( : __table_(__hf, __eql) { _VSTD::__debug_db_insert_c(this); - __table_.rehash(__n); + __table_.__rehash_multi(__n); insert(__il.begin(), __il.end()); } @@ -1708,7 +1708,7 @@ unordered_multiset<_Value, _Hash, _Pred, _Alloc>::unordered_multiset( : __table_(__hf, __eql, __a) { _VSTD::__debug_db_insert_c(this); - __table_.rehash(__n); + __table_.__rehash_multi(__n); insert(__il.begin(), __il.end()); } diff --git a/libcxx/include/variant b/libcxx/include/variant index 65dec64dbbbd..b74416b62d91 100644 --- a/libcxx/include/variant +++ b/libcxx/include/variant @@ -777,8 +777,8 @@ public: using __index_t = __variant_index_t<sizeof...(_Types)>; inline _LIBCPP_INLINE_VISIBILITY - explicit constexpr __base(__valueless_t tag) noexcept - : __data(tag), __index(__variant_npos<__index_t>) {} + explicit constexpr __base(__valueless_t __tag) noexcept + : __data(__tag), __index(__variant_npos<__index_t>) {} template <size_t _Ip, class... 
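All four unordered containers now route their public rehash and reserve members to uniqueness-specific entry points on the shared __hash_table: __rehash_unique/__reserve_unique for unordered_map and unordered_set, __rehash_multi/__reserve_multi for the multi variants. The user-visible semantics are unchanged; as a reminder of the contract these members implement, rehash takes a bucket count while reserve takes an element count:

#include <cmath>
#include <cstddef>
#include <unordered_map>

int main() {
  std::unordered_map<int, int> m;
  m.reserve(1000); // room for 1000 elements without exceeding max_load_factor
  // reserve(n) is specified as rehash(ceil(n / max_load_factor())):
  m.rehash(static_cast<std::size_t>(std::ceil(1000 / m.max_load_factor())));
}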
_Args> inline _LIBCPP_INLINE_VISIBILITY diff --git a/libcxx/include/vector b/libcxx/include/vector index f5c09011948d..14f586c9bfd7 100644 --- a/libcxx/include/vector +++ b/libcxx/include/vector @@ -393,16 +393,14 @@ public: template <class _InputIterator> vector(_InputIterator __first, - typename enable_if<__is_cpp17_input_iterator <_InputIterator>::value && - !__is_cpp17_forward_iterator<_InputIterator>::value && + typename enable_if<__is_exactly_cpp17_input_iterator<_InputIterator>::value && is_constructible< value_type, typename iterator_traits<_InputIterator>::reference>::value, _InputIterator>::type __last); template <class _InputIterator> vector(_InputIterator __first, _InputIterator __last, const allocator_type& __a, - typename enable_if<__is_cpp17_input_iterator <_InputIterator>::value && - !__is_cpp17_forward_iterator<_InputIterator>::value && + typename enable_if<__is_exactly_cpp17_input_iterator<_InputIterator>::value && is_constructible< value_type, typename iterator_traits<_InputIterator>::reference>::value>::type* = 0); @@ -465,10 +463,7 @@ public: _NOEXCEPT_((__noexcept_move_assign_container<_Allocator, __alloc_traits>::value)); template <class _InputIterator> - typename enable_if - < - __is_cpp17_input_iterator <_InputIterator>::value && - !__is_cpp17_forward_iterator<_InputIterator>::value && + typename enable_if <__is_exactly_cpp17_input_iterator<_InputIterator>::value && is_constructible< value_type, typename iterator_traits<_InputIterator>::reference>::value, @@ -598,10 +593,7 @@ public: iterator insert(const_iterator __position, size_type __n, const_reference __x); template <class _InputIterator> - typename enable_if - < - __is_cpp17_input_iterator <_InputIterator>::value && - !__is_cpp17_forward_iterator<_InputIterator>::value && + typename enable_if <__is_exactly_cpp17_input_iterator<_InputIterator>::value && is_constructible< value_type, typename iterator_traits<_InputIterator>::reference>::value, @@ -1090,8 +1082,7 @@ vector<_Tp, _Allocator>::vector(size_type __n, const value_type& __x) template <class _Tp, class _Allocator> template <class _InputIterator> vector<_Tp, _Allocator>::vector(_InputIterator __first, - typename enable_if<__is_cpp17_input_iterator <_InputIterator>::value && - !__is_cpp17_forward_iterator<_InputIterator>::value && + typename enable_if<__is_exactly_cpp17_input_iterator<_InputIterator>::value && is_constructible< value_type, typename iterator_traits<_InputIterator>::reference>::value, @@ -1105,8 +1096,7 @@ vector<_Tp, _Allocator>::vector(_InputIterator __first, template <class _Tp, class _Allocator> template <class _InputIterator> vector<_Tp, _Allocator>::vector(_InputIterator __first, _InputIterator __last, const allocator_type& __a, - typename enable_if<__is_cpp17_input_iterator <_InputIterator>::value && - !__is_cpp17_forward_iterator<_InputIterator>::value && + typename enable_if<__is_exactly_cpp17_input_iterator<_InputIterator>::value && is_constructible< value_type, typename iterator_traits<_InputIterator>::reference>::value>::type*) @@ -1301,10 +1291,7 @@ vector<_Tp, _Allocator>::operator=(const vector& __x) template <class _Tp, class _Allocator> template <class _InputIterator> -typename enable_if -< - __is_cpp17_input_iterator <_InputIterator>::value && - !__is_cpp17_forward_iterator<_InputIterator>::value && +typename enable_if <__is_exactly_cpp17_input_iterator<_InputIterator>::value && is_constructible< _Tp, typename iterator_traits<_InputIterator>::reference>::value, @@ -1751,10 +1738,7 @@ vector<_Tp, 
_Allocator>::insert(const_iterator __position, size_type __n, const_ template <class _Tp, class _Allocator> template <class _InputIterator> -typename enable_if -< - __is_cpp17_input_iterator <_InputIterator>::value && - !__is_cpp17_forward_iterator<_InputIterator>::value && +typename enable_if <__is_exactly_cpp17_input_iterator<_InputIterator>::value && is_constructible< _Tp, typename iterator_traits<_InputIterator>::reference>::value, @@ -2057,12 +2041,10 @@ public: vector(size_type __n, const value_type& __v, const allocator_type& __a); template <class _InputIterator> vector(_InputIterator __first, _InputIterator __last, - typename enable_if<__is_cpp17_input_iterator <_InputIterator>::value && - !__is_cpp17_forward_iterator<_InputIterator>::value>::type* = 0); + typename enable_if<__is_exactly_cpp17_input_iterator<_InputIterator>::value>::type* = 0); template <class _InputIterator> vector(_InputIterator __first, _InputIterator __last, const allocator_type& __a, - typename enable_if<__is_cpp17_input_iterator <_InputIterator>::value && - !__is_cpp17_forward_iterator<_InputIterator>::value>::type* = 0); + typename enable_if<__is_exactly_cpp17_input_iterator<_InputIterator>::value>::type* = 0); template <class _ForwardIterator> vector(_ForwardIterator __first, _ForwardIterator __last, typename enable_if<__is_cpp17_forward_iterator<_ForwardIterator>::value>::type* = 0); @@ -2097,10 +2079,7 @@ public: _NOEXCEPT_((__noexcept_move_assign_container<_Allocator, __alloc_traits>::value)); template <class _InputIterator> - typename enable_if - < - __is_cpp17_input_iterator<_InputIterator>::value && - !__is_cpp17_forward_iterator<_InputIterator>::value, + typename enable_if <__is_exactly_cpp17_input_iterator<_InputIterator>::value, void >::type assign(_InputIterator __first, _InputIterator __last); @@ -2205,17 +2184,14 @@ public: #if _LIBCPP_STD_VER > 11 template <class... _Args> - _LIBCPP_INLINE_VISIBILITY iterator emplace(const_iterator position, _Args&&... __args) - { return insert ( position, value_type ( _VSTD::forward<_Args>(__args)... )); } + _LIBCPP_INLINE_VISIBILITY iterator emplace(const_iterator __position, _Args&&... __args) + { return insert ( __position, value_type ( _VSTD::forward<_Args>(__args)... 
)); } #endif iterator insert(const_iterator __position, const value_type& __x); iterator insert(const_iterator __position, size_type __n, const value_type& __x); template <class _InputIterator> - typename enable_if - < - __is_cpp17_input_iterator <_InputIterator>::value && - !__is_cpp17_forward_iterator<_InputIterator>::value, + typename enable_if <__is_exactly_cpp17_input_iterator<_InputIterator>::value, iterator >::type insert(const_iterator __position, _InputIterator __first, _InputIterator __last); @@ -2522,8 +2498,7 @@ vector<bool, _Allocator>::vector(size_type __n, const value_type& __x, const all template <class _Allocator> template <class _InputIterator> vector<bool, _Allocator>::vector(_InputIterator __first, _InputIterator __last, - typename enable_if<__is_cpp17_input_iterator <_InputIterator>::value && - !__is_cpp17_forward_iterator<_InputIterator>::value>::type*) + typename enable_if<__is_exactly_cpp17_input_iterator<_InputIterator>::value>::type*) : __begin_(nullptr), __size_(0), __cap_alloc_(0, __default_init_tag()) @@ -2549,8 +2524,7 @@ vector<bool, _Allocator>::vector(_InputIterator __first, _InputIterator __last, template <class _Allocator> template <class _InputIterator> vector<bool, _Allocator>::vector(_InputIterator __first, _InputIterator __last, const allocator_type& __a, - typename enable_if<__is_cpp17_input_iterator <_InputIterator>::value && - !__is_cpp17_forward_iterator<_InputIterator>::value>::type*) + typename enable_if<__is_exactly_cpp17_input_iterator<_InputIterator>::value>::type*) : __begin_(nullptr), __size_(0), __cap_alloc_(0, static_cast<__storage_allocator>(__a)) @@ -2787,10 +2761,7 @@ vector<bool, _Allocator>::assign(size_type __n, const value_type& __x) template <class _Allocator> template <class _InputIterator> -typename enable_if -< - __is_cpp17_input_iterator<_InputIterator>::value && - !__is_cpp17_forward_iterator<_InputIterator>::value, +typename enable_if <__is_exactly_cpp17_input_iterator<_InputIterator>::value, void >::type vector<bool, _Allocator>::assign(_InputIterator __first, _InputIterator __last) @@ -2941,10 +2912,7 @@ vector<bool, _Allocator>::insert(const_iterator __position, size_type __n, const template <class _Allocator> template <class _InputIterator> -typename enable_if -< - __is_cpp17_input_iterator <_InputIterator>::value && - !__is_cpp17_forward_iterator<_InputIterator>::value, +typename enable_if <__is_exactly_cpp17_input_iterator<_InputIterator>::value, typename vector<bool, _Allocator>::iterator >::type vector<bool, _Allocator>::insert(const_iterator __position, _InputIterator __first, _InputIterator __last) diff --git a/libcxx/include/wchar.h b/libcxx/include/wchar.h index ce63cf247618..0fba53b268ae 100644 --- a/libcxx/include/wchar.h +++ b/libcxx/include/wchar.h @@ -176,10 +176,10 @@ inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_PREFERRED_OVERLOAD #if defined(__cplusplus) && (defined(_LIBCPP_MSVCRT_LIKE) || defined(__MVS__)) extern "C" { -size_t mbsnrtowcs(wchar_t *__restrict dst, const char **__restrict src, - size_t nmc, size_t len, mbstate_t *__restrict ps); -size_t wcsnrtombs(char *__restrict dst, const wchar_t **__restrict src, - size_t nwc, size_t len, mbstate_t *__restrict ps); +size_t mbsnrtowcs(wchar_t *__restrict __dst, const char **__restrict __src, + size_t __nmc, size_t __len, mbstate_t *__restrict __ps); +size_t wcsnrtombs(char *__restrict __dst, const wchar_t **__restrict __src, + size_t __nwc, size_t __len, mbstate_t *__restrict __ps); } // extern "C" #endif // __cplusplus && (_LIBCPP_MSVCRT || __MVS__) diff 
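The vector and regex hunks above repeatedly replace the two-condition guard __is_cpp17_input_iterator<_It>::value && !__is_cpp17_forward_iterator<_It>::value with a single __is_exactly_cpp17_input_iterator trait. A standalone sketch of an equivalent trait, assuming only the standard iterator machinery rather than libc++'s internal one:

#include <iterator>
#include <type_traits>

template <class _It, class = void>
struct is_exactly_input_iterator : std::false_type {};

// True for iterators whose category is input but not forward.
template <class _It>
struct is_exactly_input_iterator<
    _It, std::void_t<typename std::iterator_traits<_It>::iterator_category>>
    : std::bool_constant<
          std::is_convertible_v<
              typename std::iterator_traits<_It>::iterator_category,
              std::input_iterator_tag> &&
          !std::is_convertible_v<
              typename std::iterator_traits<_It>::iterator_category,
              std::forward_iterator_tag>> {};

static_assert(is_exactly_input_iterator<std::istream_iterator<int>>::value);
static_assert(!is_exactly_input_iterator<int*>::value);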
--git a/lld/COFF/Config.h b/lld/COFF/Config.h index 8edb545cd653..dd089f5ab671 100644 --- a/lld/COFF/Config.h +++ b/lld/COFF/Config.h @@ -15,6 +15,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Object/COFF.h" #include "llvm/Support/CachePruning.h" +#include "llvm/Support/VirtualFileSystem.h" #include <cstdint> #include <map> #include <set> @@ -238,6 +239,9 @@ struct Configuration { // Used for /print-symbol-order: StringRef printSymbolOrder; + // Used for /vfsoverlay: + std::unique_ptr<llvm::vfs::FileSystem> vfs; + uint64_t align = 4096; uint64_t imageBase = -1; uint64_t fileAlign = 512; diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp index ffa900d42f2d..155e4ca6ee3f 100644 --- a/lld/COFF/Driver.cpp +++ b/lld/COFF/Driver.cpp @@ -436,17 +436,26 @@ void LinkerDriver::parseDirectives(InputFile *file) { // Find file from search paths. You can omit ".obj", this function takes // care of that. Note that the returned path is not guaranteed to exist. StringRef LinkerDriver::doFindFile(StringRef filename) { + auto getFilename = [](StringRef filename) -> StringRef { + if (config->vfs) + if (auto statOrErr = config->vfs->status(filename)) + return saver().save(statOrErr->getName()); + return filename; + }; + bool hasPathSep = (filename.find_first_of("/\\") != StringRef::npos); if (hasPathSep) - return filename; + return getFilename(filename); bool hasExt = filename.contains('.'); for (StringRef dir : searchPaths) { SmallString<128> path = dir; sys::path::append(path, filename); + path = SmallString<128>{getFilename(path.str())}; if (sys::fs::exists(path.str())) return saver().save(path.str()); if (!hasExt) { path.append(".obj"); + path = SmallString<128>{getFilename(path.str())}; if (sys::fs::exists(path.str())) return saver().save(path.str()); } @@ -1349,6 +1358,28 @@ Optional<std::string> getReproduceFile(const opt::InputArgList &args) { return None; } +static std::unique_ptr<llvm::vfs::FileSystem> +getVFS(const opt::InputArgList &args) { + using namespace llvm::vfs; + + const opt::Arg *arg = args.getLastArg(OPT_vfsoverlay); + if (!arg) + return nullptr; + + auto bufOrErr = llvm::MemoryBuffer::getFile(arg->getValue()); + if (!bufOrErr) { + checkError(errorCodeToError(bufOrErr.getError())); + return nullptr; + } + + if (auto ret = vfs::getVFSFromYAML(std::move(*bufOrErr), /*DiagHandler*/ nullptr, + arg->getValue())) + return ret; + + error("Invalid vfs overlay"); + return nullptr; +} + void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) { ScopedTimer rootTimer(ctx.rootTimer); @@ -1390,6 +1421,8 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) { errorHandler().errorLimit = n; } + config->vfs = getVFS(args); + // Handle /help if (args.hasArg(OPT_help)) { printHelp(argsArr[0]); diff --git a/lld/COFF/Options.td b/lld/COFF/Options.td index 9f29ea0d523a..5135f4ea34af 100644 --- a/lld/COFF/Options.td +++ b/lld/COFF/Options.td @@ -278,6 +278,8 @@ def print_symbol_order: P< "/call-graph-profile-sort into the specified file">; def wrap : P_priv<"wrap">; +def vfsoverlay : P<"vfsoverlay", "Path to a vfsoverlay yaml file to optionally look for /defaultlib's in">; + // Flags for debugging def lldmap : F<"lldmap">; def lldmap_file : P_priv<"lldmap">; diff --git a/lld/ELF/Arch/RISCV.cpp b/lld/ELF/Arch/RISCV.cpp index 7ba0214eb2a7..c09bb2e60786 100644 --- a/lld/ELF/Arch/RISCV.cpp +++ b/lld/ELF/Arch/RISCV.cpp @@ -7,9 +7,11 @@ //===----------------------------------------------------------------------===// #include "InputFiles.h" +#include "OutputSections.h" #include "Symbols.h" 
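The /vfsoverlay: handling above reuses LLVM's virtual file system support: doFindFile stats each candidate path through the overlay and, on a hit, substitutes the redirected name before the on-disk existence checks. A minimal sketch of the underlying API outside of lld (error handling elided; the overlay path is a placeholder, and the YAML format is the same one clang accepts for -ivfsoverlay):

#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/VirtualFileSystem.h"
#include <memory>

std::unique_ptr<llvm::vfs::FileSystem> loadOverlay(const char *yamlPath) {
  auto bufOrErr = llvm::MemoryBuffer::getFile(yamlPath);
  if (!bufOrErr)
    return nullptr; // real code should report bufOrErr.getError()
  return llvm::vfs::getVFSFromYAML(std::move(*bufOrErr),
                                   /*DiagHandler=*/nullptr, yamlPath);
}

Calling status() on the returned filesystem yields a vfs::Status whose getName() is the resolved, possibly redirected, path; that is the name doFindFile saves.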
#include "SyntheticSections.h" #include "Target.h" +#include "llvm/Support/TimeProfiler.h" using namespace llvm; using namespace llvm::object; @@ -36,6 +38,7 @@ public: const uint8_t *loc) const override; void relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const override; + bool relaxOnce(int pass) const override; }; } // end anonymous namespace @@ -267,16 +270,12 @@ RelExpr RISCV::getRelExpr(const RelType type, const Symbol &s, case R_RISCV_TPREL_LO12_I: case R_RISCV_TPREL_LO12_S: return R_TPREL; - case R_RISCV_RELAX: case R_RISCV_TPREL_ADD: return R_NONE; case R_RISCV_ALIGN: - // Not just a hint; always padded to the worst-case number of NOPs, so may - // not currently be aligned, and without linker relaxation support we can't - // delete NOPs to realign. - errorOrWarn(getErrorLocation(loc) + "relocation R_RISCV_ALIGN requires " - "unimplemented linker relaxation; recompile with -mno-relax"); - return R_NONE; + return R_RELAX_HINT; + case R_RISCV_RELAX: + return config->relax ? R_RELAX_HINT : R_NONE; default: error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) + ") against symbol " + toString(s)); @@ -301,7 +300,7 @@ void RISCV::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { return; case R_RISCV_RVC_BRANCH: { - checkInt(loc, static_cast<int64_t>(val) >> 1, 8, rel); + checkInt(loc, val, 9, rel); checkAlignment(loc, val, 2, rel); uint16_t insn = read16le(loc) & 0xE383; uint16_t imm8 = extractBits(val, 8, 8) << 12; @@ -316,7 +315,7 @@ void RISCV::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { } case R_RISCV_RVC_JUMP: { - checkInt(loc, static_cast<int64_t>(val) >> 1, 11, rel); + checkInt(loc, val, 12, rel); checkAlignment(loc, val, 2, rel); uint16_t insn = read16le(loc) & 0xE003; uint16_t imm11 = extractBits(val, 11, 11) << 12; @@ -347,7 +346,7 @@ void RISCV::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { } case R_RISCV_JAL: { - checkInt(loc, static_cast<int64_t>(val) >> 1, 20, rel); + checkInt(loc, val, 21, rel); checkAlignment(loc, val, 2, rel); uint32_t insn = read32le(loc) & 0xFFF; @@ -362,7 +361,7 @@ void RISCV::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { } case R_RISCV_BRANCH: { - checkInt(loc, static_cast<int64_t>(val) >> 1, 12, rel); + checkInt(loc, val, 13, rel); checkAlignment(loc, val, 2, rel); uint32_t insn = read32le(loc) & 0x1FFF07F; @@ -476,6 +475,291 @@ void RISCV::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { } } +namespace { +struct SymbolAnchor { + uint64_t offset; + Defined *d; + bool end; // true for the anchor of st_value+st_size +}; +} // namespace + +struct elf::RISCVRelaxAux { + // This records symbol start and end offsets which will be adjusted according + // to the nearest relocDeltas element. + SmallVector<SymbolAnchor, 0> anchors; + // For relocations[i], the actual offset is r_offset - (i ? relocDeltas[i-1] : + // 0). + std::unique_ptr<uint32_t[]> relocDeltas; + // For relocations[i], the actual type is relocTypes[i]. 
+ std::unique_ptr<RelType[]> relocTypes; + SmallVector<uint32_t, 0> writes; +}; + +static void initSymbolAnchors() { + SmallVector<InputSection *, 0> storage; + for (OutputSection *osec : outputSections) { + if (!(osec->flags & SHF_EXECINSTR)) + continue; + for (InputSection *sec : getInputSections(*osec, storage)) { + sec->relaxAux = make<RISCVRelaxAux>(); + if (sec->relocations.size()) { + sec->relaxAux->relocDeltas = + std::make_unique<uint32_t[]>(sec->relocations.size()); + sec->relaxAux->relocTypes = + std::make_unique<RelType[]>(sec->relocations.size()); + } + } + } + // Store anchors (st_value and st_value+st_size) for symbols relative to text + // sections. + for (InputFile *file : ctx->objectFiles) + for (Symbol *sym : file->getSymbols()) { + auto *d = dyn_cast<Defined>(sym); + if (!d || d->file != file) + continue; + if (auto *sec = dyn_cast_or_null<InputSection>(d->section)) + if (sec->flags & SHF_EXECINSTR && sec->relaxAux) { + // If sec is discarded, relaxAux will be nullptr. + sec->relaxAux->anchors.push_back({d->value, d, false}); + sec->relaxAux->anchors.push_back({d->value + d->size, d, true}); + } + } + // Sort anchors by offset so that we can find the closest relocation + // efficiently. For a zero size symbol, ensure that its start anchor precedes + // its end anchor. For two symbols with anchors at the same offset, their + // order does not matter. + for (OutputSection *osec : outputSections) { + if (!(osec->flags & SHF_EXECINSTR)) + continue; + for (InputSection *sec : getInputSections(*osec, storage)) { + llvm::sort(sec->relaxAux->anchors, [](auto &a, auto &b) { + return std::make_pair(a.offset, a.end) < + std::make_pair(b.offset, b.end); + }); + } + } +} + +// Relax R_RISCV_CALL/R_RISCV_CALL_PLT auipc+jalr to c.j, c.jal, or jal. +static void relaxCall(const InputSection &sec, size_t i, uint64_t loc, + Relocation &r, uint32_t &remove) { + const bool rvc = config->eflags & EF_RISCV_RVC; + const Symbol &sym = *r.sym; + const uint64_t insnPair = read64le(sec.rawData.data() + r.offset); + const uint32_t rd = extractBits(insnPair, 32 + 11, 32 + 7); + const uint64_t dest = + (r.expr == R_PLT_PC ? sym.getPltVA() : sym.getVA()) + r.addend; + const int64_t displace = dest - loc; + + if (rvc && isInt<12>(displace) && rd == 0) { + sec.relaxAux->relocTypes[i] = R_RISCV_RVC_JUMP; + sec.relaxAux->writes.push_back(0xa001); // c.j + remove = 6; + } else if (rvc && isInt<12>(displace) && rd == X_RA && + !config->is64) { // RV32C only + sec.relaxAux->relocTypes[i] = R_RISCV_RVC_JUMP; + sec.relaxAux->writes.push_back(0x2001); // c.jal + remove = 6; + } else if (isInt<21>(displace)) { + sec.relaxAux->relocTypes[i] = R_RISCV_JAL; + sec.relaxAux->writes.push_back(0x6f | rd << 7); // jal + remove = 4; + } +} + +static bool relax(InputSection &sec) { + const uint64_t secAddr = sec.getVA(); + auto &aux = *sec.relaxAux; + bool changed = false; + + // Get st_value delta for symbols relative to this section from the previous + // iteration. 
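relaxCall above makes a three-way size decision for each auipc+jalr pair: a 2-byte c.j when the target is within a signed 12-bit displacement and no return address is written, a 2-byte c.jal on RV32C when the return register is ra, otherwise a 4-byte jal for signed 21-bit displacements. A self-contained restatement of that decision (the enum and helper are assumptions of this sketch; x0 is register 0, ra is x1):

#include <cstdint>

enum class CallForm { AuipcJalr, CJ, CJal, Jal };

CallForm pickCallForm(int64_t displace, uint32_t rd, bool rvc, bool is64) {
  auto fits = [&](int bits) {
    return displace >= -(int64_t(1) << (bits - 1)) &&
           displace < (int64_t(1) << (bits - 1));
  };
  if (rvc && fits(12) && rd == 0)
    return CallForm::CJ;   // drops 6 of the pair's 8 bytes
  if (rvc && fits(12) && rd == 1 && !is64)
    return CallForm::CJal; // RV32C only
  if (fits(21))
    return CallForm::Jal;  // drops 4 bytes
  return CallForm::AuipcJalr; // out of range; keep the full pair
}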
+ DenseMap<const Defined *, uint64_t> valueDelta; + ArrayRef<SymbolAnchor> sa = makeArrayRef(aux.anchors); + uint32_t delta = 0; + for (auto it : llvm::enumerate(sec.relocations)) { + for (; sa.size() && sa[0].offset <= it.value().offset; sa = sa.slice(1)) + if (!sa[0].end) + valueDelta[sa[0].d] = delta; + delta = aux.relocDeltas[it.index()]; + } + for (const SymbolAnchor &sa : sa) + if (!sa.end) + valueDelta[sa.d] = delta; + sa = makeArrayRef(aux.anchors); + delta = 0; + + std::fill_n(aux.relocTypes.get(), sec.relocations.size(), R_RISCV_NONE); + aux.writes.clear(); + for (auto it : llvm::enumerate(sec.relocations)) { + Relocation &r = it.value(); + const size_t i = it.index(); + const uint64_t loc = secAddr + r.offset - delta; + uint32_t &cur = aux.relocDeltas[i], remove = 0; + switch (r.type) { + case R_RISCV_ALIGN: { + const uint64_t nextLoc = loc + r.addend; + const uint64_t align = PowerOf2Ceil(r.addend + 2); + // All bytes beyond the alignment boundary should be removed. + remove = nextLoc - ((loc + align - 1) & -align); + assert(static_cast<int32_t>(remove) >= 0 && + "R_RISCV_ALIGN needs expanding the content"); + break; + } + case R_RISCV_CALL: + case R_RISCV_CALL_PLT: + if (i + 1 != sec.relocations.size() && + sec.relocations[i + 1].type == R_RISCV_RELAX) + relaxCall(sec, i, loc, r, remove); + break; + } + + // For all anchors whose offsets are <= r.offset, they are preceded by + // the previous relocation whose `relocDeltas` value equals `delta`. + // Decrease their st_value and update their st_size. + for (; sa.size() && sa[0].offset <= r.offset; sa = sa.slice(1)) { + if (sa[0].end) + sa[0].d->size = sa[0].offset - delta - sa[0].d->value; + else + sa[0].d->value -= delta - valueDelta.find(sa[0].d)->second; + } + delta += remove; + if (delta != cur) { + cur = delta; + changed = true; + } + } + + for (const SymbolAnchor &a : sa) { + if (a.end) + a.d->size = a.offset - delta - a.d->value; + else + a.d->value -= delta - valueDelta.find(a.d)->second; + } + // Inform assignAddresses that the size has changed. + if (!isUInt<16>(delta)) + fatal("section size decrease is too large"); + sec.bytesDropped = delta; + return changed; +} + +// When relaxing just R_RISCV_ALIGN, relocDeltas is usually changed only once in +// the absence of a linker script. For call and load/store R_RISCV_RELAX, code +// shrinkage may reduce displacement and make more relocations eligible for +// relaxation. Code shrinkage may increase displacement to a call/load/store +// target at a higher fixed address, invalidating an earlier relaxation. Any +// change in section sizes can have cascading effect and require another +// relaxation pass. 
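Both relax() and riscvFinalizeRelax lean on the invariant documented in RISCVRelaxAux: relocDeltas stores the cumulative bytes removed, so a relocation's post-relaxation offset is its stored r_offset minus the previous table entry, and relocations[0] never moves. A tiny free-standing sketch of that mapping (not lld's API):

#include <cstdint>
#include <vector>

uint64_t actualOffset(const std::vector<uint64_t> &rOffset,
                      const std::vector<uint32_t> &relocDeltas, size_t i) {
  // Every relocation after the first slides down by the bytes removed
  // before it.
  return rOffset[i] - (i ? relocDeltas[i - 1] : 0);
}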
+bool RISCV::relaxOnce(int pass) const { + llvm::TimeTraceScope timeScope("RISC-V relaxOnce"); + if (config->relocatable) + return false; + + if (pass == 0) + initSymbolAnchors(); + + SmallVector<InputSection *, 0> storage; + bool changed = false; + for (OutputSection *osec : outputSections) { + if (!(osec->flags & SHF_EXECINSTR)) + continue; + for (InputSection *sec : getInputSections(*osec, storage)) + changed |= relax(*sec); + } + return changed; +} + +void elf::riscvFinalizeRelax(int passes) { + llvm::TimeTraceScope timeScope("Finalize RISC-V relaxation"); + log("relaxation passes: " + Twine(passes)); + SmallVector<InputSection *, 0> storage; + for (OutputSection *osec : outputSections) { + if (!(osec->flags & SHF_EXECINSTR)) + continue; + for (InputSection *sec : getInputSections(*osec, storage)) { + RISCVRelaxAux &aux = *sec->relaxAux; + if (!aux.relocDeltas) + continue; + + auto &rels = sec->relocations; + ArrayRef<uint8_t> old = sec->rawData; + size_t newSize = + old.size() - aux.relocDeltas[sec->relocations.size() - 1]; + size_t writesIdx = 0; + uint8_t *p = context().bAlloc.Allocate<uint8_t>(newSize); + uint64_t offset = 0; + int64_t delta = 0; + sec->rawData = makeArrayRef(p, newSize); + sec->bytesDropped = 0; + + // Update section content: remove NOPs for R_RISCV_ALIGN and rewrite + // instructions for relaxed relocations. + for (size_t i = 0, e = rels.size(); i != e; ++i) { + uint32_t remove = aux.relocDeltas[i] - delta; + delta = aux.relocDeltas[i]; + if (remove == 0) + continue; + + // Copy from last location to the current relocated location. + const Relocation &r = rels[i]; + uint64_t size = r.offset - offset; + memcpy(p, old.data() + offset, size); + p += size; + + // For R_RISCV_ALIGN, we will place `offset` in a location (among NOPs) + // to satisfy the alignment requirement. If `remove` is a multiple of 4, + // it is as if we have skipped some NOPs. Otherwise we are in the middle + // of a 4-byte NOP, and we need to rewrite the NOP sequence. + int64_t skip = 0; + if (r.type == R_RISCV_ALIGN) { + if (remove % 4 != 0) { + skip = r.addend - remove; + int64_t j = 0; + for (; j + 4 <= skip; j += 4) + write32le(p + j, 0x00000013); // nop + if (j != skip) { + assert(j + 2 == skip); + write16le(p + j, 0x0001); // c.nop + } + } + } else if (RelType newType = aux.relocTypes[i]) { + const uint32_t insn = aux.writes[writesIdx++]; + switch (newType) { + case R_RISCV_RVC_JUMP: + skip = 2; + write16le(p, insn); + break; + case R_RISCV_JAL: + skip = 4; + write32le(p, insn); + break; + default: + llvm_unreachable("unsupported type"); + } + } + + p += skip; + offset = r.offset + skip + remove; + } + memcpy(p, old.data() + offset, old.size() - offset); + + // Subtract the previous relocDeltas value from the relocation offset. + // For a pair of R_RISCV_CALL/R_RISCV_RELAX with the same offset, decrease + // their r_offset by the same delta. 
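The R_RISCV_ALIGN branch above keeps surviving padding well-formed: when `remove` is a multiple of 4 the leftover old nops are copied unchanged, otherwise skip = addend - remove bytes are re-emitted as 4-byte nops plus at most one trailing 2-byte c.nop. A free-standing restatement of that rule (an assumed helper, not lld's API; the caller supplies the relocation's addend and the bytes removed):

#include <cassert>
#include <cstdint>
#include <vector>

std::vector<uint32_t> rewriteAlignNops(int64_t addend, uint32_t remove) {
  std::vector<uint32_t> out;
  if (remove % 4 == 0)
    return out; // remaining old nops are already valid; copy them verbatim
  int64_t skip = addend - remove;
  int64_t j = 0;
  for (; j + 4 <= skip; j += 4)
    out.push_back(0x00000013); // nop
  if (j != skip) {
    assert(j + 2 == skip);
    out.push_back(0x0001); // c.nop
  }
  return out;
}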
+ delta = 0; + for (size_t i = 0, e = rels.size(); i != e;) { + uint64_t cur = rels[i].offset; + do { + rels[i].offset -= delta; + if (aux.relocTypes[i] != R_RISCV_NONE) + rels[i].type = aux.relocTypes[i]; + } while (++i != e && rels[i].offset == cur); + delta = aux.relocDeltas[i - 1]; + } + } + } +} + TargetInfo *elf::getRISCVTargetInfo() { static RISCV target; return ⌖ diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h index c593880d5cd3..39723f092784 100644 --- a/lld/ELF/Config.h +++ b/lld/ELF/Config.h @@ -11,6 +11,7 @@ #include "lld/Common/ErrorHandler.h" #include "llvm/ADT/CachedHashString.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/StringRef.h" @@ -215,7 +216,7 @@ struct Configuration { bool relocatable; bool relrGlibc = false; bool relrPackDynRelocs = false; - bool saveTemps; + llvm::DenseSet<llvm::StringRef> saveTempsArgs; std::vector<std::pair<llvm::GlobPattern, uint32_t>> shuffleSections; bool singleRoRx; bool shared; diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index 7500b68a9bef..4c26cba1cb4f 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -500,6 +500,10 @@ static void checkZOptions(opt::InputArgList &args) { warn("unknown -z value: " + StringRef(arg->getValue())); } +constexpr const char *saveTempsValues[] = { + "resolution", "preopt", "promote", "internalize", "import", + "opt", "precodegen", "prelink", "combinedindex"}; + void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) { ELFOptTable parser; opt::InputArgList args = parser.parse(argsArr.slice(1)); @@ -947,7 +951,7 @@ static bool getCompressDebugSections(opt::InputArgList &args) { return false; if (s != "zlib") error("unknown --compress-debug-sections value: " + s); - if (!zlib::isAvailable()) + if (!compression::zlib::isAvailable()) error("--compress-debug-sections: zlib is not available"); return true; } @@ -1151,7 +1155,21 @@ static void readConfigs(opt::InputArgList &args) { config->relax = args.hasFlag(OPT_relax, OPT_no_relax, true); config->rpath = getRpath(args); config->relocatable = args.hasArg(OPT_relocatable); - config->saveTemps = args.hasArg(OPT_save_temps); + + if (args.hasArg(OPT_save_temps)) { + // --save-temps implies saving all temps. + for (const char *s : saveTempsValues) + config->saveTempsArgs.insert(s); + } else { + for (auto *arg : args.filtered(OPT_save_temps_eq)) { + StringRef s = arg->getValue(); + if (llvm::is_contained(saveTempsValues, s)) + config->saveTempsArgs.insert(s); + else + error("unknown --save-temps value: " + s); + } + } + config->searchPaths = args::getStrings(args, OPT_library_path); config->sectionStartMap = getSectionStartMap(args); config->shared = args.hasArg(OPT_shared); diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp index 97fc18b58244..8fe36eca6a4b 100644 --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -72,12 +72,8 @@ InputSectionBase::InputSectionBase(InputFile *file, uint64_t flags, // If SHF_COMPRESSED is set, parse the header. The legacy .zdebug format is no // longer supported. - if (flags & SHF_COMPRESSED) { - if (!zlib::isAvailable()) - error(toString(file) + ": contains a compressed section, " + - "but zlib is not available"); + if (flags & SHF_COMPRESSED) invokeELFT(parseCompressedHeader); - } } // Drop SHF_GROUP bit unless we are producing a re-linkable object file. 
@@ -115,17 +111,17 @@ size_t InputSectionBase::getSize() const { void InputSectionBase::uncompress() const { size_t size = uncompressedSize; - char *uncompressedBuf; + uint8_t *uncompressedBuf; { static std::mutex mu; std::lock_guard<std::mutex> lock(mu); - uncompressedBuf = bAlloc().Allocate<char>(size); + uncompressedBuf = bAlloc().Allocate<uint8_t>(size); } - if (Error e = zlib::uncompress(toStringRef(rawData), uncompressedBuf, size)) + if (Error e = compression::zlib::uncompress(rawData, uncompressedBuf, size)) fatal(toString(this) + ": uncompress failed: " + llvm::toString(std::move(e))); - rawData = makeArrayRef((uint8_t *)uncompressedBuf, size); + rawData = makeArrayRef(uncompressedBuf, size); uncompressedSize = -1; } @@ -211,8 +207,13 @@ template <typename ELFT> void InputSectionBase::parseCompressedHeader() { } auto *hdr = reinterpret_cast<const typename ELFT::Chdr *>(rawData.data()); - if (hdr->ch_type != ELFCOMPRESS_ZLIB) { - error(toString(this) + ": unsupported compression type"); + if (hdr->ch_type == ELFCOMPRESS_ZLIB) { + if (!compression::zlib::isAvailable()) + error(toString(this) + " is compressed with ELFCOMPRESS_ZLIB, but lld is " + "not built with zlib support"); + } else { + error(toString(this) + ": unsupported compression type (" + + Twine(hdr->ch_type) + ")"); return; } @@ -622,6 +623,8 @@ uint64_t InputSectionBase::getRelocTargetVA(const InputFile *file, RelType type, return sym.getVA(a); case R_ADDEND: return a; + case R_RELAX_HINT: + return 0; case R_ARM_SBREL: return sym.getVA(a) - getARMStaticBase(sym); case R_GOT: @@ -987,6 +990,8 @@ void InputSectionBase::relocateAlloc(uint8_t *buf, uint8_t *bufEnd) { *rel.sym, rel.expr), bits); switch (rel.expr) { + case R_RELAX_HINT: + continue; case R_RELAX_GOT_PC: case R_RELAX_GOT_PC_NOPIC: target.relaxGot(bufLoc, rel, targetVA); @@ -1213,7 +1218,7 @@ template <class ELFT> void InputSection::writeTo(uint8_t *buf) { // to the buffer. if (uncompressedSize >= 0) { size_t size = uncompressedSize; - if (Error e = zlib::uncompress(toStringRef(rawData), (char *)buf, size)) + if (Error e = compression::zlib::uncompress(rawData, buf, size)) fatal(toString(this) + ": uncompress failed: " + llvm::toString(std::move(e))); uint8_t *bufEnd = buf + size; diff --git a/lld/ELF/InputSection.h b/lld/ELF/InputSection.h index c7c8f45f432d..d1b889750bbd 100644 --- a/lld/ELF/InputSection.h +++ b/lld/ELF/InputSection.h @@ -10,7 +10,9 @@ #define LLD_ELF_INPUT_SECTION_H #include "Relocations.h" +#include "lld/Common/CommonLinkerContext.h" #include "lld/Common/LLVM.h" +#include "lld/Common/Memory.h" #include "llvm/ADT/CachedHashString.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/TinyPtrVector.h" @@ -97,6 +99,8 @@ protected: link(link), info(info) {} }; +struct RISCVRelaxAux; + // This corresponds to a section of an input file. class InputSectionBase : public SectionBase { public: @@ -129,11 +133,10 @@ public: return cast_or_null<ObjFile<ELFT>>(file); } - // If basic block sections are enabled, many code sections could end up with - // one or two jump instructions at the end that could be relaxed to a smaller - // instruction. The members below help trimming the trailing jump instruction - // and shrinking a section. - uint8_t bytesDropped = 0; + // Used by --optimize-bb-jumps and RISC-V linker relaxation temporarily to + // indicate the number of bytes which is not counted in the size. This should + // be reset to zero after uses. 
+ uint16_t bytesDropped = 0; // Whether the section needs to be padded with a NOP filler due to // deleteFallThruJmpInsn. @@ -201,11 +204,17 @@ public: // This vector contains such "cooked" relocations. SmallVector<Relocation, 0> relocations; - // These are modifiers to jump instructions that are necessary when basic - // block sections are enabled. Basic block sections creates opportunities to - // relax jump instructions at basic block boundaries after reordering the - // basic blocks. - JumpInstrMod *jumpInstrMod = nullptr; + union { + // These are modifiers to jump instructions that are necessary when basic + // block sections are enabled. Basic block sections creates opportunities + // to relax jump instructions at basic block boundaries after reordering the + // basic blocks. + JumpInstrMod *jumpInstrMod = nullptr; + + // Auxiliary information for RISC-V linker relaxation. RISC-V does not use + // jumpInstrMod. + RISCVRelaxAux *relaxAux; + }; // A function compiled with -fsplit-stack calling a function // compiled without -fsplit-stack needs its prologue adjusted. Find diff --git a/lld/ELF/LTO.cpp b/lld/ELF/LTO.cpp index e44ef0d3c2c8..8c5001af3a91 100644 --- a/lld/ELF/LTO.cpp +++ b/lld/ELF/LTO.cpp @@ -178,9 +178,10 @@ static lto::Config createConfig() { if (config->ltoEmitAsm) c.CGFileType = CGFT_AssemblyFile; - if (config->saveTemps) + if (!config->saveTempsArgs.empty()) checkError(c.addSaveTemps(config->outputFile.str() + ".", - /*UseInputModulePath*/ true)); + /*UseInputModulePath*/ true, + config->saveTempsArgs)); return c; } @@ -365,7 +366,7 @@ std::vector<InputFile *> BitcodeCompiler::compile() { saveBuffer(buf[i], config->ltoObjPath + Twine(i)); } - if (config->saveTemps) { + if (config->saveTempsArgs.contains("prelink")) { if (!buf[0].empty()) saveBuffer(buf[0], config->outputFile + ".lto.o"); for (unsigned i = 1; i != maxTasks; ++i) diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td index 1d9fbcbcee3c..c98d21717de0 100644 --- a/lld/ELF/Options.td +++ b/lld/ELF/Options.td @@ -585,6 +585,8 @@ def opt_remarks_with_hotness: FF<"opt-remarks-with-hotness">, def opt_remarks_format: Separate<["--"], "opt-remarks-format">, HelpText<"The format used for serializing remarks (default: YAML)">; def save_temps: F<"save-temps">, HelpText<"Save intermediate LTO compilation results">; +def save_temps_eq: JJ<"save-temps=">, HelpText<"Save select intermediate LTO compilation results">, + Values<"resolution,preopt,promote,internalize,import,opt,precodegen,prelink,combinedindex">; def lto_basic_block_sections: JJ<"lto-basic-block-sections=">, HelpText<"Enable basic block sections for LTO">; defm lto_unique_basic_block_section_names: BB<"lto-unique-basic-block-section-names", diff --git a/lld/ELF/OutputSections.cpp b/lld/ELF/OutputSections.cpp index bc940c7e6546..cbde8ac800d3 100644 --- a/lld/ELF/OutputSections.cpp +++ b/lld/ELF/OutputSections.cpp @@ -419,7 +419,8 @@ template <class ELFT> void OutputSection::writeTo(uint8_t *buf) { } // Write leading padding. 
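Taken together with the readConfigs changes earlier, the new --save-temps= spelling lets a build keep individual LTO stages instead of everything; plain --save-temps still implies the full set, and an unrecognized stage is diagnosed. Hypothetical invocations (file names are illustrative):

ld.lld --save-temps -o app a.o b.o           # keep every LTO intermediate
ld.lld --save-temps=prelink -o app a.o b.o   # keep only the prelink object, app.lto.o
ld.lld --save-temps=bogus -o app a.o b.o     # error: unknown --save-temps value: bogus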
- SmallVector<InputSection *, 0> sections = getInputSections(*this); + SmallVector<InputSection *, 0> storage; + ArrayRef<InputSection *> sections = getInputSections(*this, storage); std::array<uint8_t, 4> filler = getFiller(); bool nonZeroFiller = read32(filler.data()) != 0; if (nonZeroFiller) @@ -592,12 +593,24 @@ InputSection *elf::getFirstInputSection(const OutputSection *os) { return nullptr; } -SmallVector<InputSection *, 0> elf::getInputSections(const OutputSection &os) { - SmallVector<InputSection *, 0> ret; - for (SectionCommand *cmd : os.commands) - if (auto *isd = dyn_cast<InputSectionDescription>(cmd)) - ret.insert(ret.end(), isd->sections.begin(), isd->sections.end()); - return ret; +ArrayRef<InputSection *> +elf::getInputSections(const OutputSection &os, + SmallVector<InputSection *, 0> &storage) { + ArrayRef<InputSection *> ret; + storage.clear(); + for (SectionCommand *cmd : os.commands) { + auto *isd = dyn_cast<InputSectionDescription>(cmd); + if (!isd) + continue; + if (ret.empty()) { + ret = isd->sections; + } else { + if (storage.empty()) + storage.assign(ret.begin(), ret.end()); + storage.insert(storage.end(), isd->sections.begin(), isd->sections.end()); + } + } + return storage.empty() ? ret : makeArrayRef(storage); } // Sorts input sections by section name suffixes, so that .foo.N comes @@ -622,7 +635,8 @@ std::array<uint8_t, 4> OutputSection::getFiller() { void OutputSection::checkDynRelAddends(const uint8_t *bufStart) { assert(config->writeAddends && config->checkDynamicRelocs); assert(type == SHT_REL || type == SHT_RELA); - SmallVector<InputSection *, 0> sections = getInputSections(*this); + SmallVector<InputSection *, 0> storage; + ArrayRef<InputSection *> sections = getInputSections(*this, storage); parallelFor(0, sections.size(), [&](size_t i) { // When linking with -r or --emit-relocs we might also call this function // for input .rel[a].<sec> sections which we simply pass through to the diff --git a/lld/ELF/OutputSections.h b/lld/ELF/OutputSections.h index 020eeaec368e..6bdbcfce7b12 100644 --- a/lld/ELF/OutputSections.h +++ b/lld/ELF/OutputSections.h @@ -134,7 +134,9 @@ struct OutputDesc final : SectionCommand { int getPriority(StringRef s); InputSection *getFirstInputSection(const OutputSection *os); -SmallVector<InputSection *, 0> getInputSections(const OutputSection &os); +llvm::ArrayRef<InputSection *> +getInputSections(const OutputSection &os, + SmallVector<InputSection *, 0> &storage); // All output sections that are handled by the linker specially are // globally accessible. Writer initializes them, so don't use them diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index 64381ae75414..e54e1ebd41bb 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -958,8 +958,8 @@ bool RelocationScanner::isStaticLinkTimeConstant(RelExpr e, RelType type, const Symbol &sym, uint64_t relOff) const { // These expressions always compute a constant - if (oneof<R_GOTPLT, R_GOT_OFF, R_MIPS_GOT_LOCAL_PAGE, R_MIPS_GOTREL, - R_MIPS_GOT_OFF, R_MIPS_GOT_OFF32, R_MIPS_GOT_GP_PC, + if (oneof<R_GOTPLT, R_GOT_OFF, R_RELAX_HINT, R_MIPS_GOT_LOCAL_PAGE, + R_MIPS_GOTREL, R_MIPS_GOT_OFF, R_MIPS_GOT_OFF32, R_MIPS_GOT_GP_PC, R_AARCH64_GOT_PAGE_PC, R_GOT_PC, R_GOTONLY_PC, R_GOTPLTONLY_PC, R_PLT_PC, R_PLT_GOTPLT, R_PPC32_PLTREL, R_PPC64_CALL_PLT, R_PPC64_RELAX_TOC, R_RISCV_ADD, R_AARCH64_GOT_PAGE>(e)) @@ -2118,7 +2118,9 @@ bool ThunkCreator::normalizeExistingThunk(Relocation &rel, uint64_t src) { // made no changes. 
If the target requires range extension thunks, currently // ARM, then any future change in offset between caller and callee risks a // relocation out of range error. -bool ThunkCreator::createThunks(ArrayRef<OutputSection *> outputSections) { +bool ThunkCreator::createThunks(uint32_t pass, + ArrayRef<OutputSection *> outputSections) { + this->pass = pass; bool addressesChanged = false; if (pass == 0 && target->getThunkSectionSpacing()) @@ -2180,7 +2182,6 @@ bool ThunkCreator::createThunks(ArrayRef<OutputSection *> outputSections) { // Merge all created synthetic ThunkSections back into OutputSection mergeThunks(outputSections); - ++pass; return addressesChanged; } diff --git a/lld/ELF/Relocations.h b/lld/ELF/Relocations.h index da0f2289bc90..f70d255ba229 100644 --- a/lld/ELF/Relocations.h +++ b/lld/ELF/Relocations.h @@ -46,6 +46,7 @@ enum RelExpr { R_PLT, R_PLT_PC, R_PLT_GOTPLT, + R_RELAX_HINT, R_RELAX_GOT_PC, R_RELAX_GOT_PC_NOPIC, R_RELAX_TLS_GD_TO_IE, @@ -139,12 +140,7 @@ class InputSectionDescription; class ThunkCreator { public: // Return true if Thunks have been added to OutputSections - bool createThunks(ArrayRef<OutputSection *> outputSections); - - // The number of completed passes of createThunks this permits us - // to do one time initialization on Pass 0 and put a limit on the - // number of times it can be called to prevent infinite loops. - uint32_t pass = 0; + bool createThunks(uint32_t pass, ArrayRef<OutputSection *> outputSections); private: void mergeThunks(ArrayRef<OutputSection *> outputSections); @@ -186,6 +182,11 @@ private: // so we need to make sure that there is only one of them. // The Mips LA25 Thunk is an example of an inline ThunkSection. llvm::DenseMap<InputSection *, ThunkSection *> thunkedSections; + + // The number of completed passes of createThunks this permits us + // to do one time initialization on Pass 0 and put a limit on the + // number of times it can be called to prevent infinite loops. + uint32_t pass = 0; }; // Return a int64_t to make sure we get the sign extension out of the way as diff --git a/lld/ELF/Target.h b/lld/ELF/Target.h index 5e9bbd62572d..14b1f53c6a81 100644 --- a/lld/ELF/Target.h +++ b/lld/ELF/Target.h @@ -89,6 +89,9 @@ public: relocate(loc, Relocation{R_NONE, type, 0, 0, nullptr}, val); } + // Do a linker relaxation pass and return true if we changed something. 
+ virtual bool relaxOnce(int pass) const { return false; } + virtual void applyJumpInstrMod(uint8_t *loc, JumpModType type, JumpModType val) const {} @@ -221,6 +224,7 @@ void writePrefixedInstruction(uint8_t *loc, uint64_t insn); void addPPC64SaveRestore(); uint64_t getPPC64TocBase(); uint64_t getAArch64Page(uint64_t expr); +void riscvFinalizeRelax(int passes); class AArch64Relaxer { bool safeToRelaxAdrpLdr = true; diff --git a/lld/ELF/Thunks.cpp b/lld/ELF/Thunks.cpp index d6172edc76f2..738eb24f2200 100644 --- a/lld/ELF/Thunks.cpp +++ b/lld/ELF/Thunks.cpp @@ -916,25 +916,18 @@ void PPC64R2SaveStub::writeTo(uint8_t *buf) { write32(buf + 4, 0x48000000 | (offset & 0x03fffffc)); // b <offset> } else if (isInt<34>(offset)) { int nextInstOffset; - if (!config->power10Stubs) { - uint64_t tocOffset = destination.getVA() - getPPC64TocBase(); - if (tocOffset >> 16 > 0) { - const uint64_t addi = ADDI_R12_TO_R12_NO_DISP | (tocOffset & 0xffff); - const uint64_t addis = ADDIS_R12_TO_R2_NO_DISP | ((tocOffset >> 16) & 0xffff); - write32(buf + 4, addis); // addis r12, r2 , top of offset - write32(buf + 8, addi); // addi r12, r12, bottom of offset - nextInstOffset = 12; - } else { - const uint64_t addi = ADDI_R12_TO_R2_NO_DISP | (tocOffset & 0xffff); - write32(buf + 4, addi); // addi r12, r2, offset - nextInstOffset = 8; - } - } else { - const uint64_t paddi = PADDI_R12_NO_DISP | - (((offset >> 16) & 0x3ffff) << 32) | - (offset & 0xffff); - writePrefixedInstruction(buf + 4, paddi); // paddi r12, 0, func@pcrel, 1 + uint64_t tocOffset = destination.getVA() - getPPC64TocBase(); + if (tocOffset >> 16 > 0) { + const uint64_t addi = ADDI_R12_TO_R12_NO_DISP | (tocOffset & 0xffff); + const uint64_t addis = + ADDIS_R12_TO_R2_NO_DISP | ((tocOffset >> 16) & 0xffff); + write32(buf + 4, addis); // addis r12, r2 , top of offset + write32(buf + 8, addi); // addi r12, r12, bottom of offset nextInstOffset = 12; + } else { + const uint64_t addi = ADDI_R12_TO_R2_NO_DISP | (tocOffset & 0xffff); + write32(buf + 4, addi); // addi r12, r2, offset + nextInstOffset = 8; } write32(buf + nextInstOffset, MTCTR_R12); // mtctr r12 write32(buf + nextInstOffset + 4, BCTR); // bctr diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp index 7bfe29eda695..705cc7bf9766 100644 --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -1312,12 +1312,16 @@ static DenseMap<const InputSectionBase *, int> buildSectionOrder() { // Sorts the sections in ISD according to the provided section order. static void sortISDBySectionOrder(InputSectionDescription *isd, - const DenseMap<const InputSectionBase *, int> &order) { + const DenseMap<const InputSectionBase *, int> &order, + bool executableOutputSection) { SmallVector<InputSection *, 0> unorderedSections; SmallVector<std::pair<InputSection *, int>, 0> orderedSections; uint64_t unorderedSize = 0; + uint64_t totalSize = 0; for (InputSection *isec : isd->sections) { + if (executableOutputSection) + totalSize += isec->getSize(); auto i = order.find(isec); if (i == order.end()) { unorderedSections.push_back(isec); @@ -1355,8 +1359,15 @@ sortISDBySectionOrder(InputSectionDescription *isd, // of the second block of cold code can call the hot code without a thunk. So // we effectively double the amount of code that could potentially call into // the hot code without a thunk. + // + // The above is not necessary if total size of input sections in this "isd" + // is small. 
Note that we assume all input sections are executable if the + // output section is executable (which is not always true but supposed to + // cover most cases). size_t insPt = 0; - if (target->getThunkSectionSpacing() && !orderedSections.empty()) { + if (executableOutputSection && !orderedSections.empty() && + target->getThunkSectionSpacing() && + totalSize >= target->getThunkSectionSpacing()) { uint64_t unorderedPos = 0; for (; insPt != unorderedSections.size(); ++insPt) { unorderedPos += unorderedSections[insPt]->getSize(); @@ -1397,7 +1408,7 @@ static void sortSection(OutputSection &osec, if (!order.empty()) for (SectionCommand *b : osec.commands) if (auto *isd = dyn_cast<InputSectionDescription>(b)) - sortISDBySectionOrder(isd, order); + sortISDBySectionOrder(isd, order, osec.flags & SHF_EXECINSTR); if (script->hasSectionsCommand) return; @@ -1630,14 +1641,17 @@ template <class ELFT> void Writer<ELFT>::finalizeAddressDependentContent() { if (config->emachine == EM_HEXAGON) hexagonTLSSymbolUpdate(outputSections); - int assignPasses = 0; + uint32_t pass = 0, assignPasses = 0; for (;;) { - bool changed = target->needsThunks && tc.createThunks(outputSections); + bool changed = target->needsThunks ? tc.createThunks(pass, outputSections) + : target->relaxOnce(pass); + ++pass; // With Thunk Size much smaller than branch range we expect to // converge quickly; if we get to 15 something has gone wrong. - if (changed && tc.pass >= 15) { - error("thunk creation not converged"); + if (changed && pass >= 15) { + error(target->needsThunks ? "thunk creation not converged" + : "relaxation not converged"); break; } @@ -1675,6 +1689,8 @@ template <class ELFT> void Writer<ELFT>::finalizeAddressDependentContent() { } } } + if (!config->relocatable && config->emachine == EM_RISCV) + riscvFinalizeRelax(pass); if (config->relocatable) for (OutputSection *sec : outputSections) @@ -1741,6 +1757,7 @@ static void fixSymbolsAfterShrinking() { // option is used. template <class ELFT> void Writer<ELFT>::optimizeBasicBlockJumps() { assert(config->optimizeBBJumps); + SmallVector<InputSection *, 0> storage; script->assignAddresses(); // For every output section that has executable input sections, this @@ -1752,7 +1769,7 @@ template <class ELFT> void Writer<ELFT>::optimizeBasicBlockJumps() { for (OutputSection *osec : outputSections) { if (!(osec->flags & SHF_EXECINSTR)) continue; - SmallVector<InputSection *, 0> sections = getInputSections(*osec); + ArrayRef<InputSection *> sections = getInputSections(*osec, storage); size_t numDeleted = 0; // Delete all fall through jump instructions. 
Also, check if two // consecutive jump instructions can be flipped so that a fall @@ -1772,7 +1789,7 @@ template <class ELFT> void Writer<ELFT>::optimizeBasicBlockJumps() { fixSymbolsAfterShrinking(); for (OutputSection *osec : outputSections) - for (InputSection *is : getInputSections(*osec)) + for (InputSection *is : getInputSections(*osec, storage)) is->trim(); } @@ -2165,9 +2182,10 @@ template <class ELFT> void Writer<ELFT>::checkExecuteOnly() { if (!config->executeOnly) return; + SmallVector<InputSection *, 0> storage; for (OutputSection *osec : outputSections) if (osec->flags & SHF_EXECINSTR) - for (InputSection *isec : getInputSections(*osec)) + for (InputSection *isec : getInputSections(*osec, storage)) if (!(isec->flags & SHF_EXECINSTR)) error("cannot place " + toString(isec) + " into " + toString(osec->name) + diff --git a/lld/MachO/Arch/ARM.cpp b/lld/MachO/Arch/ARM.cpp index 7de0837fcf38..fd215ed99b59 100644 --- a/lld/MachO/Arch/ARM.cpp +++ b/lld/MachO/Arch/ARM.cpp @@ -40,6 +40,9 @@ struct ARM : TargetInfo { void relaxGotLoad(uint8_t *loc, uint8_t type) const override; const RelocAttrs &getRelocAttrs(uint8_t type) const override; uint64_t getPageSize() const override { return 4 * 1024; } + + void handleDtraceReloc(const Symbol *sym, const Reloc &r, + uint8_t *loc) const override; }; } // namespace @@ -170,3 +173,36 @@ TargetInfo *macho::createARMTargetInfo(uint32_t cpuSubtype) { static ARM t(cpuSubtype); return &t; } + +void ARM::handleDtraceReloc(const Symbol *sym, const Reloc &r, + uint8_t *loc) const { + if (config->outputType == MH_OBJECT) + return; + + switch (r.type) { + case ARM_RELOC_BR24: + if (sym->getName().startswith("___dtrace_probe")) { + // change call site to a NOP + write32le(loc, 0xE1A00000); + } else if (sym->getName().startswith("___dtrace_isenabled")) { + // change call site to 'eor r0, r0, r0' + write32le(loc, 0xE0200000); + } else { + error("Unrecognized dtrace symbol prefix: " + toString(*sym)); + } + break; + case ARM_THUMB_RELOC_BR22: + if (sym->getName().startswith("___dtrace_probe")) { + // change 32-bit blx call site to two thumb NOPs + write32le(loc, 0x46C046C0); + } else if (sym->getName().startswith("___dtrace_isenabled")) { + // change 32-bit blx call site to 'nop', 'eor r0, r0' + write32le(loc, 0x46C04040); + } else { + error("Unrecognized dtrace symbol prefix: " + toString(*sym)); + } + break; + default: + llvm_unreachable("Unsupported dtrace relocation type for ARM"); + } +} diff --git a/lld/MachO/Arch/ARM64.cpp b/lld/MachO/Arch/ARM64.cpp index 7064df5793aa..5901a9e09b35 100644 --- a/lld/MachO/Arch/ARM64.cpp +++ b/lld/MachO/Arch/ARM64.cpp @@ -168,10 +168,10 @@ enum ExtendType { ZeroExtend = 1, Sign64 = 2, Sign32 = 3 }; struct Ldr { uint8_t destRegister; uint8_t baseRegister; - uint8_t size; + uint8_t p2Size; bool isFloat; ExtendType extendType; - uint64_t offset; + int64_t offset; }; struct PerformedReloc { @@ -189,6 +189,8 @@ public: void applyAdrpAdd(const OptimizationHint &); void applyAdrpAdrp(const OptimizationHint &); void applyAdrpLdr(const OptimizationHint &); + void applyAdrpLdrGot(const OptimizationHint &); + void applyAdrpLdrGotLdr(const OptimizationHint &); private: uint8_t *buf; @@ -227,34 +229,35 @@ static bool parseLdr(uint32_t insn, Ldr &ldr) { if ((insn & 0x3fc00000) == 0x39400000) { // LDR (immediate), LDRB (immediate), LDRH (immediate) - ldr.size = 1 << size; + ldr.p2Size = size; ldr.extendType = ZeroExtend; ldr.isFloat = false; } else if ((insn & 0x3f800000) == 0x39800000) { // LDRSB (immediate), LDRSH (immediate), LDRSW 
(immediate) - ldr.size = 1 << size; + ldr.p2Size = size; ldr.extendType = static_cast<ExtendType>(opc); ldr.isFloat = false; } else if ((insn & 0x3f400000) == 0x3d400000) { // LDR (immediate, SIMD&FP) ldr.extendType = ZeroExtend; ldr.isFloat = true; - if (size == 2 && opc == 1) - ldr.size = 4; - else if (size == 3 && opc == 1) - ldr.size = 8; + if (opc == 1) + ldr.p2Size = size; else if (size == 0 && opc == 3) - ldr.size = 16; + ldr.p2Size = 4; else return false; } else { return false; } - ldr.offset = ((insn >> 10) & 0xfff) * ldr.size; + ldr.offset = ((insn >> 10) & 0xfff) << ldr.p2Size; return true; } +static bool isValidAdrOffset(int32_t delta) { return isInt<21>(delta); } + static void writeAdr(void *loc, uint32_t dest, int32_t delta) { + assert(isValidAdrOffset(delta)); uint32_t opcode = 0x10000000; uint32_t immHi = (delta & 0x001ffffc) << 3; uint32_t immLo = (delta & 0x00000003) << 29; @@ -263,26 +266,63 @@ static void writeAdr(void *loc, uint32_t dest, int32_t delta) { static void writeNop(void *loc) { write32le(loc, 0xd503201f); } -static void writeLiteralLdr(void *loc, Ldr original, int32_t delta) { - uint32_t imm19 = (delta & 0x001ffffc) << 3; - uint32_t opcode = 0; - switch (original.size) { - case 4: - if (original.isFloat) +static bool isLiteralLdrEligible(const Ldr &ldr) { + return ldr.p2Size > 1 && isShiftedInt<19, 2>(ldr.offset); +} + +static void writeLiteralLdr(void *loc, const Ldr &ldr) { + assert(isLiteralLdrEligible(ldr)); + uint32_t imm19 = (ldr.offset / 4 & maskTrailingOnes<uint32_t>(19)) << 5; + uint32_t opcode; + switch (ldr.p2Size) { + case 2: + if (ldr.isFloat) opcode = 0x1c000000; else - opcode = original.extendType == Sign64 ? 0x98000000 : 0x18000000; + opcode = ldr.extendType == Sign64 ? 0x98000000 : 0x18000000; break; - case 8: - opcode = original.isFloat ? 0x5c000000 : 0x58000000; + case 3: + opcode = ldr.isFloat ? 0x5c000000 : 0x58000000; break; - case 16: + case 4: opcode = 0x9c000000; break; default: - assert(false && "Invalid size for literal ldr"); + llvm_unreachable("Invalid literal ldr size"); } - write32le(loc, opcode | imm19 | original.destRegister); + write32le(loc, opcode | imm19 | ldr.destRegister); +} + +static bool isImmediateLdrEligible(const Ldr &ldr) { + // Note: We deviate from ld64's behavior, which converts to immediate loads + // only if ldr.offset < 4096, even though the offset is divided by the load's + // size in the 12-bit immediate operand. Only the unsigned offset variant is + // supported. 
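To make the scaled-immediate rule above concrete, here is a standalone restatement of the check (a sketch; fitsUnsignedScaledImm12 is an illustrative name, not from the patch, and the body of isImmediateLdrEligible resumes below):

#include <cstdint>

// The unsigned-offset LDR immediate is scaled by the access size
// (1 << p2Size), so an 8-byte load (p2Size == 3) reaches byte offsets
// 0, 8, ..., 4095 * 8 = 32760; anything negative, misaligned, or larger
// is rejected, mirroring isImmediateLdrEligible above.
static bool fitsUnsignedScaledImm12(int64_t offset, unsigned p2Size) {
  const int64_t size = int64_t(1) << p2Size;
  return offset >= 0 && offset % size == 0 && (offset >> p2Size) < 4096;
}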
+ + uint32_t size = 1 << ldr.p2Size; + return ldr.offset >= 0 && (ldr.offset % size) == 0 && + isUInt<12>(ldr.offset >> ldr.p2Size); +} + +static void writeImmediateLdr(void *loc, const Ldr &ldr) { + assert(isImmediateLdrEligible(ldr)); + uint32_t opcode = 0x39000000; + if (ldr.isFloat) { + opcode |= 0x04000000; + assert(ldr.extendType == ZeroExtend); + } + opcode |= ldr.destRegister; + opcode |= ldr.baseRegister << 5; + uint8_t size, opc; + if (ldr.p2Size == 4) { + size = 0; + opc = 3; + } else { + opc = ldr.extendType; + size = ldr.p2Size; + } + uint32_t immBits = ldr.offset >> ldr.p2Size; + write32le(loc, opcode | (immBits << 10) | (opc << 22) | (size << 30)); } uint64_t OptimizationHintContext::getRelocTarget(const Reloc &reloc) { @@ -351,7 +391,7 @@ void OptimizationHintContext::applyAdrpAdd(const OptimizationHint &hint) { if (rel1->referentVA != rel2->referentVA) return; int64_t delta = rel1->referentVA - rel1->rel.offset - isec->getVA(); - if (delta >= (1 << 20) || delta < -(1 << 20)) + if (!isValidAdrOffset(delta)) return; writeAdr(buf + hint.offset0, add.destRegister, delta); @@ -412,16 +452,120 @@ void OptimizationHintContext::applyAdrpLdr(const OptimizationHint &hint) { return; if (ldr.offset != (rel1->referentVA & 0xfff)) return; - if ((rel1->referentVA & 3) != 0) + ldr.offset = rel1->referentVA - rel2->rel.offset - isec->getVA(); + if (!isLiteralLdrEligible(ldr)) return; - if (ldr.size == 1 || ldr.size == 2) + + writeNop(buf + hint.offset0); + writeLiteralLdr(buf + hint.offset0 + hint.delta[0], ldr); +} + +// GOT loads are emitted by the compiler as a pair of adrp and ldr instructions, +// but they may be changed to adrp+add by relaxGotLoad(). This hint performs +// the AdrpLdr or AdrpAdd transformation depending on whether it was relaxed. +void OptimizationHintContext::applyAdrpLdrGot(const OptimizationHint &hint) { + uint32_t ins2 = read32le(buf + hint.offset0 + hint.delta[0]); + Add add; + Ldr ldr; + if (parseAdd(ins2, add)) + applyAdrpAdd(hint); + else if (parseLdr(ins2, ldr)) + applyAdrpLdr(hint); +} + +// Relaxes a GOT-indirect load. +// If the referenced symbol is external and its GOT entry is within +/- 1 MiB, +// the GOT entry can be loaded with a single literal ldr instruction. +// If the referenced symbol is local, its address may be loaded directly if it's +// close enough, or with an adr(p) + ldr pair if it's not. +void OptimizationHintContext::applyAdrpLdrGotLdr(const OptimizationHint &hint) { + uint32_t ins1 = read32le(buf + hint.offset0); + Adrp adrp; + if (!parseAdrp(ins1, adrp)) return; - int64_t delta = rel1->referentVA - rel2->rel.offset - isec->getVA(); - if (delta >= (1 << 20) || delta < -(1 << 20)) + uint32_t ins3 = read32le(buf + hint.offset0 + hint.delta[1]); + Ldr ldr3; + if (!parseLdr(ins3, ldr3)) return; + uint32_t ins2 = read32le(buf + hint.offset0 + hint.delta[0]); + Ldr ldr2; + Add add2; - writeNop(buf + hint.offset0); - writeLiteralLdr(buf + hint.offset0 + hint.delta[0], ldr, delta); + Optional<PerformedReloc> rel1 = findPrimaryReloc(hint.offset0); + Optional<PerformedReloc> rel2 = findReloc(hint.offset0 + hint.delta[0]); + if (!rel1 || !rel2) + return; + + if (parseAdd(ins2, add2)) { + // adrp x0, _foo@PAGE + // add x1, x0, _foo@PAGEOFF + // ldr x2, [x1, #off] + + if (adrp.destRegister != add2.srcRegister) + return; + if (add2.destRegister != ldr3.baseRegister) + return; + + // Load from the target address directly. 
+ // nop + // nop + // ldr x2, [_foo + #off] + uint64_t rel3VA = hint.offset0 + hint.delta[1] + isec->getVA(); + Ldr literalLdr = ldr3; + literalLdr.offset += rel1->referentVA - rel3VA; + if (isLiteralLdrEligible(literalLdr)) { + writeNop(buf + hint.offset0); + writeNop(buf + hint.offset0 + hint.delta[0]); + writeLiteralLdr(buf + hint.offset0 + hint.delta[1], literalLdr); + return; + } + + // Load the target address into a register and load from there indirectly. + // adr x1, _foo + // nop + // ldr x2, [x1, #off] + int64_t adrOffset = rel1->referentVA - rel1->rel.offset - isec->getVA(); + if (isValidAdrOffset(adrOffset)) { + writeAdr(buf + hint.offset0, ldr3.baseRegister, adrOffset); + writeNop(buf + hint.offset0 + hint.delta[0]); + return; + } + + // Move the target's page offset into the ldr's immediate offset. + // adrp x0, _foo@PAGE + // nop + // ldr x2, [x0, _foo@PAGEOFF + #off] + Ldr immediateLdr = ldr3; + immediateLdr.baseRegister = adrp.destRegister; + immediateLdr.offset += add2.addend; + if (isImmediateLdrEligible(immediateLdr)) { + writeNop(buf + hint.offset0 + hint.delta[0]); + writeImmediateLdr(buf + hint.offset0 + hint.delta[1], immediateLdr); + return; + } + } else if (parseLdr(ins2, ldr2)) { + // adrp x1, _foo@GOTPAGE + // ldr x2, [x1, _foo@GOTPAGEOFF] + // ldr x3, [x2, #off] + if (ldr2.baseRegister != adrp.destRegister) + return; + if (ldr3.baseRegister != ldr2.destRegister) + return; + // Loads from the GOT must be pointer sized. + if (ldr2.p2Size != 3 || ldr2.isFloat) + return; + + // Load the GOT entry's address directly. + // nop + // ldr x2, _foo@GOTPAGE + _foo@GOTPAGEOFF + // ldr x3, [x2, #off] + Ldr literalLdr = ldr2; + literalLdr.offset = rel1->referentVA - rel2->rel.offset - isec->getVA(); + if (isLiteralLdrEligible(literalLdr)) { + writeNop(buf + hint.offset0); + writeLiteralLdr(buf + hint.offset0 + hint.delta[0], literalLdr); + } + } } void ARM64::applyOptimizationHints(uint8_t *buf, const ConcatInputSection *isec, @@ -443,7 +587,11 @@ void ARM64::applyOptimizationHints(uint8_t *buf, const ConcatInputSection *isec, ctx1.applyAdrpLdr(hint); break; case LOH_ARM64_ADRP_ADD_LDR: + // TODO: Implement this + break; case LOH_ARM64_ADRP_LDR_GOT_LDR: + ctx1.applyAdrpLdrGotLdr(hint); + break; case LOH_ARM64_ADRP_ADD_STR: case LOH_ARM64_ADRP_LDR_GOT_STR: // TODO: Implement these @@ -452,7 +600,7 @@ void ARM64::applyOptimizationHints(uint8_t *buf, const ConcatInputSection *isec, ctx1.applyAdrpAdd(hint); break; case LOH_ARM64_ADRP_LDR_GOT: - // TODO: Implement this as well + ctx1.applyAdrpLdrGot(hint); break; } } diff --git a/lld/MachO/Arch/ARM64Common.cpp b/lld/MachO/Arch/ARM64Common.cpp index f55258ce8ec9..27fdf4ba14d9 100644 --- a/lld/MachO/Arch/ARM64Common.cpp +++ b/lld/MachO/Arch/ARM64Common.cpp @@ -109,3 +109,21 @@ void ARM64Common::relaxGotLoad(uint8_t *loc, uint8_t type) const { instruction = ((instruction & 0x001fffff) | 0x91000000); write32le(loc, instruction); } + +void ARM64Common::handleDtraceReloc(const Symbol *sym, const Reloc &r, + uint8_t *loc) const { + assert(r.type == ARM64_RELOC_BRANCH26); + + if (config->outputType == MH_OBJECT) + return; + + if (sym->getName().startswith("___dtrace_probe")) { + // change call site to a NOP + write32le(loc, 0xD503201F); + } else if (sym->getName().startswith("___dtrace_isenabled")) { + // change call site to 'MOVZ X0,0' + write32le(loc, 0xD2800000); + } else { + error("Unrecognized dtrace symbol prefix: " + toString(*sym)); + } +} diff --git a/lld/MachO/Arch/ARM64Common.h b/lld/MachO/Arch/ARM64Common.h index 
54f94ee76c06..1bd85066b35a 100644 --- a/lld/MachO/Arch/ARM64Common.h +++ b/lld/MachO/Arch/ARM64Common.h @@ -29,6 +29,9 @@ struct ARM64Common : TargetInfo { void relaxGotLoad(uint8_t *loc, uint8_t type) const override; uint64_t getPageSize() const override { return 16 * 1024; } + + void handleDtraceReloc(const Symbol *sym, const Reloc &r, + uint8_t *loc) const override; }; inline uint64_t bitField(uint64_t value, int right, int width, int left) { diff --git a/lld/MachO/Arch/X86_64.cpp b/lld/MachO/Arch/X86_64.cpp index d675356b9ffb..d2efa5bb3451 100644 --- a/lld/MachO/Arch/X86_64.cpp +++ b/lld/MachO/Arch/X86_64.cpp @@ -39,6 +39,9 @@ struct X86_64 : TargetInfo { void relaxGotLoad(uint8_t *loc, uint8_t type) const override; const RelocAttrs &getRelocAttrs(uint8_t type) const override; uint64_t getPageSize() const override { return 4 * 1024; } + + void handleDtraceReloc(const Symbol *sym, const Reloc &r, + uint8_t *loc) const override; }; } // namespace @@ -199,3 +202,23 @@ TargetInfo *macho::createX86_64TargetInfo() { static X86_64 t; return &t; } + +void X86_64::handleDtraceReloc(const Symbol *sym, const Reloc &r, + uint8_t *loc) const { + assert(r.type == X86_64_RELOC_BRANCH); + + if (config->outputType == MH_OBJECT) + return; + + if (sym->getName().startswith("___dtrace_probe")) { + // change call site to a NOP + loc[-1] = 0x90; + write32le(loc, 0x00401F0F); + } else if (sym->getName().startswith("___dtrace_isenabled")) { + // change call site to a clear eax + loc[-1] = 0x33; + write32le(loc, 0x909090C0); + } else { + error("Unrecognized dtrace symbol prefix: " + toString(*sym)); + } +} diff --git a/lld/MachO/Config.h b/lld/MachO/Config.h index b6c6abb44c65..ccf71b6535ea 100644 --- a/lld/MachO/Config.h +++ b/lld/MachO/Config.h @@ -131,9 +131,6 @@ struct Configuration { bool omitDebugInfo = false; bool warnDylibInstallName = false; bool ignoreOptimizationHints = false; - // Temporary config flag that will be removed once we have fully implemented - // support for __eh_frame. - bool parseEhFrames = false; uint32_t headerPad; uint32_t dylibCompatibilityVersion = 0; uint32_t dylibCurrentVersion = 0; diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp index 708facd180ba..abfe381f41e0 100644 --- a/lld/MachO/Driver.cpp +++ b/lld/MachO/Driver.cpp @@ -1305,7 +1305,6 @@ bool macho::link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS, config->callGraphProfileSort = args.hasFlag( OPT_call_graph_profile_sort, OPT_no_call_graph_profile_sort, true); config->printSymbolOrder = args.getLastArgValue(OPT_print_symbol_order); - config->parseEhFrames = static_cast<bool>(getenv("LLD_IN_TEST")); // FIXME: Add a commandline flag for this too. 
config->zeroModTime = getenv("ZERO_AR_DATE"); diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp index c53874133a78..fda6900edabe 100644 --- a/lld/MachO/InputFiles.cpp +++ b/lld/MachO/InputFiles.cpp @@ -347,7 +347,7 @@ void ObjFile::parseSections(ArrayRef<SectionHeader> sectionHeaders) { section.subsections.push_back({0, isec}); } else if (auto recordSize = getRecordSize(segname, name)) { splitRecords(*recordSize); - } else if (config->parseEhFrames && name == section_names::ehFrame && + } else if (name == section_names::ehFrame && segname == segment_names::text) { splitEhFrames(data, *sections.back()); } else if (segname == segment_names::llvm) { @@ -1117,7 +1117,7 @@ template <class LP> void ObjFile::parse() { } if (compactUnwindSection) registerCompactUnwind(*compactUnwindSection); - if (config->parseEhFrames && ehFrameSection) + if (ehFrameSection) registerEhFrames(*ehFrameSection); } @@ -1687,7 +1687,6 @@ DylibFile::DylibFile(MemoryBufferRef mb, DylibFile *umbrella, umbrella = this; this->umbrella = umbrella; - auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart()); auto *hdr = reinterpret_cast<const mach_header *>(mb.getBufferStart()); // Initialize installName. @@ -1722,39 +1721,53 @@ DylibFile::DylibFile(MemoryBufferRef mb, DylibFile *umbrella, // Initialize symbols. exportingFile = isImplicitlyLinked(installName) ? this : this->umbrella; - if (const load_command *cmd = findCommand(hdr, LC_DYLD_INFO_ONLY)) { - auto *c = reinterpret_cast<const dyld_info_command *>(cmd); - struct TrieEntry { - StringRef name; - uint64_t flags; - }; - std::vector<TrieEntry> entries; - // Find all the $ld$* symbols to process first. - parseTrie(buf + c->export_off, c->export_size, - [&](const Twine &name, uint64_t flags) { - StringRef savedName = saver().save(name); - if (handleLDSymbol(savedName)) - return; - entries.push_back({savedName, flags}); - }); - - // Process the "normal" symbols. - for (TrieEntry &entry : entries) { - if (exportingFile->hiddenSymbols.contains( - CachedHashStringRef(entry.name))) - continue; + const auto *dyldInfo = findCommand<dyld_info_command>(hdr, LC_DYLD_INFO_ONLY); + const auto *exportsTrie = + findCommand<linkedit_data_command>(hdr, LC_DYLD_EXPORTS_TRIE); + if (dyldInfo && exportsTrie) { + // It's unclear what should happen in this case. Maybe we should only error + // out if the two load commands refer to different data? + error("dylib " + toString(this) + + " has both LC_DYLD_INFO_ONLY and LC_DYLD_EXPORTS_TRIE"); + return; + } else if (dyldInfo) { + parseExportedSymbols(dyldInfo->export_off, dyldInfo->export_size); + } else if (exportsTrie) { + parseExportedSymbols(exportsTrie->dataoff, exportsTrie->datasize); + } else { + error("No LC_DYLD_INFO_ONLY or LC_DYLD_EXPORTS_TRIE found in " + + toString(this)); + return; + } +} - bool isWeakDef = entry.flags & EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION; - bool isTlv = entry.flags & EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL; +void DylibFile::parseExportedSymbols(uint32_t offset, uint32_t size) { + struct TrieEntry { + StringRef name; + uint64_t flags; + }; - symbols.push_back( - symtab->addDylib(entry.name, exportingFile, isWeakDef, isTlv)); - } + auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart()); + std::vector<TrieEntry> entries; + // Find all the $ld$* symbols to process first. 
+ parseTrie(buf + offset, size, [&](const Twine &name, uint64_t flags) { + StringRef savedName = saver().save(name); + if (handleLDSymbol(savedName)) + return; + entries.push_back({savedName, flags}); + }); - } else { - error("LC_DYLD_INFO_ONLY not found in " + toString(this)); - return; + // Process the "normal" symbols. + for (TrieEntry &entry : entries) { + if (exportingFile->hiddenSymbols.contains(CachedHashStringRef(entry.name))) + continue; + + bool isWeakDef = entry.flags & EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION; + bool isTlv = entry.flags & EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL; + + symbols.push_back( + symtab->addDylib(entry.name, exportingFile, isWeakDef, isTlv)); } } diff --git a/lld/MachO/InputFiles.h b/lld/MachO/InputFiles.h index 524418b91ee1..efddc1c46782 100644 --- a/lld/MachO/InputFiles.h +++ b/lld/MachO/InputFiles.h @@ -250,6 +250,7 @@ private: void handleLDInstallNameSymbol(StringRef name, StringRef originalName); void handleLDHideSymbol(StringRef name, StringRef originalName); void checkAppExtensionSafety(bool dylibIsAppExtensionSafe) const; + void parseExportedSymbols(uint32_t offset, uint32_t size); llvm::DenseSet<llvm::CachedHashStringRef> hiddenSymbols; }; diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp index 25eb878736d9..df312525df61 100644 --- a/lld/MachO/InputSection.cpp +++ b/lld/MachO/InputSection.cpp @@ -201,6 +201,12 @@ void ConcatInputSection::writeTo(uint8_t *buf) { if (target->hasAttr(r.type, RelocAttrBits::LOAD) && !referentSym->isInGot()) target->relaxGotLoad(loc, r.type); + // For dtrace symbols, do not handle them as normal undefined symbols + if (referentSym->getName().startswith("___dtrace_")) { + // Change dtrace call site to pre-defined instructions + target->handleDtraceReloc(referentSym, r, loc); + continue; + } referentVA = resolveSymbolVA(referentSym, r.type) + r.addend; if (isThreadLocalVariables(getFlags())) { diff --git a/lld/MachO/SymbolTable.cpp b/lld/MachO/SymbolTable.cpp index d309f66c119f..7bda1d13069f 100644 --- a/lld/MachO/SymbolTable.cpp +++ b/lld/MachO/SymbolTable.cpp @@ -332,6 +332,10 @@ static bool recoverFromUndefinedSymbol(const Undefined &sym) { return true; } + // Leave dtrace symbols, since we will handle them when we do the relocation + if (name.startswith("___dtrace_")) + return true; + // Handle -U. if (config->explicitDynamicLookups.count(sym.getName())) { symtab->addDynamicLookup(sym.getName()); diff --git a/lld/MachO/Target.h b/lld/MachO/Target.h index 09ff3c5639ea..597502275dee 100644 --- a/lld/MachO/Target.h +++ b/lld/MachO/Target.h @@ -79,6 +79,15 @@ public: bool usesThunks() const { return thunkSize > 0; } + // For now, handleDtraceReloc only implements -no_dtrace_dof, and ensures + // that the linking would not fail even when there are user-provided dtrace + // symbols. However, unlike ld64, lld currently does not emit __dof sections. 
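A byte-level view may help here; the sketch below models what the X86_64 override from the hunk above writes at a ___dtrace_probe call site (patchDtraceProbeCall is a hypothetical helper; loc points at the rel32 of a 5-byte E8 call, and a little-endian host is assumed, which lld's write32le otherwise handles explicitly). The virtual declaration this comment documents follows right after.

#include <cstdint>
#include <cstring>

// E8 xx xx xx xx  ->  90 0F 1F 40 00: a 1-byte nop over the call opcode,
// then a 4-byte nop (nopl 0x0(%rax)); 0x00401F0F is 0F 1F 40 00 in memory.
static void patchDtraceProbeCall(uint8_t *loc) {
  loc[-1] = 0x90;                         // overwrite the E8 opcode byte
  const uint32_t nop4 = 0x00401F0F;       // nopl 0x0(%rax)
  std::memcpy(loc, &nop4, sizeof(nop4));  // overwrite the rel32 field
}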
+ virtual void handleDtraceReloc(const Symbol *sym, const Reloc &r, + uint8_t *loc) const { + llvm_unreachable("Unsupported architecture for dtrace symbols"); + } + + virtual void applyOptimizationHints(uint8_t *buf, const ConcatInputSection *, llvm::ArrayRef<uint64_t>) const {}; diff --git a/lld/MachO/UnwindInfoSection.cpp b/lld/MachO/UnwindInfoSection.cpp index 2c4a469578de..8c3425a17459 100644 --- a/lld/MachO/UnwindInfoSection.cpp +++ b/lld/MachO/UnwindInfoSection.cpp @@ -506,7 +506,7 @@ void UnwindInfoSectionImpl::finalize() { secondLevelPages.emplace_back(); SecondLevelPage &page = secondLevelPages.back(); page.entryIndex = i; - uintptr_t functionAddressMax = + uint64_t functionAddressMax = cuEntries[idx].functionAddress + COMPRESSED_ENTRY_FUNC_OFFSET_MASK; size_t n = commonEncodings.size(); size_t wordsRemaining = diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst index 527ab4fead38..936d800cabc3 100644 --- a/lld/docs/ReleaseNotes.rst +++ b/lld/docs/ReleaseNotes.rst @@ -29,6 +29,8 @@ ELF Improvements * ``-z pack-relative-relocs`` is now available to support ``DT_RELR`` for glibc 2.36+. (`D120701 <https://reviews.llvm.org/D120701>`_) * ``--no-fortran-common`` (pre 12.0.0 behavior) is now the default. +* ``FORCE_LLD_DIAGNOSTICS_CRASH`` environment variable is now available to force LLD to crash. + (`D128195 <https://reviews.llvm.org/D128195>`_) Breaking changes ---------------- @@ -63,7 +65,11 @@ MinGW Improvements MachO Improvements ------------------ -* Item 1. +* We now support proper relocation and pruning of EH frames. **Note:** this + comes at some performance overhead on x86_64 builds, and we recommend adding + the ``-femit-compact-unwind=no-compact-unwind`` compile flag to avoid it. + (`D129540 <https://reviews.llvm.org/D129540>`_, + `D122258 <https://reviews.llvm.org/D122258>`_) WebAssembly Improvements ------------------------ diff --git a/lld/tools/lld/lld.cpp b/lld/tools/lld/lld.cpp index d6c39fa7cce4..3ac59877affb 100644 --- a/lld/tools/lld/lld.cpp +++ b/lld/tools/lld/lld.cpp @@ -214,6 +214,12 @@ int main(int argc, const char **argv) { InitLLVM x(argc, argv); sys::Process::UseANSIEscapeCodes(true); + if (::getenv("FORCE_LLD_DIAGNOSTICS_CRASH")) { + llvm::errs() + << "crashing due to environment variable FORCE_LLD_DIAGNOSTICS_CRASH\n"; + LLVM_BUILTIN_TRAP; + } + // Not running in lit tests, just take the shortest codepath with global // exception handling and no memory cleanup on exit. 
if (!inTestVerbosity()) diff --git a/lldb/bindings/interface/SBSection.i b/lldb/bindings/interface/SBSection.i index b86d4e99c5ea..a138d81825b5 100644 --- a/lldb/bindings/interface/SBSection.i +++ b/lldb/bindings/interface/SBSection.i @@ -105,6 +105,9 @@ public: uint32_t GetTargetByteSize (); + uint32_t + GetAlignment (); + bool GetDescription (lldb::SBStream &description); @@ -138,6 +141,7 @@ public: data = property(GetSectionData, None, doc='''A read only property that returns an lldb object that represents the bytes for this section (lldb.SBData) for this section.''') type = property(GetSectionType, None, doc='''A read only property that returns an lldb enumeration value (see enumerations that start with "lldb.eSectionType") that represents the type of this section (code, data, etc.).''') target_byte_size = property(GetTargetByteSize, None, doc='''A read only property that returns the size of a target byte represented by this section as a number of host bytes.''') + alignment = property(GetAlignment, None, doc='''A read only property that returns the alignment of this section in bytes.''') %} #endif diff --git a/lldb/bindings/interface/SBTrace.i b/lldb/bindings/interface/SBTrace.i index 0f5bf0ecc8d9..0d74881a3f3d 100644 --- a/lldb/bindings/interface/SBTrace.i +++ b/lldb/bindings/interface/SBTrace.i @@ -17,6 +17,8 @@ public: const char *GetStartConfigurationHelp(); + SBFileSpec SaveToDisk(SBError &error, const SBFileSpec &bundle_dir, bool compact = false); + SBError Start(const SBStructuredData &configuration); SBError Start(const SBThread &thread, const SBStructuredData &configuration); diff --git a/lldb/include/lldb/API/SBSection.h b/lldb/include/lldb/API/SBSection.h index d722dbe4ff1f..94c6614ecfa9 100644 --- a/lldb/include/lldb/API/SBSection.h +++ b/lldb/include/lldb/API/SBSection.h @@ -76,6 +76,12 @@ public: /// The number of host (8-bit) bytes needed to hold a target byte uint32_t GetTargetByteSize(); + /// Return the alignment of the section in bytes + /// + /// \return + /// The alignment of the section in bytes + uint32_t GetAlignment(); + bool operator==(const lldb::SBSection &rhs); bool operator!=(const lldb::SBSection &rhs); diff --git a/lldb/include/lldb/API/SBTrace.h b/lldb/include/lldb/API/SBTrace.h index d5cf30f56637..19d759013955 100644 --- a/lldb/include/lldb/API/SBTrace.h +++ b/lldb/include/lldb/API/SBTrace.h @@ -25,6 +25,28 @@ public: static SBTrace LoadTraceFromFile(SBError &error, SBDebugger &debugger, const SBFileSpec &trace_description_file); + /// Save the trace to the specified directory, which will be created if + /// needed. This will also create a file \a <directory>/trace.json with the + /// main properties of the trace session, along with other files which + /// contain the actual trace data. The trace.json file can be used later as + /// input for the "trace load" command to load the trace in LLDB, or for the + /// method \a SBDebugger.LoadTraceFromFile(). + /// + /// \param[out] error + /// This will be set with an error in case of failures. + /// + /// \param[in] directory + /// The directory where the trace files will be saved. + /// + /// \param[in] compact + /// Try not to save to disk information irrelevant to the traced processes. + /// Each trace plug-in implements this in a different fashion. + /// + /// \return + /// A \a SBFileSpec pointing to the bundle description file.
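As a usage illustration for the SaveToDisk declaration that follows (a sketch only: trace stands for an existing lldb::SBTrace, and the bundle path is arbitrary):

// Save a trace into a bundle directory; the returned trace.json can be
// fed back to SBDebugger::LoadTraceFromFile() or the "trace load" command.
lldb::SBError error;
lldb::SBFileSpec bundleDir("/tmp/trace-bundle");
lldb::SBFileSpec descFile = trace.SaveToDisk(error, bundleDir, /*compact=*/true);
if (error.Fail())
  fprintf(stderr, "saving trace failed: %s\n", error.GetCString());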
+ SBFileSpec SaveToDisk(SBError &error, const SBFileSpec &bundle_dir, + bool compact = false); + /// \return /// A description of the parameters to use for the \a SBTrace::Start /// method, or \b null if the object is invalid. diff --git a/lldb/include/lldb/Core/Disassembler.h b/lldb/include/lldb/Core/Disassembler.h index 2dd1153031a6..2cb983c40d19 100644 --- a/lldb/include/lldb/Core/Disassembler.h +++ b/lldb/include/lldb/Core/Disassembler.h @@ -79,6 +79,12 @@ public: return m_comment.c_str(); } + /// \return + /// The control flow kind of this instruction, or + /// eInstructionControlFlowKindUnknown if the instruction + /// can't be classified. + lldb::InstructionControlFlowKind GetControlFlowKind(const ArchSpec &arch); + virtual void CalculateMnemonicOperandsAndComment(const ExecutionContext *exe_ctx) = 0; @@ -105,6 +111,9 @@ public: /// \param[in] show_bytes /// Whether the bytes of the assembly instruction should be printed. /// + /// \param[in] show_control_flow_kind + /// Whether the control flow kind of the instruction should be printed. + /// /// \param[in] max_opcode_byte_size /// The size (in bytes) of the largest instruction in the list that /// we are printing (for text justification/alignment purposes) @@ -140,7 +149,8 @@ public: /// so this method can properly align the instruction opcodes. /// May be 0 to indicate no indentation/alignment of the opcodes. virtual void Dump(Stream *s, uint32_t max_opcode_byte_size, bool show_address, - bool show_bytes, const ExecutionContext *exe_ctx, + bool show_bytes, bool show_control_flow_kind, + const ExecutionContext *exe_ctx, const SymbolContext *sym_ctx, const SymbolContext *prev_sym_ctx, const FormatEntity::Entry *disassembly_addr_format, @@ -320,7 +330,7 @@ public: void Append(lldb::InstructionSP &inst_sp); void Dump(Stream *s, bool show_address, bool show_bytes, - const ExecutionContext *exe_ctx); + bool show_control_flow_kind, const ExecutionContext *exe_ctx); private: typedef std::vector<lldb::InstructionSP> collection; @@ -375,7 +385,8 @@ public: eOptionMarkPCSourceLine = (1u << 2), // Mark the source line that contains // the current PC (mixed mode only) eOptionMarkPCAddress = - (1u << 3) // Mark the disassembly line the contains the PC + (1u << 3), // Mark the disassembly line the contains the PC + eOptionShowControlFlowKind = (1u << 4), }; enum HexImmediateStyle { diff --git a/lldb/include/lldb/Expression/DWARFExpression.h b/lldb/include/lldb/Expression/DWARFExpression.h index 96a0e8e02da1..49e51d51f211 100644 --- a/lldb/include/lldb/Expression/DWARFExpression.h +++ b/lldb/include/lldb/Expression/DWARFExpression.h @@ -42,49 +42,14 @@ public: /// \param[in] data /// A data extractor configured to read the DWARF location expression's /// bytecode. - DWARFExpression(lldb::ModuleSP module, const DataExtractor &data, - const DWARFUnit *dwarf_cu); + DWARFExpression(const DataExtractor &data); /// Destructor virtual ~DWARFExpression(); - /// Print the description of the expression to a stream - /// - /// \param[in] s - /// The stream to print to. - /// - /// \param[in] level - /// The level of verbosity to use. - /// - /// \param[in] abi - /// An optional ABI plug-in that can be used to resolve register - /// names. 
- void GetDescription(Stream *s, lldb::DescriptionLevel level, ABI *abi) const; - /// Return true if the location expression contains data bool IsValid() const; - /// Return true if a location list was provided - bool IsLocationList() const; - - /// Search for a load address in the location list - /// - /// \param[in] func_load_addr - /// The actual address of the function containing this location list. - /// - /// \param[in] addr - /// The address to resolve - /// - /// \return - /// True if IsLocationList() is true and the address was found; - /// false otherwise. - // bool - // LocationListContainsLoadAddress (Process* process, const Address &addr) - // const; - // - bool LocationListContainsAddress(lldb::addr_t func_load_addr, - lldb::addr_t addr) const; - /// If a location is not a location list, return true if the location /// contains a DW_OP_addr () opcode in the stream that matches \a file_addr. /// If file_addr is LLDB_INVALID_ADDRESS, the this function will return true @@ -93,6 +58,9 @@ public: /// static variable since there is no other indication from DWARF debug /// info. /// + /// \param[in] dwarf_cu + /// The dwarf unit this expression belongs to. + /// /// \param[in] op_addr_idx /// The DW_OP_addr index to retrieve in case there is more than /// one DW_OP_addr opcode in the location byte stream. @@ -104,36 +72,22 @@ public: /// \return /// LLDB_INVALID_ADDRESS if the location doesn't contain a /// DW_OP_addr for \a op_addr_idx, otherwise a valid file address - lldb::addr_t GetLocation_DW_OP_addr(uint32_t op_addr_idx, bool &error) const; + lldb::addr_t GetLocation_DW_OP_addr(const DWARFUnit *dwarf_cu, + uint32_t op_addr_idx, bool &error) const; bool Update_DW_OP_addr(lldb::addr_t file_addr); void UpdateValue(uint64_t const_value, lldb::offset_t const_value_byte_size, uint8_t addr_byte_size); - void SetModule(const lldb::ModuleSP &module) { m_module_wp = module; } - bool ContainsThreadLocalStorage() const; bool LinkThreadLocalStorage( - lldb::ModuleSP new_module_sp, std::function<lldb::addr_t(lldb::addr_t file_addr)> const &link_address_callback); - /// Tells the expression that it refers to a location list. - /// - /// \param[in] cu_file_addr - /// The base address to use for interpreting relative location list - /// entries. - /// \param[in] func_file_addr - /// The file address of the function containing this location list. This - /// address will be used to relocate the location list on the fly (in - /// conjuction with the func_load_addr arguments). - void SetLocationListAddresses(lldb::addr_t cu_file_addr, - lldb::addr_t func_file_addr); - /// Return the call-frame-info style register kind - int GetRegisterKind(); + lldb::RegisterKind GetRegisterKind() const; /// Set the call-frame-info style register kind /// @@ -141,20 +95,6 @@ public: /// The register kind. 
void SetRegisterKind(lldb::RegisterKind reg_kind); - /// Wrapper for the static evaluate function that accepts an - /// ExecutionContextScope instead of an ExecutionContext and uses member - /// variables to populate many operands - bool Evaluate(ExecutionContextScope *exe_scope, lldb::addr_t func_load_addr, - const Value *initial_value_ptr, const Value *object_address_ptr, - Value &result, Status *error_ptr) const; - - /// Wrapper for the static evaluate function that uses member variables to - /// populate many operands - bool Evaluate(ExecutionContext *exe_ctx, RegisterContext *reg_ctx, - lldb::addr_t loclist_base_load_addr, - const Value *initial_value_ptr, const Value *object_address_ptr, - Value &result, Status *error_ptr) const; - /// Evaluate a DWARF location expression in a particular context /// /// \param[in] exe_ctx @@ -194,72 +134,32 @@ public: /// True on success; false otherwise. If error_ptr is non-NULL, /// details of the failure are provided through it. static bool Evaluate(ExecutionContext *exe_ctx, RegisterContext *reg_ctx, - lldb::ModuleSP opcode_ctx, const DataExtractor &opcodes, + lldb::ModuleSP module_sp, const DataExtractor &opcodes, const DWARFUnit *dwarf_cu, const lldb::RegisterKind reg_set, const Value *initial_value_ptr, const Value *object_address_ptr, Value &result, Status *error_ptr); + static bool ParseDWARFLocationList(const DWARFUnit *dwarf_cu, + const DataExtractor &data, + DWARFExpressionList *loc_list); + bool GetExpressionData(DataExtractor &data) const { data = m_data; return data.GetByteSize() > 0; } - bool DumpLocationForAddress(Stream *s, lldb::DescriptionLevel level, - lldb::addr_t func_load_addr, lldb::addr_t address, - ABI *abi); - - bool DumpLocations(Stream *s, lldb::DescriptionLevel level, - lldb::addr_t func_load_addr, lldb::addr_t addr, ABI *abi); - - bool GetLocationExpressions( - lldb::addr_t load_function_start, - llvm::function_ref<bool(llvm::DWARFLocationExpression)> callback) const; + void DumpLocation(Stream *s, lldb::DescriptionLevel level, ABI *abi) const; - bool MatchesOperand(StackFrame &frame, const Instruction::Operand &op); - - llvm::Optional<DataExtractor> - GetLocationExpression(lldb::addr_t load_function_start, - lldb::addr_t addr) const; + bool MatchesOperand(StackFrame &frame, const Instruction::Operand &op) const; private: - /// Pretty-prints the location expression to a stream - /// - /// \param[in] s - /// The stream to use for pretty-printing. - /// - /// \param[in] data - /// The data extractor. - /// - /// \param[in] level - /// The level of detail to use in pretty-printing. - /// - /// \param[in] abi - /// An optional ABI plug-in that can be used to resolve register - /// names. - void DumpLocation(Stream *s, const DataExtractor &data, - lldb::DescriptionLevel level, ABI *abi) const; - - /// Module which defined this expression. - lldb::ModuleWP m_module_wp; - /// A data extractor capable of reading opcode bytes DataExtractor m_data; - /// The DWARF compile unit this expression belongs to. It is used to evaluate - /// values indexing into the .debug_addr section (e.g. 
DW_OP_GNU_addr_index, - /// DW_OP_GNU_const_index) - const DWARFUnit *m_dwarf_cu = nullptr; - /// One of the defines that starts with LLDB_REGKIND_ lldb::RegisterKind m_reg_kind = lldb::eRegisterKindDWARF; - - struct LoclistAddresses { - lldb::addr_t cu_file_addr; - lldb::addr_t func_file_addr; - }; - llvm::Optional<LoclistAddresses> m_loclist_addresses; }; } // namespace lldb_private diff --git a/lldb/include/lldb/Expression/DWARFExpressionList.h b/lldb/include/lldb/Expression/DWARFExpressionList.h new file mode 100644 index 000000000000..a8f2a7126e3c --- /dev/null +++ b/lldb/include/lldb/Expression/DWARFExpressionList.h @@ -0,0 +1,151 @@ +//===-- DWARFExpressionList.h -----------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLDB_EXPRESSION_DWARFEXPRESSIONLIST_H +#define LLDB_EXPRESSION_DWARFEXPRESSIONLIST_H + +#include "lldb/Expression/DWARFExpression.h" +#include "lldb/Utility/RangeMap.h" +#include "lldb/lldb-private.h" +#include "llvm/ADT/Optional.h" + +class DWARFUnit; + +namespace lldb_private { + +/// \class DWARFExpressionList DWARFExpressionList.h +/// "lldb/Expression/DWARFExpressionList.h" Encapsulates a range map from file +/// address range to a single DWARF location expression. +class DWARFExpressionList { +public: + DWARFExpressionList() = default; + + DWARFExpressionList(lldb::ModuleSP module_sp, const DWARFUnit *dwarf_cu, + lldb::addr_t func_file_addr) + : m_module_wp(module_sp), m_dwarf_cu(dwarf_cu), + m_func_file_addr(func_file_addr) {} + + DWARFExpressionList(lldb::ModuleSP module_sp, DWARFExpression expr, + const DWARFUnit *dwarf_cu) + : m_module_wp(module_sp), m_dwarf_cu(dwarf_cu) { + AddExpression(0, LLDB_INVALID_ADDRESS, expr); + } + + /// Return true if the location expression contains data + bool IsValid() const { return !m_exprs.IsEmpty(); } + + void Clear() { m_exprs.Clear(); } + + // Return true if the location expression is always valid. + bool IsAlwaysValidSingleExpr() const; + + bool AddExpression(lldb::addr_t base, lldb::addr_t end, DWARFExpression expr); + + /// Get the expression data at the file address. + bool GetExpressionData(DataExtractor &data, + lldb::addr_t func_load_addr = LLDB_INVALID_ADDRESS, + lldb::addr_t file_addr = 0) const; + + /// Sort m_exprs. + void Sort() { m_exprs.Sort(); } + + void SetFuncFileAddress(lldb::addr_t func_file_addr) { + m_func_file_addr = func_file_addr; + } + + lldb::addr_t GetFuncFileAddress() { return m_func_file_addr; } + + const DWARFExpression *GetExpressionAtAddress(lldb::addr_t func_load_addr, + lldb::addr_t load_addr) const; + + const DWARFExpression *GetAlwaysValidExpr() const; + + DWARFExpression *GetMutableExpressionAtAddress( + lldb::addr_t func_load_addr = LLDB_INVALID_ADDRESS, + lldb::addr_t load_addr = 0); + + size_t GetSize() const { return m_exprs.GetSize(); } + + bool ContainsThreadLocalStorage() const; + + bool LinkThreadLocalStorage( + lldb::ModuleSP new_module_sp, + std::function<lldb::addr_t(lldb::addr_t file_addr)> const + &link_address_callback); + + bool MatchesOperand(StackFrame &frame, + const Instruction::Operand &operand) const; + + /// Dump locations that contain file_addr if it's valid. Otherwise, dump all + /// locations.
+ bool DumpLocations(Stream *s, lldb::DescriptionLevel level, + lldb::addr_t func_load_addr, lldb::addr_t file_addr, + ABI *abi) const; + + /// Dump all locations, each separated by a new line. + void GetDescription(Stream *s, lldb::DescriptionLevel level, ABI *abi) const; + + /// Search for a load address in the dwarf location list + /// + /// \param[in] func_load_addr + /// The actual address of the function containing this location list. + /// + /// \param[in] addr + /// The address to resolve. + /// + /// \return + /// True if IsLocationList() is true and the address was found; + /// false otherwise. + // bool + // LocationListContainsLoadAddress (Process* process, const Address &addr) + // const; + // + bool ContainsAddress(lldb::addr_t func_load_addr, lldb::addr_t addr) const; + + void SetModule(const lldb::ModuleSP &module) { m_module_wp = module; } + + bool Evaluate(ExecutionContext *exe_ctx, RegisterContext *reg_ctx, + lldb::addr_t func_load_addr, const Value *initial_value_ptr, + const Value *object_address_ptr, Value &result, + Status *error_ptr) const; + +private: + // RangeDataVector requires a comparator for DWARFExpression, but comparing + // expressions makes no sense, so this one always returns false. + struct DWARFExpressionCompare { + public: + bool operator()(const DWARFExpression &lhs, + const DWARFExpression &rhs) const { + return false; + } + }; + using ExprVec = RangeDataVector<lldb::addr_t, lldb::addr_t, DWARFExpression, + 0, DWARFExpressionCompare>; + using Entry = ExprVec::Entry; + + // File address range mapping to single dwarf expression. + ExprVec m_exprs; + + /// Module which defined this expression. + lldb::ModuleWP m_module_wp; + + /// The DWARF compile unit this expression belongs to. It is used to evaluate + /// values indexing into the .debug_addr section (e.g. DW_OP_GNU_addr_index, + /// DW_OP_GNU_const_index) + const DWARFUnit *m_dwarf_cu = nullptr; + + // Function base file address. + lldb::addr_t m_func_file_addr = LLDB_INVALID_ADDRESS; + + using const_iterator = ExprVec::Collection::const_iterator; + const_iterator begin() const { return m_exprs.begin(); } + const_iterator end() const { return m_exprs.end(); } +}; } // namespace lldb_private + +#endif // LLDB_EXPRESSION_DWARFEXPRESSIONLIST_H diff --git a/lldb/include/lldb/Interpreter/CommandObject.h b/lldb/include/lldb/Interpreter/CommandObject.h index 45fc47b02c04..0fc1c61bdb92 100644 --- a/lldb/include/lldb/Interpreter/CommandObject.h +++ b/lldb/include/lldb/Interpreter/CommandObject.h @@ -104,9 +104,6 @@ public: typedef std::vector<CommandArgumentData> CommandArgumentEntry; // Used to build individual command argument lists - static ArgumentTableEntry g_arguments_data - [lldb::eArgTypeLastArg]; // Main argument information table - typedef std::map<std::string, lldb::CommandObjectSP> CommandMap; CommandObject(CommandInterpreter &interpreter, llvm::StringRef name, diff --git a/lldb/include/lldb/Symbol/Function.h b/lldb/include/lldb/Symbol/Function.h index 83f9979c3c52..3eb4f5d7dedf 100644 --- a/lldb/include/lldb/Symbol/Function.h +++ b/lldb/include/lldb/Symbol/Function.h @@ -12,7 +12,7 @@ #include "lldb/Core/AddressRange.h" #include "lldb/Core/Declaration.h" #include "lldb/Core/Mangled.h" -#include "lldb/Expression/DWARFExpression.h" +#include "lldb/Expression/DWARFExpressionList.h" #include "lldb/Symbol/Block.h" #include "lldb/Utility/UserID.h" #include "llvm/ADT/ArrayRef.h" @@ -253,8 +253,8 @@ class Function; /// Represent the locations of a parameter at a call site, both in the caller /// and in the callee.
struct CallSiteParameter { - DWARFExpression LocationInCallee; - DWARFExpression LocationInCaller; + DWARFExpressionList LocationInCallee; + DWARFExpressionList LocationInCaller; }; /// A vector of \c CallSiteParameter. @@ -370,7 +370,7 @@ class IndirectCallEdge : public CallEdge { public: /// Construct a call edge using a DWARFExpression to identify the callee, and /// a return PC within the calling function to identify a specific call site. - IndirectCallEdge(DWARFExpression call_target, AddrType caller_address_type, + IndirectCallEdge(DWARFExpressionList call_target, AddrType caller_address_type, lldb::addr_t caller_address, bool is_tail_call, CallSiteParameterArray &¶meters) : CallEdge(caller_address_type, caller_address, is_tail_call, @@ -383,7 +383,7 @@ private: // Used to describe an indirect call. // // Specifies the location of the callee address in the calling frame. - DWARFExpression call_target; + DWARFExpressionList call_target; }; /// \class Function Function.h "lldb/Symbol/Function.h" @@ -521,13 +521,13 @@ public: /// \return /// A location expression that describes the function frame /// base. - DWARFExpression &GetFrameBaseExpression() { return m_frame_base; } + DWARFExpressionList &GetFrameBaseExpression() { return m_frame_base; } /// Get const accessor for the frame base location. /// /// \return /// A const compile unit object pointer. - const DWARFExpression &GetFrameBaseExpression() const { return m_frame_base; } + const DWARFExpressionList &GetFrameBaseExpression() const { return m_frame_base; } ConstString GetName() const; @@ -659,7 +659,7 @@ protected: /// The frame base expression for variables that are relative to the frame /// pointer. - DWARFExpression m_frame_base; + DWARFExpressionList m_frame_base; Flags m_flags; diff --git a/lldb/include/lldb/Symbol/ObjectFile.h b/lldb/include/lldb/Symbol/ObjectFile.h index c61e3c138944..e51d50592c90 100644 --- a/lldb/include/lldb/Symbol/ObjectFile.h +++ b/lldb/include/lldb/Symbol/ObjectFile.h @@ -673,6 +673,7 @@ public: virtual size_t ReadSectionData(Section *section, DataExtractor §ion_data); + /// Returns true if the object file exists only in memory. bool IsInMemory() const { return m_memory_addr != LLDB_INVALID_ADDRESS; } // Strip linker annotations (such as @@VERSION) from symbol names. @@ -736,6 +737,7 @@ protected: DataExtractor m_data; ///< The data for this object file so things can be parsed lazily. lldb::ProcessWP m_process_wp; + /// Set if the object file only exists in memory. 
const lldb::addr_t m_memory_addr; std::unique_ptr<lldb_private::SectionList> m_sections_up; std::unique_ptr<lldb_private::Symtab> m_symtab_up; diff --git a/lldb/include/lldb/Symbol/TypeList.h b/lldb/include/lldb/Symbol/TypeList.h index 03390858025b..403469c989f5 100644 --- a/lldb/include/lldb/Symbol/TypeList.h +++ b/lldb/include/lldb/Symbol/TypeList.h @@ -49,10 +49,11 @@ public: void ForEach(std::function<bool(lldb::TypeSP &type_sp)> const &callback); - void RemoveMismatchedTypes(const char *qualified_typename, bool exact_match); + void RemoveMismatchedTypes(llvm::StringRef qualified_typename, + bool exact_match); - void RemoveMismatchedTypes(const std::string &type_scope, - const std::string &type_basename, + void RemoveMismatchedTypes(llvm::StringRef type_scope, + llvm::StringRef type_basename, lldb::TypeClass type_class, bool exact_match); void RemoveMismatchedTypes(lldb::TypeClass type_class); diff --git a/lldb/include/lldb/Symbol/TypeMap.h b/lldb/include/lldb/Symbol/TypeMap.h index ede54c1a09d4..c200ccb9844f 100644 --- a/lldb/include/lldb/Symbol/TypeMap.h +++ b/lldb/include/lldb/Symbol/TypeMap.h @@ -53,14 +53,10 @@ public: bool Remove(const lldb::TypeSP &type_sp); - void RemoveMismatchedTypes(const char *qualified_typename, bool exact_match); - - void RemoveMismatchedTypes(const std::string &type_scope, - const std::string &type_basename, + void RemoveMismatchedTypes(llvm::StringRef type_scope, + llvm::StringRef type_basename, lldb::TypeClass type_class, bool exact_match); - void RemoveMismatchedTypes(lldb::TypeClass type_class); - private: typedef collection::iterator iterator; typedef collection::const_iterator const_iterator; diff --git a/lldb/include/lldb/Symbol/Variable.h b/lldb/include/lldb/Symbol/Variable.h index 88a975df3992..c437624d1ea6 100644 --- a/lldb/include/lldb/Symbol/Variable.h +++ b/lldb/include/lldb/Symbol/Variable.h @@ -11,7 +11,7 @@ #include "lldb/Core/Declaration.h" #include "lldb/Core/Mangled.h" -#include "lldb/Expression/DWARFExpression.h" +#include "lldb/Expression/DWARFExpressionList.h" #include "lldb/Utility/CompletionRequest.h" #include "lldb/Utility/RangeMap.h" #include "lldb/Utility/UserID.h" @@ -32,8 +32,8 @@ public: Variable(lldb::user_id_t uid, const char *name, const char *mangled, const lldb::SymbolFileTypeSP &symfile_type_sp, lldb::ValueType scope, SymbolContextScope *owner_scope, const RangeList &scope_range, - Declaration *decl, const DWARFExpression &location, bool external, - bool artificial, bool location_is_constant_data, + Declaration *decl, const DWARFExpressionList &location, + bool external, bool artificial, bool location_is_constant_data, bool static_member = false); virtual ~Variable(); @@ -73,9 +73,11 @@ public: bool IsStaticMember() const { return m_static_member; } - DWARFExpression &LocationExpression() { return m_location; } + DWARFExpressionList &LocationExpressionList() { return m_location_list; } - const DWARFExpression &LocationExpression() const { return m_location; } + const DWARFExpressionList &LocationExpressionList() const { + return m_location_list; + } // When given invalid address, it dumps all locations. Otherwise it only dumps // the location that contains this address. @@ -128,7 +130,7 @@ protected: Declaration m_declaration; /// The location of this variable that can be fed to /// DWARFExpression::Evaluate(). - DWARFExpression m_location; + DWARFExpressionList m_location_list; /// Visible outside the containing compile unit? unsigned m_external : 1; /// Non-zero if the variable is not explicitly declared in source. 
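The DWARFExpression-to-DWARFExpressionList migration threaded through Function.h, Variable.h, and the headers above comes down to one idea: a frame base or variable location is no longer a single expression but a map from file-address ranges to expressions, with an optional always-valid single entry. A toy model, independent of the lldb API (all names illustrative):

#include <cstdint>
#include <vector>

// Toy location list: each entry covers [base, end) in file addresses.
struct ToyEntry { uint64_t base, end; const char *dwarfExpr; };

// Return the expression describing a variable at `pc`, or nullptr when
// no range covers it (the variable is unavailable at that address).
static const char *locate(const std::vector<ToyEntry> &list, uint64_t pc) {
  for (const ToyEntry &e : list)
    if (e.base <= pc && pc < e.end)
      return e.dwarfExpr;
  return nullptr;
}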
diff --git a/lldb/include/lldb/Target/StackFrame.h b/lldb/include/lldb/Target/StackFrame.h index 1b0485b22cac..7c4340de4de0 100644 --- a/lldb/include/lldb/Target/StackFrame.h +++ b/lldb/include/lldb/Target/StackFrame.h @@ -202,7 +202,7 @@ public: /// frames may be unable to provide this value; they will return false. bool GetFrameBaseValue(Scalar &value, Status *error_ptr); - /// Get the DWARFExpression corresponding to the Canonical Frame Address. + /// Get the DWARFExpressionList corresponding to the Canonical Frame Address. /// /// Often a register (bp), but sometimes a register + offset. /// @@ -212,7 +212,7 @@ public: /// /// \return /// Returns the corresponding DWARF expression, or NULL. - DWARFExpression *GetFrameBaseExpression(Status *error_ptr); + DWARFExpressionList *GetFrameBaseExpression(Status *error_ptr); /// Get the current lexical scope block for this StackFrame, if possible. /// diff --git a/lldb/include/lldb/Target/Trace.h b/lldb/include/lldb/Target/Trace.h index f4d7dee684c3..beae9e28417d 100644 --- a/lldb/include/lldb/Target/Trace.h +++ b/lldb/include/lldb/Target/Trace.h @@ -56,21 +56,24 @@ public: /// A stream object to dump the information to. virtual void Dump(Stream *s) const = 0; - /// Save the trace of a live process to the specified directory, which - /// will be created if needed. - /// This will also create a a file \a <directory>/trace.json with the main - /// properties of the trace session, along with others files which contain - /// the actual trace data. The trace.json file can be used later as input - /// for the "trace load" command to load the trace in LLDB. - /// The process being trace is not a live process, return an error. + /// Save the trace to the specified directory, which will be created if + /// needed. This will also create a a file \a <directory>/trace.json with the + /// main properties of the trace session, along with others files which + /// contain the actual trace data. The trace.json file can be used later as + /// input for the "trace load" command to load the trace in LLDB. /// /// \param[in] directory /// The directory where the trace files will be saved. /// + /// \param[in] compact + /// Try not to save to disk information irrelevant to the traced processes. + /// Each trace plug-in implements this in a different fashion. + /// /// \return - /// \a llvm::success if the operation was successful, or an \a llvm::Error - /// otherwise. - virtual llvm::Error SaveLiveTraceToDisk(FileSpec directory) = 0; + /// A \a FileSpec pointing to the bundle description file, or an \a + /// llvm::Error otherwise. + virtual llvm::Expected<FileSpec> SaveToDisk(FileSpec directory, + bool compact) = 0; /// Find a trace plug-in using JSON data. /// @@ -183,7 +186,8 @@ public: /// \param[in] verbose /// If \b true, print detailed info /// If \b false, print compact info - virtual void DumpTraceInfo(Thread &thread, Stream &s, bool verbose) = 0; + virtual void DumpTraceInfo(Thread &thread, Stream &s, bool verbose, + bool json) = 0; /// Check if a thread is currently traced by this object. /// diff --git a/lldb/include/lldb/Target/TraceCursor.h b/lldb/include/lldb/Target/TraceCursor.h index a4cf6433c19a..f6337e3d3d3f 100644 --- a/lldb/include/lldb/Target/TraceCursor.h +++ b/lldb/include/lldb/Target/TraceCursor.h @@ -266,6 +266,16 @@ public: /// The value of the counter or \b llvm::None if not available. virtual llvm::Optional<uint64_t> GetCounter(lldb::TraceCounter counter_type) const = 0; + + /// Get the CPU associated with the current trace item. 
+ /// + /// This call might not be O(1), so it's suggested to invoke this method + /// whenever a cpu change event is fired. + /// + /// \return + /// The requested CPU id, or \a llvm::None if this information is + /// not available for the current item. + virtual llvm::Optional<lldb::cpu_id_t> GetCPU() const = 0; /// \} protected: diff --git a/lldb/include/lldb/Target/TraceDumper.h b/lldb/include/lldb/Target/TraceDumper.h index e78836e45b01..bbc1a55873d7 100644 --- a/lldb/include/lldb/Target/TraceDumper.h +++ b/lldb/include/lldb/Target/TraceDumper.h @@ -34,6 +34,8 @@ struct TraceDumperOptions { bool show_tsc = false; /// Dump the events that happened between instructions. bool show_events = false; + /// For each instruction, print the instruction kind. + bool show_control_flow_kind = false; /// Optional custom id to start traversing from. llvm::Optional<uint64_t> id = llvm::None; /// Optional number of instructions to skip from the starting position @@ -64,6 +66,7 @@ public: llvm::Optional<lldb::TraceEvent> event; llvm::Optional<SymbolInfo> symbol_info; llvm::Optional<SymbolInfo> prev_symbol_info; + llvm::Optional<lldb::cpu_id_t> cpu_id; }; /// Interface used to abstract away the format in which the instruction diff --git a/lldb/include/lldb/Utility/Environment.h b/lldb/include/lldb/Utility/Environment.h index c1549a3d60a6..27d740402c30 100644 --- a/lldb/include/lldb/Utility/Environment.h +++ b/lldb/include/lldb/Utility/Environment.h @@ -57,7 +57,7 @@ public: using Base::operator[]; Environment() {} - Environment(const Environment &RHS) : Base(RHS) {} + Environment(const Environment &RHS) : Base(static_cast<const Base&>(RHS)) {} Environment(Environment &&RHS) : Base(std::move(RHS)) {} Environment(char *const *Env) : Environment(const_cast<const char *const *>(Env)) {} diff --git a/lldb/include/lldb/Utility/RangeMap.h b/lldb/include/lldb/Utility/RangeMap.h index 7eb0cab8084c..257b177c7092 100644 --- a/lldb/include/lldb/Utility/RangeMap.h +++ b/lldb/include/lldb/Utility/RangeMap.h @@ -627,6 +627,10 @@ public: return (m_entries.empty() ? nullptr : &m_entries.back()); } + using const_iterator = typename Collection::const_iterator; + const_iterator begin() const { return m_entries.begin(); } + const_iterator end() const { return m_entries.end(); } + protected: Collection m_entries; Compare m_compare; diff --git a/lldb/include/lldb/Utility/TraceIntelPTGDBRemotePackets.h b/lldb/include/lldb/Utility/TraceIntelPTGDBRemotePackets.h index 36b594613a91..bf9409743a6d 100644 --- a/lldb/include/lldb/Utility/TraceIntelPTGDBRemotePackets.h +++ b/lldb/include/lldb/Utility/TraceIntelPTGDBRemotePackets.h @@ -50,6 +50,10 @@ struct TraceIntelPTStartRequest : TraceStartRequest { /// Whether to have a trace buffer per thread or per cpu cpu. llvm::Optional<bool> per_cpu_tracing; + /// Disable the cgroup filtering that is automatically applied in per cpu + /// mode. + llvm::Optional<bool> disable_cgroup_filtering; + bool IsPerCpuTracing() const; }; @@ -107,6 +111,7 @@ struct LinuxPerfZeroTscConversion { struct TraceIntelPTGetStateResponse : TraceGetStateResponse { /// The TSC to wall time conversion if it exists, otherwise \b nullptr. 
llvm::Optional<LinuxPerfZeroTscConversion> tsc_perf_zero_conversion; + bool using_cgroup_filtering = false; }; bool fromJSON(const llvm::json::Value &value, diff --git a/lldb/include/lldb/lldb-enumerations.h b/lldb/include/lldb/lldb-enumerations.h index eba2667727f2..ad03f7e43056 100644 --- a/lldb/include/lldb/lldb-enumerations.h +++ b/lldb/include/lldb/lldb-enumerations.h @@ -970,20 +970,30 @@ enum ExpressionEvaluationPhase { /// control flow of a trace. /// /// A single instruction can match one or more of these categories. -FLAGS_ENUM(TraceInstructionControlFlowType){ - /// Any instruction. - eTraceInstructionControlFlowTypeInstruction = (1u << 1), - /// A conditional or unconditional branch/jump. - eTraceInstructionControlFlowTypeBranch = (1u << 2), - /// A conditional or unconditional branch/jump that changed - /// the control flow of the program. - eTraceInstructionControlFlowTypeTakenBranch = (1u << 3), - /// A call to a function. - eTraceInstructionControlFlowTypeCall = (1u << 4), - /// A return from a function. - eTraceInstructionControlFlowTypeReturn = (1u << 5)}; - -LLDB_MARK_AS_BITMASK_ENUM(TraceInstructionControlFlowType) +enum InstructionControlFlowKind { + /// The instruction could not be classified. + eInstructionControlFlowKindUnknown = 0, + /// The instruction is something not listed below, i.e. it's a sequential + /// instruction that doesn't affect the control flow of the program. + eInstructionControlFlowKindOther, + /// The instruction is a near (function) call. + eInstructionControlFlowKindCall, + /// The instruction is a near (function) return. + eInstructionControlFlowKindReturn, + /// The instruction is a near unconditional jump. + eInstructionControlFlowKindJump, + /// The instruction is a near conditional jump. + eInstructionControlFlowKindCondJump, + /// The instruction is a call-like far transfer. + /// E.g. SYSCALL, SYSENTER, or FAR CALL. + eInstructionControlFlowKindFarCall, + /// The instruction is a return-like far transfer. + /// E.g. SYSRET, SYSEXIT, IRET, or FAR RET. + eInstructionControlFlowKindFarReturn, + /// The instruction is a jump-like far transfer. + /// E.g. FAR JMP. + eInstructionControlFlowKindFarJump +}; /// Watchpoint Kind. /// @@ -1153,12 +1163,15 @@ enum TraceCounter { eTraceCounterTSC = 0, }; -// Events that might happen during a trace session. +/// Events that might happen during a trace session. enum TraceEvent { - // Tracing was disabled for some time due to a software trigger + /// Tracing was disabled for some time due to a software trigger eTraceEventDisabledSW, - // Tracing was disable for some time due to a hardware trigger + /// Tracing was disabled for some time due to a hardware trigger eTraceEventDisabledHW, + /// Event due to CPU change for a thread. This event is also fired when + /// it is suddenly not possible to identify the CPU of a given thread.
+ eTraceEventCPUChanged, }; // Enum used to identify which kind of item a \a TraceCursor is pointing at diff --git a/lldb/include/lldb/lldb-forward.h b/lldb/include/lldb/lldb-forward.h index 487b2f20792b..c51e1850338f 100644 --- a/lldb/include/lldb/lldb-forward.h +++ b/lldb/include/lldb/lldb-forward.h @@ -66,6 +66,7 @@ class ConstStringTable; class DWARFCallFrameInfo; class DWARFDataExtractor; class DWARFExpression; +class DWARFExpressionList; class DataBuffer; class WritableDataBuffer; class DataBufferHeap; diff --git a/lldb/source/API/SBInstruction.cpp b/lldb/source/API/SBInstruction.cpp index 6cb9e5dbc1af..ced22628a297 100644 --- a/lldb/source/API/SBInstruction.cpp +++ b/lldb/source/API/SBInstruction.cpp @@ -241,7 +241,8 @@ bool SBInstruction::GetDescription(lldb::SBStream &s) { // didn't have a stream already created, one will get created... FormatEntity::Entry format; FormatEntity::Parse("${addr}: ", format); - inst_sp->Dump(&s.ref(), 0, true, false, nullptr, &sc, nullptr, &format, 0); + inst_sp->Dump(&s.ref(), 0, true, false, /*show_control_flow_kind=*/false, + nullptr, &sc, nullptr, &format, 0); return true; } return false; @@ -275,8 +276,8 @@ void SBInstruction::Print(FileSP out_sp) { StreamFile out_stream(out_sp); FormatEntity::Entry format; FormatEntity::Parse("${addr}: ", format); - inst_sp->Dump(&out_stream, 0, true, false, nullptr, &sc, nullptr, &format, - 0); + inst_sp->Dump(&out_stream, 0, true, false, /*show_control_flow_kind=*/false, + nullptr, &sc, nullptr, &format, 0); } } diff --git a/lldb/source/API/SBInstructionList.cpp b/lldb/source/API/SBInstructionList.cpp index e289e8e9343d..ae87d7965766 100644 --- a/lldb/source/API/SBInstructionList.cpp +++ b/lldb/source/API/SBInstructionList.cpp @@ -165,8 +165,9 @@ bool SBInstructionList::GetDescription(Stream &sref) { addr, eSymbolContextEverything, sc); } - inst->Dump(&sref, max_opcode_byte_size, true, false, nullptr, &sc, - &prev_sc, &format, 0); + inst->Dump(&sref, max_opcode_byte_size, true, false, + /*show_control_flow_kind=*/false, nullptr, &sc, &prev_sc, + &format, 0); sref.EOL(); } return true; diff --git a/lldb/source/API/SBSection.cpp b/lldb/source/API/SBSection.cpp index 733e0db0b5ba..3a9cf20e484a 100644 --- a/lldb/source/API/SBSection.cpp +++ b/lldb/source/API/SBSection.cpp @@ -242,6 +242,15 @@ uint32_t SBSection::GetTargetByteSize() { return 0; } +uint32_t SBSection::GetAlignment() { + LLDB_INSTRUMENT_VA(this); + + SectionSP section_sp(GetSP()); + if (section_sp.get()) + return (1 << section_sp->GetLog2Align()); + return 0; +} + bool SBSection::operator==(const SBSection &rhs) { LLDB_INSTRUMENT_VA(this, rhs); diff --git a/lldb/source/API/SBTrace.cpp b/lldb/source/API/SBTrace.cpp index fe9003237073..2b1f140161b6 100644 --- a/lldb/source/API/SBTrace.cpp +++ b/lldb/source/API/SBTrace.cpp @@ -43,6 +43,24 @@ SBTrace SBTrace::LoadTraceFromFile(SBError &error, SBDebugger &debugger, return SBTrace(trace_or_err.get()); } +SBFileSpec SBTrace::SaveToDisk(SBError &error, const SBFileSpec &bundle_dir, + bool compact) { + LLDB_INSTRUMENT_VA(this, error, bundle_dir, compact); + + error.Clear(); + SBFileSpec file_spec; + + if (!m_opaque_sp) + error.SetErrorString("error: invalid trace"); + else if (Expected<FileSpec> desc_file = + m_opaque_sp->SaveToDisk(bundle_dir.ref(), compact)) + file_spec.SetFileSpec(*desc_file); + else + error.SetErrorString(llvm::toString(desc_file.takeError()).c_str()); + + return file_spec; +} + const char *SBTrace::GetStartConfigurationHelp() { LLDB_INSTRUMENT_VA(this); return m_opaque_sp ? 
m_opaque_sp->GetStartConfigurationHelp() : nullptr; diff --git a/lldb/source/Commands/CommandObjectDisassemble.cpp b/lldb/source/Commands/CommandObjectDisassemble.cpp index 9d081c83c0fb..6c33edc8a3a8 100644 --- a/lldb/source/Commands/CommandObjectDisassemble.cpp +++ b/lldb/source/Commands/CommandObjectDisassemble.cpp @@ -65,6 +65,10 @@ Status CommandObjectDisassemble::CommandOptions::SetOptionValue( show_bytes = true; break; + case 'k': + show_control_flow_kind = true; + break; + case 's': { start_addr = OptionArgParser::ToAddress(execution_context, option_arg, LLDB_INVALID_ADDRESS, &error); @@ -154,6 +158,7 @@ void CommandObjectDisassemble::CommandOptions::OptionParsingStarting( ExecutionContext *execution_context) { show_mixed = false; show_bytes = false; + show_control_flow_kind = false; num_lines_context = 0; num_instructions = 0; func_name.clear(); @@ -493,6 +498,9 @@ bool CommandObjectDisassemble::DoExecute(Args &command, if (m_options.show_bytes) options |= Disassembler::eOptionShowBytes; + if (m_options.show_control_flow_kind) + options |= Disassembler::eOptionShowControlFlowKind; + if (m_options.raw) options |= Disassembler::eOptionRawOuput; diff --git a/lldb/source/Commands/CommandObjectDisassemble.h b/lldb/source/Commands/CommandObjectDisassemble.h index a4b3df8724da..b5146863628d 100644 --- a/lldb/source/Commands/CommandObjectDisassemble.h +++ b/lldb/source/Commands/CommandObjectDisassemble.h @@ -46,6 +46,7 @@ public: bool show_mixed; // Show mixed source/assembly bool show_bytes; + bool show_control_flow_kind; uint32_t num_lines_context = 0; uint32_t num_instructions = 0; bool raw; diff --git a/lldb/source/Commands/CommandObjectProcess.cpp b/lldb/source/Commands/CommandObjectProcess.cpp index c76ae99057f2..d36a574aba7d 100644 --- a/lldb/source/Commands/CommandObjectProcess.cpp +++ b/lldb/source/Commands/CommandObjectProcess.cpp @@ -579,14 +579,14 @@ protected: } } } - + Target *target = m_exe_ctx.GetTargetPtr(); BreakpointIDList run_to_bkpt_ids; // Don't pass an empty run_to_breakpoint list, as Verify will look for the // default breakpoint. if (m_options.m_run_to_bkpt_args.GetArgumentCount() > 0) CommandObjectMultiwordBreakpoint::VerifyBreakpointOrLocationIDs( - m_options.m_run_to_bkpt_args, target, result, &run_to_bkpt_ids, + m_options.m_run_to_bkpt_args, target, result, &run_to_bkpt_ids, BreakpointName::Permissions::disablePerm); if (!result.Succeeded()) { return false; @@ -604,7 +604,7 @@ protected: std::vector<break_id_t> bkpts_disabled; std::vector<BreakpointID> locs_disabled; if (num_run_to_bkpt_ids != 0) { - // Go through the ID's specified, and separate the breakpoints from are + // Go through the ID's specified, and separate the breakpoints from are // the breakpoint.location specifications since the latter require // special handling. We also figure out whether there's at least one // specifier in the set that is enabled. @@ -613,22 +613,22 @@ protected: std::unordered_set<break_id_t> bkpts_with_locs_seen; BreakpointIDList with_locs; bool any_enabled = false; - + for (size_t idx = 0; idx < num_run_to_bkpt_ids; idx++) { BreakpointID bkpt_id = run_to_bkpt_ids.GetBreakpointIDAtIndex(idx); break_id_t bp_id = bkpt_id.GetBreakpointID(); break_id_t loc_id = bkpt_id.GetLocationID(); - BreakpointSP bp_sp + BreakpointSP bp_sp = bkpt_list.FindBreakpointByID(bp_id); - // Note, VerifyBreakpointOrLocationIDs checks for existence, so we + // Note, VerifyBreakpointOrLocationIDs checks for existence, so we // don't need to do it again here. 
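
// [Editorial aside, not part of the commit] The SBTrace::SaveToDisk binding
// added above in SBTrace.cpp is easiest to read from the caller's side. A
// minimal sketch, assuming a target with an active trace; SaveBundle and the
// bundle path are illustrative names, not LLDB API:
#include <cstdio>
#include "lldb/API/SBError.h"
#include "lldb/API/SBFileSpec.h"
#include "lldb/API/SBTarget.h"
#include "lldb/API/SBTrace.h"

static void SaveBundle(lldb::SBTarget &target) {
  lldb::SBTrace trace = target.GetTrace(); // requires an active trace session
  lldb::SBError error;
  lldb::SBFileSpec bundle_dir("/tmp/trace-bundle", /*resolve=*/true);
  // compact == true mirrors the new `trace save -c` option: the plug-in is
  // asked to drop data irrelevant to the traced processes.
  lldb::SBFileSpec desc = trace.SaveToDisk(error, bundle_dir, /*compact=*/true);
  if (error.Fail())
    fprintf(stderr, "trace save failed: %s\n", error.GetCString());
}
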
if (bp_sp->IsEnabled()) { if (loc_id == LLDB_INVALID_BREAK_ID) { - // A breakpoint (without location) was specified. Make sure that + // A breakpoint (without location) was specified. Make sure that // at least one of the locations is enabled. size_t num_locations = bp_sp->GetNumLocations(); for (size_t loc_idx = 0; loc_idx < num_locations; loc_idx++) { - BreakpointLocationSP loc_sp + BreakpointLocationSP loc_sp = bp_sp->GetLocationAtIndex(loc_idx); if (loc_sp->IsEnabled()) { any_enabled = true; @@ -641,7 +641,7 @@ protected: if (loc_sp->IsEnabled()) any_enabled = true; } - + // Then sort the bp & bp.loc entries for later use: if (bkpt_id.GetLocationID() == LLDB_INVALID_BREAK_ID) bkpts_seen.insert(bkpt_id.GetBreakpointID()); @@ -653,14 +653,14 @@ protected: } // Do all the error checking here so once we start disabling we don't // have to back out half-way through. - + // Make sure at least one of the specified breakpoints is enabled. if (!any_enabled) { result.AppendError("at least one of the continue-to breakpoints must " "be enabled."); return false; } - + // Also, if you specify BOTH a breakpoint and one of it's locations, // we flag that as an error, since it won't do what you expect, the // breakpoint directive will mean "run to all locations", which is not @@ -671,7 +671,7 @@ protected: "one of its locations: {0}", bp_id); } } - + // Now go through the breakpoints in the target, disabling all the ones // that the user didn't mention: for (BreakpointSP bp_sp : bkpt_list.Breakpoints()) { @@ -695,7 +695,7 @@ protected: BreakpointLocationSP loc_sp = bp_sp->GetLocationAtIndex(loc_idx); tmp_id.SetBreakpointLocationID(loc_idx); size_t position = 0; - if (!with_locs.FindBreakpointID(tmp_id, &position) + if (!with_locs.FindBreakpointID(tmp_id, &position) && loc_sp->IsEnabled()) { locs_disabled.push_back(tmp_id); loc_sp->SetEnabled(false); @@ -723,20 +723,20 @@ protected: Status error; // For now we can only do -b with synchronous: bool old_sync = GetDebugger().GetAsyncExecution(); - + if (run_to_bkpt_ids.GetSize() != 0) { GetDebugger().SetAsyncExecution(false); synchronous_execution = true; - } + } if (synchronous_execution) error = process->ResumeSynchronous(&stream); else error = process->Resume(); - + if (run_to_bkpt_ids.GetSize() != 0) { GetDebugger().SetAsyncExecution(old_sync); - } - + } + // Now re-enable the breakpoints we disabled: BreakpointList &bkpt_list = target->GetBreakpointList(); for (break_id_t bp_id : bkpts_disabled) { @@ -745,10 +745,10 @@ protected: bp_sp->SetEnabled(true); } for (const BreakpointID &bkpt_id : locs_disabled) { - BreakpointSP bp_sp + BreakpointSP bp_sp = bkpt_list.FindBreakpointByID(bkpt_id.GetBreakpointID()); if (bp_sp) { - BreakpointLocationSP loc_sp + BreakpointLocationSP loc_sp = bp_sp->FindLocationByID(bkpt_id.GetLocationID()); if (loc_sp) loc_sp->SetEnabled(true); @@ -1731,7 +1731,7 @@ protected: bool DoExecute(Args &signal_args, CommandReturnObject &result) override { Target &target = GetSelectedOrDummyTarget(); - // Any signals that are being set should be added to the Target's + // Any signals that are being set should be added to the Target's // DummySignals so they will get applied on rerun, etc. // If we have a process, however, we can do a more accurate job of vetting // the user's options. 
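
// [Editorial aside, not part of the commit] The vetting described in the
// comment above plays out at the command line roughly like this (hedged; the
// exact flag spellings live in Options.td, outside this hunk):
//   (lldb) process handle -p true -s false SIGUSR1
// With no live process, the setting is stored in the target's dummy signals
// and reapplied on every rerun; with a live process, the options are checked
// against the real signal list before being applied.
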
@@ -1761,8 +1761,8 @@ protected: "true or false.\n"); return false; } - - bool no_actions = (stop_action == -1 && pass_action == -1 + + bool no_actions = (stop_action == -1 && pass_action == -1 && notify_action == -1); if (m_options.only_target_values && !no_actions) { result.AppendError("-t is for reporting, not setting, target values."); @@ -1832,9 +1832,9 @@ protected: } auto set_lazy_bool = [] (int action) -> LazyBool { LazyBool lazy; - if (action == -1) + if (action == -1) lazy = eLazyBoolCalculate; - else if (action) + else if (action) lazy = eLazyBoolYes; else lazy = eLazyBoolNo; @@ -1876,7 +1876,7 @@ protected: PrintSignalInformation(result.GetOutputStream(), signal_args, num_signals_set, signals_sp); else - target.PrintDummySignals(result.GetOutputStream(), + target.PrintDummySignals(result.GetOutputStream(), signal_args); if (num_signals_set > 0) @@ -1909,80 +1909,6 @@ protected: } }; -// CommandObjectProcessTraceSave -#define LLDB_OPTIONS_process_trace_save -#include "CommandOptions.inc" - -#pragma mark CommandObjectProcessTraceSave - -class CommandObjectProcessTraceSave : public CommandObjectParsed { -public: - class CommandOptions : public Options { - public: - CommandOptions() { OptionParsingStarting(nullptr); } - - Status SetOptionValue(uint32_t option_idx, llvm::StringRef option_arg, - ExecutionContext *execution_context) override { - Status error; - const int short_option = m_getopt_table[option_idx].val; - - switch (short_option) { - - case 'd': { - m_directory.SetFile(option_arg, FileSpec::Style::native); - FileSystem::Instance().Resolve(m_directory); - break; - } - default: - llvm_unreachable("Unimplemented option"); - } - return error; - } - - void OptionParsingStarting(ExecutionContext *execution_context) override{}; - - llvm::ArrayRef<OptionDefinition> GetDefinitions() override { - return llvm::makeArrayRef(g_process_trace_save_options); - }; - - FileSpec m_directory; - }; - - Options *GetOptions() override { return &m_options; } - CommandObjectProcessTraceSave(CommandInterpreter &interpreter) - : CommandObjectParsed( - interpreter, "process trace save", - "Save the trace of the current process in the specified directory. " - "The directory will be created if needed. " - "This will also create a file <directory>/trace.json with the main " - "properties of the trace session, along with others files which " - "contain the actual trace data. 
The trace.json file can be used " - "later as input for the \"trace load\" command to load the trace " - "in LLDB", - "process trace save [<cmd-options>]", - eCommandRequiresProcess | eCommandTryTargetAPILock | - eCommandProcessMustBeLaunched | eCommandProcessMustBePaused | - eCommandProcessMustBeTraced) {} - - ~CommandObjectProcessTraceSave() override = default; - -protected: - bool DoExecute(Args &command, CommandReturnObject &result) override { - ProcessSP process_sp = m_exe_ctx.GetProcessSP(); - - TraceSP trace_sp = process_sp->GetTarget().GetTrace(); - - if (llvm::Error err = trace_sp->SaveLiveTraceToDisk(m_options.m_directory)) - result.AppendError(toString(std::move(err))); - else - result.SetStatus(eReturnStatusSuccessFinishResult); - - return result.Succeeded(); - } - - CommandOptions m_options; -}; - // CommandObjectProcessTraceStop class CommandObjectProcessTraceStop : public CommandObjectParsed { public: @@ -2020,8 +1946,6 @@ public: : CommandObjectMultiword( interpreter, "trace", "Commands for tracing the current process.", "process trace <subcommand> [<subcommand objects>]") { - LoadSubCommand("save", CommandObjectSP( - new CommandObjectProcessTraceSave(interpreter))); LoadSubCommand("start", CommandObjectSP(new CommandObjectProcessTraceStart( interpreter))); LoadSubCommand("stop", CommandObjectSP( diff --git a/lldb/source/Commands/CommandObjectTarget.cpp b/lldb/source/Commands/CommandObjectTarget.cpp index 2b71f1bc7bc8..51978878c8b9 100644 --- a/lldb/source/Commands/CommandObjectTarget.cpp +++ b/lldb/source/Commands/CommandObjectTarget.cpp @@ -47,12 +47,18 @@ #include "lldb/Target/Thread.h" #include "lldb/Target/ThreadSpec.h" #include "lldb/Utility/Args.h" +#include "lldb/Utility/ConstString.h" +#include "lldb/Utility/FileSpec.h" #include "lldb/Utility/LLDBLog.h" #include "lldb/Utility/State.h" #include "lldb/Utility/Timer.h" #include "lldb/lldb-enumerations.h" #include "lldb/lldb-private-enumerations.h" +#include "clang/CodeGen/ObjectFilePCHContainerOperations.h" +#include "clang/Frontend/CompilerInstance.h" +#include "clang/Frontend/CompilerInvocation.h" +#include "clang/Frontend/FrontendActions.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/FormatAdapters.h" @@ -2155,6 +2161,59 @@ protected: } }; +class CommandObjectTargetModulesDumpClangPCMInfo : public CommandObjectParsed { +public: + CommandObjectTargetModulesDumpClangPCMInfo(CommandInterpreter &interpreter) + : CommandObjectParsed( + interpreter, "target modules dump pcm-info", + "Dump information about the given clang module (pcm).") { + // Take a single file argument. 
+ CommandArgumentData arg{eArgTypeFilename, eArgRepeatPlain}; + m_arguments.push_back({arg}); + } + + ~CommandObjectTargetModulesDumpClangPCMInfo() override = default; + +protected: + bool DoExecute(Args &command, CommandReturnObject &result) override { + if (command.GetArgumentCount() != 1) { + result.AppendErrorWithFormat("'%s' takes exactly one pcm path argument.", + m_cmd_name.c_str()); + return false; + } + + const char *pcm_path = command.GetArgumentAtIndex(0); + FileSpec pcm_file{pcm_path}; + + if (pcm_file.GetFileNameExtension().GetStringRef() != ".pcm") { + result.AppendError("file must have a .pcm extension"); + return false; + } + + if (!FileSystem::Instance().Exists(pcm_file)) { + result.AppendError("pcm file does not exist"); + return false; + } + + clang::CompilerInstance compiler; + compiler.createDiagnostics(); + + const char *clang_args[] = {"clang", pcm_path}; + compiler.setInvocation(clang::createInvocation(clang_args)); + + clang::DumpModuleInfoAction dump_module_info; + dump_module_info.OutputStream = &result.GetOutputStream().AsRawOstream(); + // DumpModuleInfoAction requires ObjectFilePCHContainerReader. + compiler.getPCHContainerOperations()->registerReader( + std::make_unique<clang::ObjectFilePCHContainerReader>()); + + if (compiler.ExecuteAction(dump_module_info)) + result.SetStatus(eReturnStatusSuccessFinishResult); + + return result.Succeeded(); + } +}; + #pragma mark CommandObjectTargetModulesDumpClangAST // Clang AST dumping command @@ -2406,10 +2465,10 @@ public: CommandObjectTargetModulesDump(CommandInterpreter &interpreter) : CommandObjectMultiword( interpreter, "target modules dump", - "Commands for dumping information about one or " - "more target modules.", + "Commands for dumping information about one or more target " + "modules.", "target modules dump " - "[headers|symtab|sections|ast|symfile|line-table] " + "[objfile|symtab|sections|ast|symfile|line-table|pcm-info] " "[<file1> <file2> ...]") { LoadSubCommand("objfile", CommandObjectSP( @@ -2429,6 +2488,10 @@ public: LoadSubCommand("line-table", CommandObjectSP(new CommandObjectTargetModulesDumpLineTable( interpreter))); + LoadSubCommand( + "pcm-info", + CommandObjectSP( + new CommandObjectTargetModulesDumpClangPCMInfo(interpreter))); } ~CommandObjectTargetModulesDump() override = default; diff --git a/lldb/source/Commands/CommandObjectThread.cpp b/lldb/source/Commands/CommandObjectThread.cpp index 993523e06736..ad49d27bb9a7 100644 --- a/lldb/source/Commands/CommandObjectThread.cpp +++ b/lldb/source/Commands/CommandObjectThread.cpp @@ -1033,11 +1033,21 @@ protected: line_table->FindLineEntryByAddress(fun_end_addr, function_start, &end_ptr); + // Since not all source lines will contribute code, check if we are + // setting the breakpoint on the exact line number or the nearest + // subsequent line number and set breakpoints at all the line table + // entries of the chosen line number (exact or nearest subsequent). 
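
// [Editorial worked example, not part of the patch] Suppose only lines 4 and
// 7 of the source file emit code and the user asked for line 5. The first,
// inexact lookup below snaps 5 to the nearest subsequent line-table entry:
//   LineEntry entry;
//   uint32_t idx = sc.comp_unit->FindLineEntry(index_ptr, /*line=*/5, nullptr,
//                                              /*exact=*/false, &entry);
//   // idx != UINT32_MAX and entry.line == 7
// The loop then reruns the lookup with exact == true so that every line-table
// entry for line 7 receives a breakpoint, not just the first one.
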
for (uint32_t line_number : line_numbers) { + LineEntry line_entry; + bool exact = false; uint32_t start_idx_ptr = index_ptr; + start_idx_ptr = sc.comp_unit->FindLineEntry( + index_ptr, line_number, nullptr, exact, &line_entry); + if (start_idx_ptr != UINT32_MAX) + line_number = line_entry.line; + exact = true; + start_idx_ptr = index_ptr; while (start_idx_ptr <= end_ptr) { - LineEntry line_entry; - const bool exact = false; start_idx_ptr = sc.comp_unit->FindLineEntry( start_idx_ptr, line_number, nullptr, exact, &line_entry); if (start_idx_ptr == UINT32_MAX) @@ -2164,6 +2174,10 @@ public: m_dumper_options.forwards = true; break; } + case 'k': { + m_dumper_options.show_control_flow_kind = true; + break; + } case 't': { m_dumper_options.show_tsc = true; break; @@ -2337,6 +2351,10 @@ public: m_verbose = true; break; } + case 'j': { + m_json = true; + break; + } default: llvm_unreachable("Unimplemented option"); } @@ -2345,6 +2363,7 @@ public: void OptionParsingStarting(ExecutionContext *execution_context) override { m_verbose = false; + m_json = false; } llvm::ArrayRef<OptionDefinition> GetDefinitions() override { @@ -2353,15 +2372,9 @@ public: // Instance variables to hold the values for command options. bool m_verbose; + bool m_json; }; - bool DoExecute(Args &command, CommandReturnObject &result) override { - Target &target = m_exe_ctx.GetTargetRef(); - result.GetOutputStream().Format("Trace technology: {0}\n", - target.GetTrace()->GetPluginName()); - return CommandObjectIterateOverThreads::DoExecute(command, result); - } - CommandObjectTraceDumpInfo(CommandInterpreter &interpreter) : CommandObjectIterateOverThreads( interpreter, "thread trace dump info", @@ -2383,7 +2396,7 @@ protected: ThreadSP thread_sp = m_exe_ctx.GetProcessPtr()->GetThreadList().FindThreadByID(tid); trace_sp->DumpTraceInfo(*thread_sp, result.GetOutputStream(), - m_options.m_verbose); + m_options.m_verbose, m_options.m_json); return true; } diff --git a/lldb/source/Commands/CommandObjectTrace.cpp b/lldb/source/Commands/CommandObjectTrace.cpp index 17aded9ed2a0..227de2de7065 100644 --- a/lldb/source/Commands/CommandObjectTrace.cpp +++ b/lldb/source/Commands/CommandObjectTrace.cpp @@ -30,6 +30,108 @@ using namespace lldb; using namespace lldb_private; using namespace llvm; +// CommandObjectTraceSave +#define LLDB_OPTIONS_trace_save +#include "CommandOptions.inc" + +#pragma mark CommandObjectTraceSave + +class CommandObjectTraceSave : public CommandObjectParsed { +public: + class CommandOptions : public Options { + public: + CommandOptions() { OptionParsingStarting(nullptr); } + + Status SetOptionValue(uint32_t option_idx, llvm::StringRef option_arg, + ExecutionContext *execution_context) override { + Status error; + const int short_option = m_getopt_table[option_idx].val; + + switch (short_option) { + case 'c': { + m_compact = true; + break; + } + default: + llvm_unreachable("Unimplemented option"); + } + return error; + } + + void OptionParsingStarting(ExecutionContext *execution_context) override { + m_compact = false; + }; + + llvm::ArrayRef<OptionDefinition> GetDefinitions() override { + return llvm::makeArrayRef(g_trace_save_options); + }; + + bool m_compact; + }; + + Options *GetOptions() override { return &m_options; } + + CommandObjectTraceSave(CommandInterpreter &interpreter) + : CommandObjectParsed( + interpreter, "trace save", + "Save the trace of the current target in the specified directory, " + "which will be created if needed. 
" + "This directory will contain a trace bundle, with all the " + "necessary files the reconstruct the trace session even on a " + "different computer. " + "Part of this bundle is the bundle description file with the name " + "trace.json. This file can be used by the \"trace load\" command " + "to load this trace in LLDB." + "Note: if the current target contains information of multiple " + "processes or targets, they all will be included in the bundle.", + "trace save [<cmd-options>] <bundle_directory>", + eCommandRequiresProcess | eCommandTryTargetAPILock | + eCommandProcessMustBeLaunched | eCommandProcessMustBePaused | + eCommandProcessMustBeTraced) { + CommandArgumentData bundle_dir{eArgTypeDirectoryName, eArgRepeatPlain}; + m_arguments.push_back({bundle_dir}); + } + + void + HandleArgumentCompletion(CompletionRequest &request, + OptionElementVector &opt_element_vector) override { + CommandCompletions::InvokeCommonCompletionCallbacks( + GetCommandInterpreter(), CommandCompletions::eDiskFileCompletion, + request, nullptr); + } + + ~CommandObjectTraceSave() override = default; + +protected: + bool DoExecute(Args &command, CommandReturnObject &result) override { + if (command.size() != 1) { + result.AppendError("a single path to a directory where the trace bundle " + "will be created is required"); + return false; + } + + FileSpec bundle_dir(command[0].ref()); + FileSystem::Instance().Resolve(bundle_dir); + + ProcessSP process_sp = m_exe_ctx.GetProcessSP(); + + TraceSP trace_sp = process_sp->GetTarget().GetTrace(); + + if (llvm::Expected<FileSpec> desc_file = + trace_sp->SaveToDisk(bundle_dir, m_options.m_compact)) { + result.AppendMessageWithFormatv( + "Trace bundle description file written to: {0}", *desc_file); + result.SetStatus(eReturnStatusSuccessFinishResult); + } else { + result.AppendError(toString(desc_file.takeError())); + } + + return result.Succeeded(); + } + + CommandOptions m_options; +}; + // CommandObjectTraceLoad #define LLDB_OPTIONS_trace_load #include "CommandOptions.inc" @@ -75,11 +177,19 @@ public: : CommandObjectParsed( interpreter, "trace load", "Load a post-mortem processor trace session from a trace bundle.", - "trace load") { - CommandArgumentData session_file_arg{eArgTypePath, eArgRepeatPlain}; + "trace load <trace_description_file>") { + CommandArgumentData session_file_arg{eArgTypeFilename, eArgRepeatPlain}; m_arguments.push_back({session_file_arg}); } + void + HandleArgumentCompletion(CompletionRequest &request, + OptionElementVector &opt_element_vector) override { + CommandCompletions::InvokeCommonCompletionCallbacks( + GetCommandInterpreter(), CommandCompletions::eDiskFileCompletion, + request, nullptr); + } + ~CommandObjectTraceLoad() override = default; Options *GetOptions() override { return &m_options; } @@ -284,6 +394,8 @@ CommandObjectTrace::CommandObjectTrace(CommandInterpreter &interpreter) CommandObjectSP(new CommandObjectTraceLoad(interpreter))); LoadSubCommand("dump", CommandObjectSP(new CommandObjectTraceDump(interpreter))); + LoadSubCommand("save", + CommandObjectSP(new CommandObjectTraceSave(interpreter))); LoadSubCommand("schema", CommandObjectSP(new CommandObjectTraceSchema(interpreter))); } diff --git a/lldb/source/Commands/Options.td b/lldb/source/Commands/Options.td index 7755daa878be..7981917fd8b5 100644 --- a/lldb/source/Commands/Options.td +++ b/lldb/source/Commands/Options.td @@ -300,6 +300,11 @@ let Command = "breakpoint command delete" in { let Command = "disassemble" in { def disassemble_options_bytes : Option<"bytes", "b">, 
Desc<"Show opcode bytes when disassembling.">; + def disassemble_options_kind : Option<"kind", "k">, + Desc<"Show instruction control flow kind. Refer to the enum " + "`InstructionControlFlowKind` for a list of control flow kinds. " + "As an important note, far jumps, far calls and far returns often indicate " + "calls to and from the kernel.">; def disassemble_options_context : Option<"context", "C">, Arg<"NumLines">, Desc<"Number of context lines of source to show.">; def disassemble_options_mixed : Option<"mixed", "m">, @@ -783,14 +788,6 @@ let Command = "process save_core" in { "This allows core files to be saved in different formats.">; } -let Command = "process trace save" in { - def process_trace_save_directory: Option<"directory", "d">, - Group<1>, - Arg<"Value">, Required, - Desc<"The directory where the trace will be saved." - "It will be created if it does not exist.">; -} - let Command = "script import" in { def script_import_allow_reload : Option<"allow-reload", "r">, Group<1>, Desc<"Allow the script to be loaded even if it was already loaded before. " @@ -1150,6 +1147,11 @@ let Command = "thread trace dump instructions" in { def thread_trace_dump_instructions_pretty_print: Option<"pretty-json", "J">, Group<1>, Desc<"Dump in JSON format but pretty printing the output for easier readability.">; + def thread_trace_dump_instructions_show_kind : Option<"kind", "k">, Group<1>, + Desc<"Show instruction control flow kind. Refer to the enum " + "`InstructionControlFlowKind` for a list of control flow kinds. " + "As an important note, far jumps, far calls and far returns often indicate " + "calls to and from the kernel.">; def thread_trace_dump_instructions_show_tsc : Option<"tsc", "t">, Group<1>, Desc<"For each instruction, print the corresponding timestamp counter if " "available.">; @@ -1167,6 +1169,8 @@ let Command = "thread trace dump instructions" in { let Command = "thread trace dump info" in { def thread_trace_dump_info_verbose : Option<"verbose", "v">, Group<1>, Desc<"show verbose thread trace dump info">; + def thread_trace_dump_info_json: Option<"json", "j">, Group<1>, + Desc<"Dump in JSON format.">; } let Command = "type summary add" in { @@ -1349,6 +1353,14 @@ let Command = "trace load" in { "implementation.">; } +let Command = "trace save" in { + def trace_save_compact: Option<"compact", "c">, + Group<1>, + Desc<"Try not to save to disk information irrelevant to the traced " + "processes. Each trace plug-in implements this in a different " + "fashion.">; +} + let Command = "trace dump" in { def trace_dump_verbose : Option<"verbose", "v">, Group<1>, Desc<"Show verbose trace information.">; diff --git a/lldb/source/Core/Debugger.cpp b/lldb/source/Core/Debugger.cpp index f17cd8856a6d..62857c181af8 100644 --- a/lldb/source/Core/Debugger.cpp +++ b/lldb/source/Core/Debugger.cpp @@ -1835,9 +1835,20 @@ void Debugger::HandleProgressEvent(const lldb::EventSP &event_sp) { // going to show the progress.
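
// [Editorial aside, not part of the commit] At the command line, the options
// defined above look roughly like this (illustrative):
//   (lldb) disassemble --kind
//   (lldb) thread trace dump instructions -k
//   (lldb) thread trace dump info --json
// The first two prepend a fixed-width classification column ("call",
// "cond jump", "far call", ...) produced by Instruction::Dump's "%-12s"
// formatting further below; the last prints the per-thread trace information
// as JSON instead of prose.
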
const uint64_t id = data->GetID(); if (m_current_event_id) { + Log *log = GetLog(LLDBLog::Events); + if (log && log->GetVerbose()) { + StreamString log_stream; + log_stream.AsRawOstream() + << static_cast<void *>(this) << " Debugger(" << GetID() + << ")::HandleProgressEvent( m_current_event_id = " + << *m_current_event_id << ", data = { "; + data->Dump(&log_stream); + log_stream << " } )"; + log->PutString(log_stream.GetString()); + } if (id != *m_current_event_id) return; - if (data->GetCompleted()) + if (data->GetCompleted() == data->GetTotal()) m_current_event_id.reset(); } else { m_current_event_id = id; @@ -1860,7 +1871,7 @@ void Debugger::HandleProgressEvent(const lldb::EventSP &event_sp) { // Print over previous line, if any. output->Printf("\r"); - if (data->GetCompleted()) { + if (data->GetCompleted() == data->GetTotal()) { // Clear the current line. output->Printf("\x1B[2K"); output->Flush(); diff --git a/lldb/source/Core/Disassembler.cpp b/lldb/source/Core/Disassembler.cpp index 00d92053bc4f..7a9e214748a7 100644 --- a/lldb/source/Core/Disassembler.cpp +++ b/lldb/source/Core/Disassembler.cpp @@ -527,8 +527,11 @@ void Disassembler::PrintInstructions(Debugger &debugger, const ArchSpec &arch, } const bool show_bytes = (options & eOptionShowBytes) != 0; - inst->Dump(&strm, max_opcode_byte_size, true, show_bytes, &exe_ctx, &sc, - &prev_sc, nullptr, address_text_size); + const bool show_control_flow_kind = + (options & eOptionShowControlFlowKind) != 0; + inst->Dump(&strm, max_opcode_byte_size, true, show_bytes, + show_control_flow_kind, &exe_ctx, &sc, &prev_sc, nullptr, + address_text_size); strm.EOL(); } else { break; @@ -568,6 +571,334 @@ Instruction::Instruction(const Address &address, AddressClass addr_class) Instruction::~Instruction() = default; +namespace x86 { + +/// These are the three values deciding instruction control flow kind. +/// The InstructionLengthDecode function decodes an instruction and returns +/// this struct. +/// +/// primary_opcode +/// Primary opcode of the instruction. +/// For a one-byte opcode instruction, it's the first byte after any prefixes. +/// For two- and three-byte opcodes, it's the last byte of the opcode. +/// +/// opcode_len +/// The length of the opcode in bytes. Valid opcode lengths are 1, 2, or 3. +/// +/// modrm +/// ModR/M byte of the instruction. +/// Bits[7:6] indicate MOD. Bits[5:3] specify a register and R/M bits[2:0] +/// may contain a register or specify an addressing mode, depending on MOD. +struct InstructionOpcodeAndModrm { + uint8_t primary_opcode; + uint8_t opcode_len; + uint8_t modrm; +}; + +/// Determine the InstructionControlFlowKind based on opcode and modrm bytes. +/// Refer to http://ref.x86asm.net/coder.html for the full list of opcodes and +/// instructions. +/// +/// \param[in] opcode_and_modrm +/// Contains primary_opcode byte, its length, and ModR/M byte. +/// Refer to the struct InstructionOpcodeAndModrm for details. +/// +/// \return +/// The control flow kind of the instruction or +/// eInstructionControlFlowKindOther if the instruction doesn't affect +/// the control flow of the program.
+lldb::InstructionControlFlowKind +MapOpcodeIntoControlFlowKind(InstructionOpcodeAndModrm opcode_and_modrm) { + uint8_t opcode = opcode_and_modrm.primary_opcode; + uint8_t opcode_len = opcode_and_modrm.opcode_len; + uint8_t modrm = opcode_and_modrm.modrm; + + if (opcode_len > 2) + return lldb::eInstructionControlFlowKindOther; + + if (opcode >= 0x70 && opcode <= 0x7F) { + if (opcode_len == 1) + return lldb::eInstructionControlFlowKindCondJump; + else + return lldb::eInstructionControlFlowKindOther; + } + + if (opcode >= 0x80 && opcode <= 0x8F) { + if (opcode_len == 2) + return lldb::eInstructionControlFlowKindCondJump; + else + return lldb::eInstructionControlFlowKindOther; + } + + switch (opcode) { + case 0x9A: + if (opcode_len == 1) + return lldb::eInstructionControlFlowKindFarCall; + break; + case 0xFF: + if (opcode_len == 1) { + uint8_t modrm_reg = (modrm >> 3) & 7; + if (modrm_reg == 2) + return lldb::eInstructionControlFlowKindCall; + else if (modrm_reg == 3) + return lldb::eInstructionControlFlowKindFarCall; + else if (modrm_reg == 4) + return lldb::eInstructionControlFlowKindJump; + else if (modrm_reg == 5) + return lldb::eInstructionControlFlowKindFarJump; + } + break; + case 0xE8: + if (opcode_len == 1) + return lldb::eInstructionControlFlowKindCall; + break; + case 0xCD: + case 0xCC: + case 0xCE: + case 0xF1: + if (opcode_len == 1) + return lldb::eInstructionControlFlowKindFarCall; + break; + case 0xCF: + if (opcode_len == 1) + return lldb::eInstructionControlFlowKindFarReturn; + break; + case 0xE9: + case 0xEB: + if (opcode_len == 1) + return lldb::eInstructionControlFlowKindJump; + break; + case 0xEA: + if (opcode_len == 1) + return lldb::eInstructionControlFlowKindFarJump; + break; + case 0xE3: + case 0xE0: + case 0xE1: + case 0xE2: + if (opcode_len == 1) + return lldb::eInstructionControlFlowKindCondJump; + break; + case 0xC3: + case 0xC2: + if (opcode_len == 1) + return lldb::eInstructionControlFlowKindReturn; + break; + case 0xCB: + case 0xCA: + if (opcode_len == 1) + return lldb::eInstructionControlFlowKindFarReturn; + break; + case 0x05: + case 0x34: + if (opcode_len == 2) + return lldb::eInstructionControlFlowKindFarCall; + break; + case 0x35: + case 0x07: + if (opcode_len == 2) + return lldb::eInstructionControlFlowKindFarReturn; + break; + case 0x01: + if (opcode_len == 2) { + switch (modrm) { + case 0xc1: + return lldb::eInstructionControlFlowKindFarCall; + case 0xc2: + case 0xc3: + return lldb::eInstructionControlFlowKindFarReturn; + default: + break; + } + } + break; + default: + break; + } + + return lldb::eInstructionControlFlowKindOther; +} + +/// Decode an instruction into opcode, modrm and opcode_len. +/// Refer to http://ref.x86asm.net/coder.html for the instruction bytes layout. +/// Opcodes in x86 are generally the first byte of instruction, though two-byte +/// instructions and prefixes exist. ModR/M is the byte following the opcode +/// and adds additional information for how the instruction is executed. +/// +/// \param[in] inst_bytes +/// Raw bytes of the instruction +/// +/// +/// \param[in] bytes_len +/// The length of the inst_bytes array. +/// +/// \param[in] is_exec_mode_64b +/// If true, the execution mode is 64 bit. +/// +/// \return +/// Returns decoded instruction as struct InstructionOpcodeAndModrm, holding +/// primary_opcode, opcode_len and modrm byte. Refer to the struct definition +/// for more details. +/// Otherwise if the given instruction is invalid, returns None. 
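
// [Editorial worked example, not part of the patch] Two checks for this pair
// of helpers, written as a self-contained sketch (assumes <cassert>).
// FF D0 ("call rax") keeps its discriminator in ModR/M bits [5:3]:
// (0xD0 >> 3) & 7 == 2, which is the call row of the 0xFF opcode group.
InstructionOpcodeAndModrm call_rax = {/*primary_opcode=*/0xFF,
                                      /*opcode_len=*/1, /*modrm=*/0xD0};
assert(MapOpcodeIntoControlFlowKind(call_rax) ==
       lldb::eInstructionControlFlowKindCall);

// 0F 05 (SYSCALL) selects the two-byte opcode map, so the decoder defined
// below returns {primary_opcode = 0x05, opcode_len = 2}, classified as a far
// call, i.e. a transfer into the kernel. The trailing zero pads the buffer
// because the decoder peeks one byte past the opcode for ModR/M.
const uint8_t syscall_bytes[] = {0x0f, 0x05, 0x00};
llvm::Optional<InstructionOpcodeAndModrm> decoded = InstructionLengthDecode(
    syscall_bytes, sizeof(syscall_bytes), /*is_exec_mode_64b=*/true);
assert(decoded && MapOpcodeIntoControlFlowKind(*decoded) ==
                      lldb::eInstructionControlFlowKindFarCall);
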
+llvm::Optional<InstructionOpcodeAndModrm> +InstructionLengthDecode(const uint8_t *inst_bytes, int bytes_len, + bool is_exec_mode_64b) { + int op_idx = 0; + bool prefix_done = false; + InstructionOpcodeAndModrm ret = {0, 0, 0}; + + // In most cases, the primary_opcode is the first byte of the instruction + // but some instructions have a prefix to be skipped for these calculations. + // The following mapping is inspired by libipt's instruction decoding logic + // in `src/pt_ild.c`. + while (!prefix_done) { + if (op_idx >= bytes_len) + return llvm::None; + + ret.primary_opcode = inst_bytes[op_idx]; + switch (ret.primary_opcode) { + // prefix_ignore + case 0x26: + case 0x2e: + case 0x36: + case 0x3e: + case 0x64: + case 0x65: + // prefix_osz, prefix_asz + case 0x66: + case 0x67: + // prefix_lock, prefix_f2, prefix_f3 + case 0xf0: + case 0xf2: + case 0xf3: + op_idx++; + break; + + // prefix_rex + case 0x40: + case 0x41: + case 0x42: + case 0x43: + case 0x44: + case 0x45: + case 0x46: + case 0x47: + case 0x48: + case 0x49: + case 0x4a: + case 0x4b: + case 0x4c: + case 0x4d: + case 0x4e: + case 0x4f: + if (is_exec_mode_64b) + op_idx++; + else + prefix_done = true; + break; + + // prefix_vex_c4, c5 + case 0xc5: + if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) { + prefix_done = true; + break; + } + + ret.opcode_len = 2; + ret.primary_opcode = inst_bytes[op_idx + 2]; + ret.modrm = inst_bytes[op_idx + 3]; + return ret; + + case 0xc4: + if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) { + prefix_done = true; + break; + } + ret.opcode_len = inst_bytes[op_idx + 1] & 0x1f; + ret.primary_opcode = inst_bytes[op_idx + 3]; + ret.modrm = inst_bytes[op_idx + 4]; + return ret; + + // prefix_evex + case 0x62: + if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) { + prefix_done = true; + break; + } + ret.opcode_len = inst_bytes[op_idx + 1] & 0x03; + ret.primary_opcode = inst_bytes[op_idx + 4]; + ret.modrm = inst_bytes[op_idx + 5]; + return ret; + + default: + prefix_done = true; + break; + } + } // prefix done + + ret.primary_opcode = inst_bytes[op_idx]; + ret.modrm = inst_bytes[op_idx + 1]; + ret.opcode_len = 1; + + // If the first opcode byte is 0x0F, it's a two- or three-byte opcode. + if (ret.primary_opcode == 0x0F) { + ret.primary_opcode = inst_bytes[++op_idx]; // get the next byte + + if (ret.primary_opcode == 0x38) { + ret.opcode_len = 3; + ret.primary_opcode = inst_bytes[++op_idx]; // get the next byte + ret.modrm = inst_bytes[op_idx + 1]; + } else if (ret.primary_opcode == 0x3A) { + ret.opcode_len = 3; + ret.primary_opcode = inst_bytes[++op_idx]; + ret.modrm = inst_bytes[op_idx + 1]; + } else if ((ret.primary_opcode & 0xf8) == 0x38) { + ret.opcode_len = 0; + ret.primary_opcode = inst_bytes[++op_idx]; + ret.modrm = inst_bytes[op_idx + 1]; + } else if (ret.primary_opcode == 0x0F) { + ret.opcode_len = 3; + // opcode is 0x0F, no need to update it + ret.modrm = inst_bytes[op_idx + 1]; + } else { + ret.opcode_len = 2; + ret.modrm = inst_bytes[op_idx + 1]; + } + } + + return ret; +} + +lldb::InstructionControlFlowKind GetControlFlowKind(bool is_exec_mode_64b, + Opcode m_opcode) { + llvm::Optional<InstructionOpcodeAndModrm> ret = llvm::None; + + if (m_opcode.GetOpcodeBytes() == nullptr || m_opcode.GetByteSize() <= 0) { + // x86_64 and i386 instructions are categorized as Opcode::Type::eTypeBytes + return lldb::eInstructionControlFlowKindUnknown; + } + + // Opcode bytes will be decoded into primary_opcode, modrm and opcode length.
+ // These are the three values deciding instruction control flow kind. + ret = InstructionLengthDecode((const uint8_t *)m_opcode.GetOpcodeBytes(), + m_opcode.GetByteSize(), is_exec_mode_64b); + if (!ret) + return lldb::eInstructionControlFlowKindUnknown; + else + return MapOpcodeIntoControlFlowKind(ret.value()); +} + +} // namespace x86 + +lldb::InstructionControlFlowKind +Instruction::GetControlFlowKind(const ArchSpec &arch) { + if (arch.GetTriple().getArch() == llvm::Triple::x86) + return x86::GetControlFlowKind(/*is_exec_mode_64b=*/false, m_opcode); + else if (arch.GetTriple().getArch() == llvm::Triple::x86_64) + return x86::GetControlFlowKind(/*is_exec_mode_64b=*/true, m_opcode); + else + return eInstructionControlFlowKindUnknown; // not implemented +} + AddressClass Instruction::GetAddressClass() { if (m_address_class == AddressClass::eInvalid) m_address_class = m_address.GetAddressClass(); @@ -576,6 +907,7 @@ AddressClass Instruction::GetAddressClass() { void Instruction::Dump(lldb_private::Stream *s, uint32_t max_opcode_byte_size, bool show_address, bool show_bytes, + bool show_control_flow_kind, const ExecutionContext *exe_ctx, const SymbolContext *sym_ctx, const SymbolContext *prev_sym_ctx, @@ -613,6 +945,38 @@ void Instruction::Dump(lldb_private::Stream *s, uint32_t max_opcode_byte_size, } } + if (show_control_flow_kind) { + switch (GetControlFlowKind(exe_ctx->GetTargetRef().GetArchitecture())) { + case eInstructionControlFlowKindUnknown: + ss.Printf("%-12s", "unknown"); + break; + case eInstructionControlFlowKindOther: + ss.Printf("%-12s", "other"); + break; + case eInstructionControlFlowKindCall: + ss.Printf("%-12s", "call"); + break; + case eInstructionControlFlowKindReturn: + ss.Printf("%-12s", "return"); + break; + case eInstructionControlFlowKindJump: + ss.Printf("%-12s", "jump"); + break; + case eInstructionControlFlowKindCondJump: + ss.Printf("%-12s", "cond jump"); + break; + case eInstructionControlFlowKindFarCall: + ss.Printf("%-12s", "far call"); + break; + case eInstructionControlFlowKindFarReturn: + ss.Printf("%-12s", "far return"); + break; + case eInstructionControlFlowKindFarJump: + ss.Printf("%-12s", "far jump"); + break; + } + } + const size_t opcode_pos = ss.GetSizeOfLastLine(); // The default opcode size of 7 characters is plenty for most architectures @@ -957,6 +1321,7 @@ InstructionSP InstructionList::GetInstructionAtAddress(const Address &address) { } void InstructionList::Dump(Stream *s, bool show_address, bool show_bytes, + bool show_control_flow_kind, const ExecutionContext *exe_ctx) { const uint32_t max_opcode_byte_size = GetMaxOpcocdeByteSize(); collection::const_iterator pos, begin, end; @@ -975,8 +1340,9 @@ void InstructionList::Dump(Stream *s, bool show_address, bool show_bytes, pos != end; ++pos) { if (pos != begin) s->EOL(); - (*pos)->Dump(s, max_opcode_byte_size, show_address, show_bytes, exe_ctx, - nullptr, nullptr, disassembly_format, 0); + (*pos)->Dump(s, max_opcode_byte_size, show_address, show_bytes, + show_control_flow_kind, exe_ctx, nullptr, nullptr, + disassembly_format, 0); } } @@ -994,7 +1360,7 @@ InstructionList::GetIndexOfNextBranchInstruction(uint32_t start, size_t num_instructions = m_instructions.size(); uint32_t next_branch = UINT32_MAX; - + if (found_calls) *found_calls = false; for (size_t i = start; i < num_instructions; i++) { diff --git a/lldb/source/Core/DumpDataExtractor.cpp b/lldb/source/Core/DumpDataExtractor.cpp index 4ef1df1aeb0f..dc96a3454b72 100644 --- a/lldb/source/Core/DumpDataExtractor.cpp +++ 
b/lldb/source/Core/DumpDataExtractor.cpp @@ -170,10 +170,11 @@ static lldb::offset_t DumpInstructions(const DataExtractor &DE, Stream *s, offset += bytes_consumed; const bool show_address = base_addr != LLDB_INVALID_ADDRESS; const bool show_bytes = true; + const bool show_control_flow_kind = true; ExecutionContext exe_ctx; exe_scope->CalculateExecutionContext(exe_ctx); - disassembler_sp->GetInstructionList().Dump(s, show_address, show_bytes, - &exe_ctx); + disassembler_sp->GetInstructionList().Dump( + s, show_address, show_bytes, show_control_flow_kind, &exe_ctx); } } } else diff --git a/lldb/source/Core/Module.cpp b/lldb/source/Core/Module.cpp index 41c21e1dc326..893e20837124 100644 --- a/lldb/source/Core/Module.cpp +++ b/lldb/source/Core/Module.cpp @@ -144,9 +144,7 @@ Module::Module(const ModuleSpec &module_spec) module_spec.GetArchitecture().GetArchitectureName(), module_spec.GetFileSpec().GetPath().c_str(), module_spec.GetObjectName().IsEmpty() ? "" : "(", - module_spec.GetObjectName().IsEmpty() - ? "" - : module_spec.GetObjectName().AsCString(""), + module_spec.GetObjectName().AsCString(""), module_spec.GetObjectName().IsEmpty() ? "" : ")"); auto data_sp = module_spec.GetData(); @@ -254,8 +252,7 @@ Module::Module(const FileSpec &file_spec, const ArchSpec &arch, LLDB_LOGF(log, "%p Module::Module((%s) '%s%s%s%s')", static_cast<void *>(this), m_arch.GetArchitectureName(), m_file.GetPath().c_str(), m_object_name.IsEmpty() ? "" : "(", - m_object_name.IsEmpty() ? "" : m_object_name.AsCString(""), - m_object_name.IsEmpty() ? "" : ")"); + m_object_name.AsCString(""), m_object_name.IsEmpty() ? "" : ")"); } Module::Module() : m_file_has_changed(false), m_first_file_changed_log(false) { @@ -283,8 +280,7 @@ Module::~Module() { LLDB_LOGF(log, "%p Module::~Module((%s) '%s%s%s%s')", static_cast<void *>(this), m_arch.GetArchitectureName(), m_file.GetPath().c_str(), m_object_name.IsEmpty() ? "" : "(", - m_object_name.IsEmpty() ? "" : m_object_name.AsCString(""), - m_object_name.IsEmpty() ? "" : ")"); + m_object_name.AsCString(""), m_object_name.IsEmpty() ? "" : ")"); // Release any auto pointers before we start tearing down our member // variables since the object file and symbol files might need to make // function calls back into this module object. The ordering is important @@ -1000,8 +996,7 @@ void Module::FindTypes( FindTypes_Impl(type_basename_const_str, CompilerDeclContext(), max_matches, searched_symbol_files, typesmap); if (typesmap.GetSize()) - typesmap.RemoveMismatchedTypes(std::string(type_scope), - std::string(type_basename), type_class, + typesmap.RemoveMismatchedTypes(type_scope, type_basename, type_class, exact_match); } else { // The type is not in a namespace/class scope, just search for it by @@ -1011,15 +1006,13 @@ void Module::FindTypes( // class prefix (like "struct", "class", "union", "typedef" etc). 
FindTypes_Impl(ConstString(type_basename), CompilerDeclContext(), UINT_MAX, searched_symbol_files, typesmap); - typesmap.RemoveMismatchedTypes(std::string(type_scope), - std::string(type_basename), type_class, + typesmap.RemoveMismatchedTypes(type_scope, type_basename, type_class, exact_match); } else { FindTypes_Impl(name, CompilerDeclContext(), UINT_MAX, searched_symbol_files, typesmap); if (exact_match) { - std::string name_str(name.AsCString("")); - typesmap.RemoveMismatchedTypes(std::string(type_scope), name_str, + typesmap.RemoveMismatchedTypes(type_scope, name.GetStringRef(), type_class, exact_match); } } diff --git a/lldb/source/Core/ValueObjectVariable.cpp b/lldb/source/Core/ValueObjectVariable.cpp index 8e89503a8a76..4e2bd12c1053 100644 --- a/lldb/source/Core/ValueObjectVariable.cpp +++ b/lldb/source/Core/ValueObjectVariable.cpp @@ -13,7 +13,7 @@ #include "lldb/Core/Declaration.h" #include "lldb/Core/Module.h" #include "lldb/Core/Value.h" -#include "lldb/Expression/DWARFExpression.h" +#include "lldb/Expression/DWARFExpressionList.h" #include "lldb/Symbol/Function.h" #include "lldb/Symbol/ObjectFile.h" #include "lldb/Symbol/SymbolContext.h" @@ -127,17 +127,16 @@ bool ValueObjectVariable::UpdateValue() { m_error.Clear(); Variable *variable = m_variable_sp.get(); - DWARFExpression &expr = variable->LocationExpression(); + DWARFExpressionList &expr_list = variable->LocationExpressionList(); if (variable->GetLocationIsConstantValueData()) { // expr doesn't contain DWARF bytes, it contains the constant variable // value bytes themselves... - if (expr.GetExpressionData(m_data)) { - if (m_data.GetDataStart() && m_data.GetByteSize()) + if (expr_list.GetExpressionData(m_data)) { + if (m_data.GetDataStart() && m_data.GetByteSize()) m_value.SetBytes(m_data.GetDataStart(), m_data.GetByteSize()); m_value.SetContext(Value::ContextType::Variable, variable); - } - else + } else m_error.SetErrorString("empty constant data"); // constant bytes can't be edited - sorry m_resolved_value.SetContext(Value::ContextType::Invalid, nullptr); @@ -151,7 +150,7 @@ bool ValueObjectVariable::UpdateValue() { m_data.SetAddressByteSize(target->GetArchitecture().GetAddressByteSize()); } - if (expr.IsLocationList()) { + if (!expr_list.IsAlwaysValidSingleExpr()) { SymbolContext sc; variable->CalculateSymbolContext(&sc); if (sc.function) @@ -160,8 +159,8 @@ bool ValueObjectVariable::UpdateValue() { target); } Value old_value(m_value); - if (expr.Evaluate(&exe_ctx, nullptr, loclist_base_load_addr, nullptr, - nullptr, m_value, &m_error)) { + if (expr_list.Evaluate(&exe_ctx, nullptr, loclist_base_load_addr, nullptr, + nullptr, m_value, &m_error)) { m_resolved_value = m_value; m_value.SetContext(Value::ContextType::Variable, variable); @@ -246,7 +245,7 @@ bool ValueObjectVariable::UpdateValue() { m_resolved_value.SetContext(Value::ContextType::Invalid, nullptr); } } - + return m_error.Success(); } diff --git a/lldb/source/Expression/DWARFExpression.cpp b/lldb/source/Expression/DWARFExpression.cpp index 1f11907dc64c..9e6b21fc25ea 100644 --- a/lldb/source/Expression/DWARFExpression.cpp +++ b/lldb/source/Expression/DWARFExpression.cpp @@ -45,29 +45,10 @@ using namespace lldb; using namespace lldb_private; using namespace lldb_private::dwarf; -static lldb::addr_t -ReadAddressFromDebugAddrSection(const DWARFUnit *dwarf_cu, - uint32_t index) { - uint32_t index_size = dwarf_cu->GetAddressByteSize(); - dw_offset_t addr_base = dwarf_cu->GetAddrBase(); - lldb::offset_t offset = addr_base + index * index_size; - const 
DWARFDataExtractor &data = - dwarf_cu->GetSymbolFileDWARF().GetDWARFContext().getOrLoadAddrData(); - if (data.ValidOffsetForDataOfSize(offset, index_size)) - return data.GetMaxU64_unchecked(&offset, index_size); - return LLDB_INVALID_ADDRESS; -} - // DWARFExpression constructor -DWARFExpression::DWARFExpression() : m_module_wp(), m_data() {} - -DWARFExpression::DWARFExpression(lldb::ModuleSP module_sp, - const DataExtractor &data, - const DWARFUnit *dwarf_cu) - : m_module_wp(), m_data(data), m_dwarf_cu(dwarf_cu) { - if (module_sp) - m_module_wp = module_sp; -} +DWARFExpression::DWARFExpression() : m_data() {} + +DWARFExpression::DWARFExpression(const DataExtractor &data) : m_data(data) {} // Destructor DWARFExpression::~DWARFExpression() = default; @@ -86,71 +67,19 @@ void DWARFExpression::UpdateValue(uint64_t const_value, m_data.SetAddressByteSize(addr_byte_size); } -void DWARFExpression::DumpLocation(Stream *s, const DataExtractor &data, - lldb::DescriptionLevel level, +void DWARFExpression::DumpLocation(Stream *s, lldb::DescriptionLevel level, ABI *abi) const { - llvm::DWARFExpression(data.GetAsLLVM(), data.GetAddressByteSize()) + llvm::DWARFExpression(m_data.GetAsLLVM(), m_data.GetAddressByteSize()) .print(s->AsRawOstream(), llvm::DIDumpOptions(), abi ? &abi->GetMCRegisterInfo() : nullptr, nullptr); } -void DWARFExpression::SetLocationListAddresses(addr_t cu_file_addr, - addr_t func_file_addr) { - m_loclist_addresses = LoclistAddresses{cu_file_addr, func_file_addr}; -} - -int DWARFExpression::GetRegisterKind() { return m_reg_kind; } +RegisterKind DWARFExpression::GetRegisterKind() const { return m_reg_kind; } void DWARFExpression::SetRegisterKind(RegisterKind reg_kind) { m_reg_kind = reg_kind; } -bool DWARFExpression::IsLocationList() const { - return bool(m_loclist_addresses); -} - -namespace { -/// Implement enough of the DWARFObject interface in order to be able to call -/// DWARFLocationTable::dumpLocationList. We don't have access to a real -/// DWARFObject here because DWARFExpression is used in non-DWARF scenarios too. -class DummyDWARFObject final: public llvm::DWARFObject { -public: - DummyDWARFObject(bool IsLittleEndian) : IsLittleEndian(IsLittleEndian) {} - - bool isLittleEndian() const override { return IsLittleEndian; } - - llvm::Optional<llvm::RelocAddrEntry> find(const llvm::DWARFSection &Sec, - uint64_t Pos) const override { - return llvm::None; - } -private: - bool IsLittleEndian; -}; -} - -void DWARFExpression::GetDescription(Stream *s, lldb::DescriptionLevel level, - ABI *abi) const { - if (IsLocationList()) { - // We have a location list - lldb::offset_t offset = 0; - std::unique_ptr<llvm::DWARFLocationTable> loctable_up = - m_dwarf_cu->GetLocationTable(m_data); - - llvm::MCRegisterInfo *MRI = abi ? 
&abi->GetMCRegisterInfo() : nullptr; - llvm::DIDumpOptions DumpOpts; - DumpOpts.RecoverableErrorHandler = [&](llvm::Error E) { - s->AsRawOstream() << "error: " << toString(std::move(E)); - }; - loctable_up->dumpLocationList( - &offset, s->AsRawOstream(), - llvm::object::SectionedAddress{m_loclist_addresses->cu_file_addr}, MRI, - DummyDWARFObject(m_data.GetByteOrder() == eByteOrderLittle), nullptr, - DumpOpts, s->GetIndentLevel() + 2); - } else { - // We have a normal location that contains DW_OP location opcodes - DumpLocation(s, m_data, level, abi); - } -} static bool ReadRegisterValueAsScalar(RegisterContext *reg_ctx, lldb::RegisterKind reg_kind, @@ -409,11 +338,10 @@ static offset_t GetOpcodeDataSize(const DataExtractor &data, return LLDB_INVALID_OFFSET; } -lldb::addr_t DWARFExpression::GetLocation_DW_OP_addr(uint32_t op_addr_idx, +lldb::addr_t DWARFExpression::GetLocation_DW_OP_addr(const DWARFUnit *dwarf_cu, + uint32_t op_addr_idx, bool &error) const { error = false; - if (IsLocationList()) - return LLDB_INVALID_ADDRESS; lldb::offset_t offset = 0; uint32_t curr_op_addr_idx = 0; while (m_data.ValidOffset(offset)) { @@ -423,19 +351,18 @@ lldb::addr_t DWARFExpression::GetLocation_DW_OP_addr(uint32_t op_addr_idx, const lldb::addr_t op_file_addr = m_data.GetAddress(&offset); if (curr_op_addr_idx == op_addr_idx) return op_file_addr; - else - ++curr_op_addr_idx; + ++curr_op_addr_idx; } else if (op == DW_OP_GNU_addr_index || op == DW_OP_addrx) { uint64_t index = m_data.GetULEB128(&offset); if (curr_op_addr_idx == op_addr_idx) { - if (!m_dwarf_cu) { + if (!dwarf_cu) { error = true; break; } - return ReadAddressFromDebugAddrSection(m_dwarf_cu, index); - } else - ++curr_op_addr_idx; + return dwarf_cu->ReadAddressFromDebugAddrSection(index); + } + ++curr_op_addr_idx; } else { const offset_t op_arg_size = GetOpcodeDataSize(m_data, offset, op); if (op_arg_size == LLDB_INVALID_OFFSET) { @@ -449,8 +376,6 @@ lldb::addr_t DWARFExpression::GetLocation_DW_OP_addr(uint32_t op_addr_idx, } bool DWARFExpression::Update_DW_OP_addr(lldb::addr_t file_addr) { - if (IsLocationList()) - return false; lldb::offset_t offset = 0; while (m_data.ValidOffset(offset)) { const uint8_t op = m_data.GetU8(&offset); @@ -487,11 +412,6 @@ bool DWARFExpression::Update_DW_OP_addr(lldb::addr_t file_addr) { } bool DWARFExpression::ContainsThreadLocalStorage() const { - // We are assuming for now that any thread local variable will not have a - // location list. This has been true for all thread local variables we have - // seen so far produced by any compiler. - if (IsLocationList()) - return false; lldb::offset_t offset = 0; while (m_data.ValidOffset(offset)) { const uint8_t op = m_data.GetU8(&offset); @@ -501,27 +421,18 @@ bool DWARFExpression::ContainsThreadLocalStorage() const { const offset_t op_arg_size = GetOpcodeDataSize(m_data, offset, op); if (op_arg_size == LLDB_INVALID_OFFSET) return false; - else - offset += op_arg_size; + offset += op_arg_size; } return false; } bool DWARFExpression::LinkThreadLocalStorage( - lldb::ModuleSP new_module_sp, std::function<lldb::addr_t(lldb::addr_t file_addr)> const &link_address_callback) { - // We are assuming for now that any thread local variable will not have a - // location list. This has been true for all thread local variables we have - // seen so far produced by any compiler. 
- if (IsLocationList()) - return false; - const uint32_t addr_byte_size = m_data.GetAddressByteSize(); // We have to make a copy of the data as we don't know if this data is from a // read only memory mapped buffer, so we duplicate all of the data first, // then modify it, and if all goes well, we then replace the data for this // expression. - // Make an encoder that contains a copy of the location expression data so we // can write the address into the buffer using the correct byte order. DataEncoder encoder(m_data.GetDataStart(), m_data.GetByteSize(), @@ -593,42 +504,10 @@ bool DWARFExpression::LinkThreadLocalStorage( } } - // If we linked the TLS address correctly, update the module so that when the - // expression is evaluated it can resolve the file address to a load address - // and read the - // TLS data - m_module_wp = new_module_sp; m_data.SetData(encoder.GetDataBuffer()); return true; } -bool DWARFExpression::LocationListContainsAddress(addr_t func_load_addr, - lldb::addr_t addr) const { - if (func_load_addr == LLDB_INVALID_ADDRESS || addr == LLDB_INVALID_ADDRESS) - return false; - - if (!IsLocationList()) - return false; - - return GetLocationExpression(func_load_addr, addr) != llvm::None; -} - -bool DWARFExpression::DumpLocationForAddress(Stream *s, - lldb::DescriptionLevel level, - addr_t func_load_addr, - addr_t address, ABI *abi) { - if (!IsLocationList()) { - DumpLocation(s, m_data, level, abi); - return true; - } - if (llvm::Optional<DataExtractor> expr = - GetLocationExpression(func_load_addr, address)) { - DumpLocation(s, *expr, level, abi); - return true; - } - return false; -} - static bool Evaluate_DW_OP_entry_value(std::vector<Value> &stack, ExecutionContext *exe_ctx, RegisterContext *reg_ctx, @@ -824,10 +703,10 @@ static bool Evaluate_DW_OP_entry_value(std::vector<Value> &stack, // TODO: Add support for DW_OP_push_object_address within a DW_OP_entry_value // subexpression whenever llvm does.
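(As an aside on the copy-then-patch approach used by Update_DW_OP_addr and LinkThreadLocalStorage above: the expression bytes may live in a read-only memory-mapped buffer, so the address operand is rewritten in a private copy. A minimal standalone sketch follows; it assumes the caller already located the DW_OP_addr opcode with a proper opcode walk such as GetOpcodeDataSize, and it assumes a little-endian target with 8-byte addresses. Plain C++ is used here rather than the lldb_private::DataEncoder API.)

#include <cstdint>
#include <cstring>
#include <vector>

// Patch the 8-byte operand of a DW_OP_addr opcode found at op_offset.
// The original bytes are never modified in place; the patch goes into
// a private copy that can later replace the expression's data.
std::vector<uint8_t> PatchDwOpAddrOperand(const std::vector<uint8_t> &expr,
                                          size_t op_offset,
                                          uint64_t new_addr) {
  std::vector<uint8_t> copy(expr);
  if (op_offset + 1 + sizeof(new_addr) > copy.size())
    return copy; // operand would run off the end; leave the copy untouched
  std::memcpy(&copy[op_offset + 1], &new_addr, sizeof(new_addr));
  return copy;
}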
Value result; - const DWARFExpression &param_expr = matched_param->LocationInCaller; + const DWARFExpressionList &param_expr = matched_param->LocationInCaller; if (!param_expr.Evaluate(&parent_exe_ctx, parent_frame->GetRegisterContext().get(), - /*loclist_base_load_addr=*/LLDB_INVALID_ADDRESS, + LLDB_INVALID_ADDRESS, /*initial_value_ptr=*/nullptr, /*object_address_ptr=*/nullptr, result, error_ptr)) { LLDB_LOG(log, @@ -839,63 +718,6 @@ static bool Evaluate_DW_OP_entry_value(std::vector<Value> &stack, return true; } -bool DWARFExpression::Evaluate(ExecutionContextScope *exe_scope, - lldb::addr_t loclist_base_load_addr, - const Value *initial_value_ptr, - const Value *object_address_ptr, Value &result, - Status *error_ptr) const { - ExecutionContext exe_ctx(exe_scope); - return Evaluate(&exe_ctx, nullptr, loclist_base_load_addr, initial_value_ptr, - object_address_ptr, result, error_ptr); -} - -bool DWARFExpression::Evaluate(ExecutionContext *exe_ctx, - RegisterContext *reg_ctx, - lldb::addr_t func_load_addr, - const Value *initial_value_ptr, - const Value *object_address_ptr, Value &result, - Status *error_ptr) const { - ModuleSP module_sp = m_module_wp.lock(); - - if (IsLocationList()) { - Address pc; - StackFrame *frame = nullptr; - if (!reg_ctx || !reg_ctx->GetPCForSymbolication(pc)) { - frame = exe_ctx->GetFramePtr(); - if (!frame) - return false; - RegisterContextSP reg_ctx_sp = frame->GetRegisterContext(); - if (!reg_ctx_sp) - return false; - reg_ctx_sp->GetPCForSymbolication(pc); - } - - if (func_load_addr != LLDB_INVALID_ADDRESS) { - if (!pc.IsValid()) { - if (error_ptr) - error_ptr->SetErrorString("Invalid PC in frame."); - return false; - } - - Target *target = exe_ctx->GetTargetPtr(); - if (llvm::Optional<DataExtractor> expr = GetLocationExpression( - func_load_addr, pc.GetLoadAddress(target))) { - return DWARFExpression::Evaluate( - exe_ctx, reg_ctx, module_sp, *expr, m_dwarf_cu, m_reg_kind, - initial_value_ptr, object_address_ptr, result, error_ptr); - } - } - if (error_ptr) - error_ptr->SetErrorString("variable not available"); - return false; - } - - // Not a location list, just a single expression. - return DWARFExpression::Evaluate(exe_ctx, reg_ctx, module_sp, m_data, - m_dwarf_cu, m_reg_kind, initial_value_ptr, - object_address_ptr, result, error_ptr); -} - namespace { /// The location description kinds described by the DWARF v5 /// specification.
Composite locations are handled out-of-band and @@ -2670,7 +2492,7 @@ bool DWARFExpression::Evaluate( return false; } uint64_t index = opcodes.GetULEB128(&offset); - lldb::addr_t value = ReadAddressFromDebugAddrSection(dwarf_cu, index); + lldb::addr_t value = dwarf_cu->ReadAddressFromDebugAddrSection(index); stack.push_back(Scalar(value)); stack.back().SetValueType(Value::ValueType::FileAddress); } break; @@ -2690,7 +2512,7 @@ bool DWARFExpression::Evaluate( return false; } uint64_t index = opcodes.GetULEB128(&offset); - lldb::addr_t value = ReadAddressFromDebugAddrSection(dwarf_cu, index); + lldb::addr_t value = dwarf_cu->ReadAddressFromDebugAddrSection(index); stack.push_back(Scalar(value)); } break; @@ -2743,61 +2565,16 @@ bool DWARFExpression::Evaluate( return true; // Return true on success } -static DataExtractor ToDataExtractor(const llvm::DWARFLocationExpression &loc, - ByteOrder byte_order, uint32_t addr_size) { - auto buffer_sp = - std::make_shared<DataBufferHeap>(loc.Expr.data(), loc.Expr.size()); - return DataExtractor(buffer_sp, byte_order, addr_size); -} - -bool DWARFExpression::DumpLocations(Stream *s, lldb::DescriptionLevel level, - addr_t load_function_start, addr_t addr, - ABI *abi) { - if (!IsLocationList()) { - DumpLocation(s, m_data, level, abi); - return true; - } - bool dump_all = addr == LLDB_INVALID_ADDRESS; - llvm::ListSeparator separator; - auto callback = [&](llvm::DWARFLocationExpression loc) -> bool { - if (loc.Range && - (dump_all || (loc.Range->LowPC <= addr && addr < loc.Range->HighPC))) { - uint32_t addr_size = m_data.GetAddressByteSize(); - DataExtractor data = ToDataExtractor(loc, m_data.GetByteOrder(), - m_data.GetAddressByteSize()); - s->AsRawOstream() << separator; - s->PutCString("["); - s->AsRawOstream() << llvm::format_hex(loc.Range->LowPC, - 2 + 2 * addr_size); - s->PutCString(", "); - s->AsRawOstream() << llvm::format_hex(loc.Range->HighPC, - 2 + 2 * addr_size); - s->PutCString(") -> "); - DumpLocation(s, data, level, abi); - return dump_all; - } - return true; - }; - if (!GetLocationExpressions(load_function_start, callback)) - return false; - return true; -} - -bool DWARFExpression::GetLocationExpressions( - addr_t load_function_start, - llvm::function_ref<bool(llvm::DWARFLocationExpression)> callback) const { - if (load_function_start == LLDB_INVALID_ADDRESS) - return false; - - Log *log = GetLog(LLDBLog::Expressions); - +bool DWARFExpression::ParseDWARFLocationList( + const DWARFUnit *dwarf_cu, const DataExtractor &data, + DWARFExpressionList *location_list) { + location_list->Clear(); std::unique_ptr<llvm::DWARFLocationTable> loctable_up = - m_dwarf_cu->GetLocationTable(m_data); - - uint64_t offset = 0; + dwarf_cu->GetLocationTable(data); + Log *log = GetLog(LLDBLog::Expressions); auto lookup_addr = [&](uint32_t index) -> llvm::Optional<llvm::object::SectionedAddress> { - addr_t address = ReadAddressFromDebugAddrSection(m_dwarf_cu, index); + addr_t address = dwarf_cu->ReadAddressFromDebugAddrSection(index); if (address == LLDB_INVALID_ADDRESS) return llvm::None; return llvm::object::SectionedAddress{address}; @@ -2807,18 +2584,17 @@ bool DWARFExpression::GetLocationExpressions( LLDB_LOG_ERROR(log, loc.takeError(), "{0}"); return true; } - if (loc->Range) { - // This relocates low_pc and high_pc by adding the difference between the - // function file address, and the actual address it is loaded in memory. 
- addr_t slide = load_function_start - m_loclist_addresses->func_file_addr; - loc->Range->LowPC += slide; - loc->Range->HighPC += slide; - } - return callback(*loc); + auto buffer_sp = + std::make_shared<DataBufferHeap>(loc->Expr.data(), loc->Expr.size()); + DWARFExpression expr = DWARFExpression(DataExtractor( + buffer_sp, data.GetByteOrder(), data.GetAddressByteSize())); + location_list->AddExpression(loc->Range->LowPC, loc->Range->HighPC, expr); + return true; }; llvm::Error error = loctable_up->visitAbsoluteLocationList( - offset, llvm::object::SectionedAddress{m_loclist_addresses->cu_file_addr}, + 0, llvm::object::SectionedAddress{dwarf_cu->GetBaseAddress()}, lookup_addr, process_list); + location_list->Sort(); if (error) { LLDB_LOG_ERROR(log, std::move(error), "{0}"); return false; @@ -2826,23 +2602,8 @@ bool DWARFExpression::GetLocationExpressions( return true; } -llvm::Optional<DataExtractor> -DWARFExpression::GetLocationExpression(addr_t load_function_start, - addr_t addr) const { - llvm::Optional<DataExtractor> data; - auto callback = [&](llvm::DWARFLocationExpression loc) { - if (loc.Range && loc.Range->LowPC <= addr && addr < loc.Range->HighPC) { - data = ToDataExtractor(loc, m_data.GetByteOrder(), - m_data.GetAddressByteSize()); - } - return !data; - }; - GetLocationExpressions(load_function_start, callback); - return data; -} - -bool DWARFExpression::MatchesOperand(StackFrame &frame, - const Instruction::Operand &operand) { +bool DWARFExpression::MatchesOperand( + StackFrame &frame, const Instruction::Operand &operand) const { using namespace OperandMatchers; RegisterContextSP reg_ctx_sp = frame.GetRegisterContext(); @@ -2850,28 +2611,7 @@ bool DWARFExpression::MatchesOperand(StackFrame &frame, return false; } - DataExtractor opcodes; - if (IsLocationList()) { - SymbolContext sc = frame.GetSymbolContext(eSymbolContextFunction); - if (!sc.function) - return false; - - addr_t load_function_start = - sc.function->GetAddressRange().GetBaseAddress().GetFileAddress(); - if (load_function_start == LLDB_INVALID_ADDRESS) - return false; - - addr_t pc = frame.GetFrameCodeAddressForSymbolication().GetLoadAddress( - frame.CalculateTarget().get()); - - if (llvm::Optional<DataExtractor> expr = - GetLocationExpression(load_function_start, pc)) - opcodes = std::move(*expr); - else - return false; - } else - opcodes = m_data; - + DataExtractor opcodes(m_data); lldb::offset_t op_offset = 0; uint8_t opcode = opcodes.GetU8(&op_offset); @@ -2879,7 +2619,7 @@ bool DWARFExpression::MatchesOperand(StackFrame &frame, if (opcode == DW_OP_fbreg) { int64_t offset = opcodes.GetSLEB128(&op_offset); - DWARFExpression *fb_expr = frame.GetFrameBaseExpression(nullptr); + DWARFExpressionList *fb_expr = frame.GetFrameBaseExpression(nullptr); if (!fb_expr) { return false; } diff --git a/lldb/source/Expression/DWARFExpressionList.cpp b/lldb/source/Expression/DWARFExpressionList.cpp new file mode 100644 index 000000000000..5cf722c42fa9 --- /dev/null +++ b/lldb/source/Expression/DWARFExpressionList.cpp @@ -0,0 +1,248 @@ +//===-- DWARFExpressionList.cpp -------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "lldb/Expression/DWARFExpressionList.h" +#include "Plugins/SymbolFile/DWARF/DWARFUnit.h" +#include "lldb/Symbol/Function.h" +#include "lldb/Target/RegisterContext.h" +#include "lldb/Target/StackFrame.h" +#include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h" +#include "llvm/DebugInfo/DWARF/DWARFFormValue.h" + +using namespace lldb; +using namespace lldb_private; + +bool DWARFExpressionList::IsAlwaysValidSingleExpr() const { + return GetAlwaysValidExpr() != nullptr; +} + +const DWARFExpression * DWARFExpressionList::GetAlwaysValidExpr() const { + if (m_exprs.GetSize() != 1) + return nullptr; + const auto *expr = m_exprs.GetEntryAtIndex(0); + if (expr->base == 0 && expr->size == LLDB_INVALID_ADDRESS) + return &expr->data; + return nullptr; +} + +bool DWARFExpressionList::AddExpression(addr_t base, addr_t end, + DWARFExpression expr) { + if (IsAlwaysValidSingleExpr() || base >= end) + return false; + m_exprs.Append({base, end - base, expr}); + return true; +} + +bool DWARFExpressionList::GetExpressionData(DataExtractor &data, + lldb::addr_t func_load_addr, + lldb::addr_t file_addr) const { + if (const DWARFExpression *expr = + GetExpressionAtAddress(func_load_addr, file_addr)) + return expr->GetExpressionData(data); + return false; +} + +bool DWARFExpressionList::ContainsAddress(lldb::addr_t func_load_addr, + lldb::addr_t addr) const { + if (IsAlwaysValidSingleExpr()) + return true; + return GetExpressionAtAddress(func_load_addr, addr) != nullptr; +} + +const DWARFExpression * +DWARFExpressionList::GetExpressionAtAddress(lldb::addr_t func_load_addr, + lldb::addr_t load_addr) const { + if (const DWARFExpression *expr = GetAlwaysValidExpr()) + return expr; + if (func_load_addr == LLDB_INVALID_ADDRESS) + func_load_addr = m_func_file_addr; + addr_t addr = load_addr - func_load_addr + m_func_file_addr; + uint32_t index = m_exprs.FindEntryIndexThatContains(addr); + if (index == UINT32_MAX) + return nullptr; + return &m_exprs.GetEntryAtIndex(index)->data; +} + +DWARFExpression * +DWARFExpressionList::GetMutableExpressionAtAddress(lldb::addr_t func_load_addr, + lldb::addr_t load_addr) { + if (IsAlwaysValidSingleExpr()) + return &m_exprs.GetMutableEntryAtIndex(0)->data; + if (func_load_addr == LLDB_INVALID_ADDRESS) + func_load_addr = m_func_file_addr; + addr_t addr = load_addr - func_load_addr + m_func_file_addr; + uint32_t index = m_exprs.FindEntryIndexThatContains(addr); + if (index == UINT32_MAX) + return nullptr; + return &m_exprs.GetMutableEntryAtIndex(index)->data; +} + +bool DWARFExpressionList::ContainsThreadLocalStorage() const { + // We are assuming for now that any thread local variable will not have a + // location list. This has been true for all thread local variables we have + // seen so far produced by any compiler. + if (!IsAlwaysValidSingleExpr()) + return false; + + const DWARFExpression &expr = m_exprs.GetEntryRef(0).data; + return expr.ContainsThreadLocalStorage(); +} + +bool DWARFExpressionList::LinkThreadLocalStorage( + lldb::ModuleSP new_module_sp, + std::function<lldb::addr_t(lldb::addr_t file_addr)> const + &link_address_callback) { + // We are assuming for now that any thread local variable will not have a + // location list. This has been true for all thread local variables we have + // seen so far produced by any compiler. 
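(A rough standalone model of the range lookup that GetExpressionAtAddress performs above, ignoring the always-valid single-expression fast path: entries store function-file-relative ranges, so a load address must first be rebased into the file-address space using the function's slide. The names and types below are illustrative, not the lldb_private API.)

#include <cstdint>
#include <optional>
#include <vector>

// One location-list entry: a file-address range plus its expression.
struct Entry {
  uint64_t base;  // file address where the range starts
  uint64_t size;  // length of the range in bytes
  int expr_id;    // stand-in for the DWARFExpression payload
};

std::optional<int> FindExpr(const std::vector<Entry> &entries,
                            uint64_t func_file_addr, uint64_t func_load_addr,
                            uint64_t load_addr) {
  // Rebase: load address -> file address, undoing the function's slide.
  uint64_t file_addr = load_addr - func_load_addr + func_file_addr;
  for (const Entry &e : entries)
    if (e.base <= file_addr && file_addr < e.base + e.size)
      return e.expr_id;
  return std::nullopt;
}

(The real implementation keeps the entries sorted and uses FindEntryIndexThatContains instead of a linear scan, but the address arithmetic is the same.)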
+ if (!IsAlwaysValidSingleExpr()) + return false; + + DWARFExpression &expr = m_exprs.GetEntryRef(0).data; + // If we linked the TLS address correctly, update the module so that when the + // expression is evaluated it can resolve the file address to a load address + // and read the TLS data + if (expr.LinkThreadLocalStorage(link_address_callback)) + m_module_wp = new_module_sp; + return true; +} + +bool DWARFExpressionList::MatchesOperand( + StackFrame &frame, const Instruction::Operand &operand) const { + RegisterContextSP reg_ctx_sp = frame.GetRegisterContext(); + if (!reg_ctx_sp) { + return false; + } + const DWARFExpression *expr = nullptr; + if (IsAlwaysValidSingleExpr()) + expr = &m_exprs.GetEntryAtIndex(0)->data; + else { + SymbolContext sc = frame.GetSymbolContext(eSymbolContextFunction); + if (!sc.function) + return false; + + addr_t load_function_start = + sc.function->GetAddressRange().GetBaseAddress().GetFileAddress(); + if (load_function_start == LLDB_INVALID_ADDRESS) + return false; + + addr_t pc = frame.GetFrameCodeAddressForSymbolication().GetFileAddress(); + expr = GetExpressionAtAddress(LLDB_INVALID_ADDRESS, pc); + } + if (!expr) + return false; + return expr->MatchesOperand(frame, operand); +} + +bool DWARFExpressionList::DumpLocations(Stream *s, lldb::DescriptionLevel level, + lldb::addr_t func_load_addr, + lldb::addr_t file_addr, + ABI *abi) const { + llvm::raw_ostream &os = s->AsRawOstream(); + llvm::ListSeparator separator; + if (const DWARFExpression *expr = GetAlwaysValidExpr()) { + expr->DumpLocation(s, level, abi); + return true; + } + for (const Entry &entry : *this) { + addr_t load_base = entry.GetRangeBase() + func_load_addr - m_func_file_addr; + addr_t load_end = entry.GetRangeEnd() + func_load_addr - m_func_file_addr; + if (file_addr != LLDB_INVALID_ADDRESS && + (file_addr < load_base || file_addr >= load_end)) + continue; + const auto &expr = entry.data; + DataExtractor data; + expr.GetExpressionData(data); + uint32_t addr_size = data.GetAddressByteSize(); + + os << separator; + os << "["; + os << llvm::format_hex(load_base, 2 + 2 * addr_size); + os << ", "; + os << llvm::format_hex(load_end, 2 + 2 * addr_size); + os << ") -> "; + expr.DumpLocation(s, level, abi); + if (file_addr != LLDB_INVALID_ADDRESS) + break; + } + return true; +} + +void DWARFExpressionList::GetDescription(Stream *s, + lldb::DescriptionLevel level, + ABI *abi) const { + llvm::raw_ostream &os = s->AsRawOstream(); + if (IsAlwaysValidSingleExpr()) { + m_exprs.Back()->data.DumpLocation(s, level, abi); + return; + } + os << llvm::format("0x%8.8" PRIx64 ": ", 0); + for (const Entry &entry : *this) { + const auto &expr = entry.data; + DataExtractor data; + expr.GetExpressionData(data); + uint32_t addr_size = data.GetAddressByteSize(); + os << "\n"; + os.indent(s->GetIndentLevel() + 2); + os << "["; + llvm::DWARFFormValue::dumpAddress(os, addr_size, entry.GetRangeBase()); + os << ", "; + llvm::DWARFFormValue::dumpAddress(os, addr_size, entry.GetRangeEnd()); + os << "): "; + expr.DumpLocation(s, level, abi); + } +} + +bool DWARFExpressionList::Evaluate(ExecutionContext *exe_ctx, + RegisterContext *reg_ctx, + lldb::addr_t func_load_addr, + const Value *initial_value_ptr, + const Value *object_address_ptr, + Value &result, Status *error_ptr) const { + ModuleSP module_sp = m_module_wp.lock(); + DataExtractor data; + RegisterKind reg_kind; + DWARFExpression expr; + if (IsAlwaysValidSingleExpr()) { + expr = m_exprs.Back()->data; + } else { + Address pc; + StackFrame *frame = nullptr; + if (!reg_ctx 
|| !reg_ctx->GetPCForSymbolication(pc)) { + if (exe_ctx) + frame = exe_ctx->GetFramePtr(); + if (!frame) + return false; + RegisterContextSP reg_ctx_sp = frame->GetRegisterContext(); + if (!reg_ctx_sp) + return false; + reg_ctx_sp->GetPCForSymbolication(pc); + } + + if (!pc.IsValid()) { + if (error_ptr) + error_ptr->SetErrorString("Invalid PC in frame."); + return false; + } + addr_t pc_load_addr = pc.GetLoadAddress(exe_ctx->GetTargetPtr()); + const DWARFExpression *entry = + GetExpressionAtAddress(func_load_addr, pc_load_addr); + if (!entry) { + if (error_ptr) { + error_ptr->SetErrorString("variable not available"); + } + return false; + } + expr = *entry; + } + expr.GetExpressionData(data); + reg_kind = expr.GetRegisterKind(); + return DWARFExpression::Evaluate(exe_ctx, reg_ctx, module_sp, data, + m_dwarf_cu, reg_kind, initial_value_ptr, + object_address_ptr, result, error_ptr); +} diff --git a/lldb/source/Expression/IRExecutionUnit.cpp b/lldb/source/Expression/IRExecutionUnit.cpp index 49fa72f7112d..6b710084faf7 100644 --- a/lldb/source/Expression/IRExecutionUnit.cpp +++ b/lldb/source/Expression/IRExecutionUnit.cpp @@ -200,7 +200,9 @@ Status IRExecutionUnit::DisassembleFunction(Stream &stream, UINT32_MAX, false, false); InstructionList &instruction_list = disassembler_sp->GetInstructionList(); - instruction_list.Dump(&stream, true, true, &exe_ctx); + instruction_list.Dump(&stream, true, true, /*show_control_flow_kind=*/true, + &exe_ctx); + return ret; } diff --git a/lldb/source/Expression/Materializer.cpp b/lldb/source/Expression/Materializer.cpp index 9ee2d983ddfc..965a96b7f909 100644 --- a/lldb/source/Expression/Materializer.cpp +++ b/lldb/source/Expression/Materializer.cpp @@ -520,7 +520,7 @@ public: if (data.GetByteSize() < m_variable_sp->GetType()->GetByteSize(scope)) { if (data.GetByteSize() == 0 && - !m_variable_sp->LocationExpression().IsValid()) { + !m_variable_sp->LocationExpressionList().IsValid()) { err.SetErrorStringWithFormat("the variable '%s' has no location, " "it may have been optimized out", m_variable_sp->GetName().AsCString()); diff --git a/lldb/source/Interpreter/CommandObject.cpp b/lldb/source/Interpreter/CommandObject.cpp index c92fec53a55e..910d740625e9 100644 --- a/lldb/source/Interpreter/CommandObject.cpp +++ b/lldb/source/Interpreter/CommandObject.cpp @@ -547,7 +547,7 @@ CommandObject::LookupArgumentName(llvm::StringRef arg_name) { const ArgumentTableEntry *table = GetArgumentTable(); for (int i = 0; i < eArgTypeLastArg; ++i) if (arg_name == table[i].arg_name) - return_type = g_arguments_data[i].arg_type; + return_type = GetArgumentTable()[i].arg_type; return return_type; } @@ -924,14 +924,14 @@ const char *CommandObject::GetArgumentTypeAsCString( const lldb::CommandArgumentType arg_type) { assert(arg_type < eArgTypeLastArg && "Invalid argument type passed to GetArgumentTypeAsCString"); - return g_arguments_data[arg_type].arg_name; + return GetArgumentTable()[arg_type].arg_name; } const char *CommandObject::GetArgumentDescriptionAsCString( const lldb::CommandArgumentType arg_type) { assert(arg_type < eArgTypeLastArg && "Invalid argument type passed to GetArgumentDescriptionAsCString"); - return g_arguments_data[arg_type].help_text; + return GetArgumentTable()[arg_type].help_text; } Target &CommandObject::GetDummyTarget() { @@ -1041,7 +1041,7 @@ static llvm::StringRef arch_helper() { return g_archs_help.GetString(); } -CommandObject::ArgumentTableEntry CommandObject::g_arguments_data[] = { +static constexpr CommandObject::ArgumentTableEntry g_arguments_data[] 
= { // clang-format off { eArgTypeAddress, "address", CommandCompletions::eNoCompletion, { nullptr, false }, "A valid address in the target program's execution space." }, { eArgTypeAddressOrExpression, "address-expression", CommandCompletions::eNoCompletion, { nullptr, false }, "An expression that resolves to an address." }, @@ -1134,17 +1134,18 @@ CommandObject::ArgumentTableEntry CommandObject::g_arguments_data[] = { { eArgTypeSaveCoreStyle, "corefile-style", CommandCompletions::eNoCompletion, { nullptr, false }, "The type of corefile that lldb will try to create, dependent on this target's capabilities." }, { eArgTypeLogHandler, "log-handler", CommandCompletions::eNoCompletion, { nullptr, false }, "The log handler that will be used to write out log messages." }, { eArgTypeSEDStylePair, "substitution-pair", CommandCompletions::eNoCompletion, { nullptr, false }, "A sed-style pattern and target pair." }, + { eArgTypeRecognizerID, "frame-recognizer-id", CommandCompletions::eNoCompletion, { nullptr, false }, "The ID for a stack frame recognizer." }, { eArgTypeConnectURL, "process-connect-url", CommandCompletions::eNoCompletion, { nullptr, false }, "A URL-style specification for a remote connection." }, { eArgTypeTargetID, "target-id", CommandCompletions::eNoCompletion, { nullptr, false }, "The index ID for an lldb Target." }, { eArgTypeStopHookID, "stop-hook-id", CommandCompletions::eNoCompletion, { nullptr, false }, "The ID you receive when you create a stop-hook." } // clang-format on }; +static_assert( + (sizeof(g_arguments_data) / sizeof(CommandObject::ArgumentTableEntry)) == + eArgTypeLastArg, + "g_arguments_data out of sync with CommandArgumentType enumeration"); + const CommandObject::ArgumentTableEntry *CommandObject::GetArgumentTable() { - // If this assertion fires, then the table above is out of date with the - // CommandArgumentType enumeration - static_assert((sizeof(CommandObject::g_arguments_data) / - sizeof(CommandObject::ArgumentTableEntry)) == eArgTypeLastArg, - ""); - return CommandObject::g_arguments_data; + return g_arguments_data; } diff --git a/lldb/source/Plugins/ABI/X86/ABIWindows_x86_64.h b/lldb/source/Plugins/ABI/X86/ABIWindows_x86_64.h index e74b9126404e..a9c2ed9c2f14 100644 --- a/lldb/source/Plugins/ABI/X86/ABIWindows_x86_64.h +++ b/lldb/source/Plugins/ABI/X86/ABIWindows_x86_64.h @@ -40,10 +40,15 @@ public: bool RegisterIsVolatile(const lldb_private::RegisterInfo *reg_info) override; - // In Windows_x86_64 ABI, stack will always be maintained 16-byte aligned + // The Windows x86_64 ABI requires that the stack be maintained 16-byte + // aligned. + // When ntdll invokes callbacks such as KiUserExceptionDispatcher or + // KiUserCallbackDispatcher, those functions won't have a properly 16-byte + // aligned stack - but we tolerate unwinding through them by relaxing the + // requirement to 8 bytes.
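(The relaxed check that follows relies on the usual power-of-two mask idiom: value & (align - 1) is zero exactly when value is a multiple of align. A tiny self-contained illustration, using made-up sample addresses:)

#include <cassert>
#include <cstdint>

// Alignment test with a power-of-two mask: works because align - 1
// has exactly the low bits set that must be zero in an aligned value.
bool IsAligned(uint64_t value, uint64_t align) {
  return (value & (align - 1)) == 0;
}

int main() {
  assert(IsAligned(0x7fff0008, 8));   // 8-byte aligned: now accepted as a CFA
  assert(!IsAligned(0x7fff0008, 16)); // not 16-byte aligned: old check rejected it
  assert(IsAligned(0x7fff0010, 16));
  return 0;
}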
bool CallFrameAddressIsValid(lldb::addr_t cfa) override { - if (cfa & (16ull - 1ull)) - return false; // Not 16 byte aligned + if (cfa & (8ull - 1ull)) + return false; // Not 8 byte aligned if (cfa == 0) return false; // Zero is not a valid stack address return true; diff --git a/lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.cpp b/lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.cpp index c85c66442510..a774d5b61cfe 100644 --- a/lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.cpp +++ b/lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.cpp @@ -1181,11 +1181,7 @@ DisassemblerLLVMC::DisassemblerLLVMC(const ArchSpec &arch, // If any AArch64 variant, enable latest ISA with all extensions. if (triple.isAArch64()) { - features_str += "+v9.3a,"; - std::vector<llvm::StringRef> features; - // Get all possible features - llvm::AArch64::getExtensionFeatures(-1, features); - features_str += llvm::join(features, ","); + features_str += "+all,"; if (triple.getVendor() == llvm::Triple::Apple) cpu = "apple-latest"; diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp index cc45871bcd71..4305a9982343 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp @@ -772,10 +772,6 @@ void ClangExpressionDeclMap::LookUpLldbClass(NameSearchContext &context) { return; AddContextClassType(context, TypeFromUser(m_ctx_obj->GetCompilerType())); - - m_struct_vars->m_object_pointer_type = - TypeFromUser(ctx_obj_ptr->GetCompilerType()); - return; } @@ -810,18 +806,6 @@ void ClangExpressionDeclMap::LookUpLldbClass(NameSearchContext &context) { class_qual_type.getAsString()); AddContextClassType(context, class_user_type); - - if (method_decl->isInstance()) { - // self is a pointer to the object - - QualType class_pointer_type = - method_decl->getASTContext().getPointerType(class_qual_type); - - TypeFromUser self_user_type(class_pointer_type.getAsOpaquePtr(), - function_decl_ctx.GetTypeSystem()); - - m_struct_vars->m_object_pointer_type = self_user_type; - } return; } @@ -852,8 +836,6 @@ void ClangExpressionDeclMap::LookUpLldbClass(NameSearchContext &context) { ClangUtil::GetQualType(pointee_type).getAsString()); AddContextClassType(context, pointee_type); - TypeFromUser this_user_type(this_type->GetFullCompilerType()); - m_struct_vars->m_object_pointer_type = this_user_type; } } @@ -869,10 +851,6 @@ void ClangExpressionDeclMap::LookUpLldbObjCClass(NameSearchContext &context) { return; AddOneType(context, TypeFromUser(m_ctx_obj->GetCompilerType())); - - m_struct_vars->m_object_pointer_type = - TypeFromUser(ctx_obj_ptr->GetCompilerType()); - return; } @@ -917,28 +895,6 @@ void ClangExpressionDeclMap::LookUpLldbObjCClass(NameSearchContext &context) { ClangUtil::ToString(interface_type)); AddOneType(context, class_user_type); - - if (method_decl->isInstanceMethod()) { - // self is a pointer to the object - - QualType class_pointer_type = - method_decl->getASTContext().getObjCObjectPointerType( - QualType(interface_type, 0)); - - TypeFromUser self_user_type(class_pointer_type.getAsOpaquePtr(), - function_decl_ctx.GetTypeSystem()); - - m_struct_vars->m_object_pointer_type = self_user_type; - } else { - // self is a Class pointer - QualType class_type = method_decl->getASTContext().getObjCClassType(); - - TypeFromUser self_user_type(class_type.getAsOpaquePtr(), - function_decl_ctx.GetTypeSystem()); 
- - m_struct_vars->m_object_pointer_type = self_user_type; - } - return; } // This branch will get hit if we are executing code in the context of @@ -981,10 +937,6 @@ void ClangExpressionDeclMap::LookUpLldbObjCClass(NameSearchContext &context) { TypeFromUser class_user_type(self_clang_type); AddOneType(context, class_user_type); - - TypeFromUser self_user_type(self_type->GetFullCompilerType()); - - m_struct_vars->m_object_pointer_type = self_user_type; } void ClangExpressionDeclMap::LookupLocalVarNamespace( @@ -1485,15 +1437,14 @@ bool ClangExpressionDeclMap::GetVariableValue(VariableSP &var, return false; } - DWARFExpression &var_location_expr = var->LocationExpression(); + DWARFExpressionList &var_location_list = var->LocationExpressionList(); Target *target = m_parser_vars->m_exe_ctx.GetTargetPtr(); Status err; if (var->GetLocationIsConstantValueData()) { DataExtractor const_value_extractor; - - if (var_location_expr.GetExpressionData(const_value_extractor)) { + if (var_location_list.GetExpressionData(const_value_extractor)) { var_location = Value(const_value_extractor.GetDataStart(), const_value_extractor.GetByteSize()); var_location.SetValueType(Value::ValueType::HostAddress); diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.h b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.h index e39dc587bc43..f968f859cc72 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.h +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.h @@ -353,7 +353,7 @@ private: /// The following values contain layout information for the materialized /// struct, but are not specific to a single materialization struct StructVars { - StructVars() : m_result_name(), m_object_pointer_type(nullptr, nullptr) {} + StructVars() = default; lldb::offset_t m_struct_alignment = 0; ///< The alignment of the struct in bytes. @@ -364,8 +364,6 @@ private: /// added since). 
ConstString m_result_name; ///< The name of the result variable ($1, for example) - TypeFromUser m_object_pointer_type; ///< The type of the "this" variable, if - ///one exists }; std::unique_ptr<StructVars> m_struct_vars; diff --git a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp index 82f825871593..89bee3e000c0 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp @@ -911,6 +911,14 @@ static void LoadLibCxxFormatters(lldb::TypeCategoryImplSP cpp_category_sp) { "std::map iterator synthetic children", ConstString("^std::__[[:alnum:]]+::__map_iterator<.+>$"), stl_synth_flags, true); + + AddCXXSynthetic( + cpp_category_sp, + lldb_private::formatters:: + LibCxxUnorderedMapIteratorSyntheticFrontEndCreator, + "std::unordered_map iterator synthetic children", + ConstString("^std::__[[:alnum:]]+::__hash_map_(const_)?iterator<.+>$"), + stl_synth_flags, true); } static void LoadLibStdcppFormatters(lldb::TypeCategoryImplSP cpp_category_sp) { diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp index eaaa16413b1e..3b04b3a1b2ac 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp @@ -27,6 +27,7 @@ #include "Plugins/LanguageRuntime/CPlusPlus/CPPLanguageRuntime.h" #include "Plugins/TypeSystem/Clang/TypeSystemClang.h" +#include "lldb/lldb-enumerations.h" #include <tuple> using namespace lldb; @@ -283,6 +284,22 @@ bool lldb_private::formatters::LibCxxMapIteratorSyntheticFrontEnd::Update() { llvm::dyn_cast_or_null<TypeSystemClang>(pair_type.GetTypeSystem()); if (!ast_ctx) return false; + + // Mimic layout of std::__tree_iterator::__ptr_ and read it in + // from process memory.
+ // + // The following shows the contiguous block of memory: + // + // +-----------------------------+ class __tree_end_node + // __ptr_ | pointer __left_; | + // +-----------------------------+ class __tree_node_base + // | pointer __right_; | + // | __parent_pointer __parent_; | + // | bool __is_black_; | + // +-----------------------------+ class __tree_node + // | __node_value_type __value_; | <<< our key/value pair + // +-----------------------------+ + // CompilerType tree_node_type = ast_ctx->CreateStructForIdentifier( ConstString(), {{"ptr0", @@ -359,6 +376,156 @@ lldb_private::formatters::LibCxxMapIteratorSyntheticFrontEndCreator( : nullptr); } +lldb_private::formatters::LibCxxUnorderedMapIteratorSyntheticFrontEnd:: + LibCxxUnorderedMapIteratorSyntheticFrontEnd(lldb::ValueObjectSP valobj_sp) + : SyntheticChildrenFrontEnd(*valobj_sp) { + if (valobj_sp) + Update(); +} + +bool lldb_private::formatters::LibCxxUnorderedMapIteratorSyntheticFrontEnd:: + Update() { + m_pair_sp.reset(); + m_iter_ptr = nullptr; + + ValueObjectSP valobj_sp = m_backend.GetSP(); + if (!valobj_sp) + return false; + + TargetSP target_sp(valobj_sp->GetTargetSP()); + + if (!target_sp) + return false; + + if (!valobj_sp) + return false; + + auto exprPathOptions = ValueObject::GetValueForExpressionPathOptions() + .DontCheckDotVsArrowSyntax() + .SetSyntheticChildrenTraversal( + ValueObject::GetValueForExpressionPathOptions:: + SyntheticChildrenTraversal::None); + + // This must be a ValueObject* because it is a child of the ValueObject we + // are producing children for; if it were a ValueObjectSP, we would end up + // with a loop (iterator -> synthetic -> child -> parent == iterator) and + // that would in turn leak memory by never allowing the ValueObjects to die + // and free their memory. + m_iter_ptr = + valobj_sp + ->GetValueForExpressionPath(".__i_.__node_", nullptr, nullptr, + exprPathOptions, nullptr) + .get(); + + if (m_iter_ptr) { + auto iter_child( + valobj_sp->GetChildMemberWithName(ConstString("__i_"), true)); + if (!iter_child) { + m_iter_ptr = nullptr; + return false; + } + + CompilerType node_type(iter_child->GetCompilerType() + .GetTypeTemplateArgument(0) + .GetPointeeType()); + + CompilerType pair_type(node_type.GetTypeTemplateArgument(0)); + + std::string name; + uint64_t bit_offset_ptr; + uint32_t bitfield_bit_size_ptr; + bool is_bitfield_ptr; + + pair_type = pair_type.GetFieldAtIndex( + 0, name, &bit_offset_ptr, &bitfield_bit_size_ptr, &is_bitfield_ptr); + if (!pair_type) { + m_iter_ptr = nullptr; + return false; + } + + uint64_t addr = m_iter_ptr->GetValueAsUnsigned(LLDB_INVALID_ADDRESS); + m_iter_ptr = nullptr; + + if (addr == 0 || addr == LLDB_INVALID_ADDRESS) + return false; + + TypeSystemClang *ast_ctx = + llvm::dyn_cast_or_null<TypeSystemClang>(pair_type.GetTypeSystem()); + if (!ast_ctx) + return false; + + // Mimic layout of std::__hash_iterator::__node_ and read it in + // from process memory.
+ // + // The following shows the contiguous block of memory: + // + // +-----------------------------+ class __hash_node_base + // __node_ | __next_pointer __next_; | + // +-----------------------------+ class __hash_node + // | size_t __hash_; | + // | __node_value_type __value_; | <<< our key/value pair + // +-----------------------------+ + // + CompilerType tree_node_type = ast_ctx->CreateStructForIdentifier( + ConstString(), + {{"__next_", + ast_ctx->GetBasicType(lldb::eBasicTypeVoid).GetPointerType()}, + {"__hash_", ast_ctx->GetBasicType(lldb::eBasicTypeUnsignedLongLong)}, + {"__value_", pair_type}}); + llvm::Optional<uint64_t> size = tree_node_type.GetByteSize(nullptr); + if (!size) + return false; + WritableDataBufferSP buffer_sp(new DataBufferHeap(*size, 0)); + ProcessSP process_sp(target_sp->GetProcessSP()); + Status error; + process_sp->ReadMemory(addr, buffer_sp->GetBytes(), + buffer_sp->GetByteSize(), error); + if (error.Fail()) + return false; + DataExtractor extractor(buffer_sp, process_sp->GetByteOrder(), + process_sp->GetAddressByteSize()); + auto pair_sp = CreateValueObjectFromData( + "pair", extractor, valobj_sp->GetExecutionContextRef(), tree_node_type); + if (pair_sp) + m_pair_sp = pair_sp->GetChildAtIndex(2, true); + } + + return false; +} + +size_t lldb_private::formatters::LibCxxUnorderedMapIteratorSyntheticFrontEnd:: + CalculateNumChildren() { + return 2; +} + +lldb::ValueObjectSP lldb_private::formatters:: + LibCxxUnorderedMapIteratorSyntheticFrontEnd::GetChildAtIndex(size_t idx) { + if (m_pair_sp) + return m_pair_sp->GetChildAtIndex(idx, true); + return lldb::ValueObjectSP(); +} + +bool lldb_private::formatters::LibCxxUnorderedMapIteratorSyntheticFrontEnd:: + MightHaveChildren() { + return true; +} + +size_t lldb_private::formatters::LibCxxUnorderedMapIteratorSyntheticFrontEnd:: + GetIndexOfChildWithName(ConstString name) { + if (name == "first") + return 0; + if (name == "second") + return 1; + return UINT32_MAX; +} + +SyntheticChildrenFrontEnd * +lldb_private::formatters::LibCxxUnorderedMapIteratorSyntheticFrontEndCreator( + CXXSyntheticChildren *, lldb::ValueObjectSP valobj_sp) { + return (valobj_sp ? new LibCxxUnorderedMapIteratorSyntheticFrontEnd(valobj_sp) + : nullptr); +} + /* (lldb) fr var ibeg --raw --ptr-depth 1 -T (std::__1::__wrap_iter<int *>) ibeg = { @@ -547,101 +714,77 @@ bool lldb_private::formatters::LibcxxContainerSummaryProvider( } /// The field layout in a libc++ string (cap, side, data or data, size, cap). -enum LibcxxStringLayoutMode { - eLibcxxStringLayoutModeCSD = 0, - eLibcxxStringLayoutModeDSC = 1, - eLibcxxStringLayoutModeInvalid = 0xffff -}; +namespace { +enum class StringLayout { CSD, DSC }; +} /// Determine the size in bytes of \p valobj (a libc++ std::string object) and /// extract its data payload. Return the size + payload pair. // TODO: Support big-endian architectures. static llvm::Optional<std::pair<uint64_t, ValueObjectSP>> ExtractLibcxxStringInfo(ValueObject &valobj) { - ValueObjectSP dataval_sp(valobj.GetChildAtIndexPath({0, 0, 0, 0})); - if (!dataval_sp) + ValueObjectSP valobj_r_sp = + valobj.GetChildMemberWithName(ConstString("__r_"), /*can_create=*/true); + if (!valobj_r_sp || !valobj_r_sp->GetError().Success()) return {}; - if (!dataval_sp->GetError().Success()) + + // __r_ is a compressed_pair of the actual data and the allocator. The data we + // want is in the first base class. 
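(Before the string changes continue below, a note on the layout-mimicking pattern used by the unordered_map iterator support earlier in this hunk: once the node's bytes have been read out of the inferior, the key/value pair sits at a fixed offset past the __next_ pointer and the hash. The sketch below hard-codes LP64 offsets and assumes the pair is trivially copyable with identical host and target layout; lldb instead builds the node type dynamically with CreateStructForIdentifier and lets the type system compute offsets, which is what makes it correct across targets.)

#include <cstdint>
#include <cstring>
#include <optional>
#include <type_traits>
#include <vector>

// Recover the __value_ field of a libc++ __hash_node from raw bytes
// read out of the debuggee. Offsets assume 8-byte pointers and size_t:
// 8 bytes of __next_ followed by 8 bytes of __hash_, then the pair.
template <typename Pair>
std::optional<Pair> ExtractHashNodeValue(const std::vector<uint8_t> &bytes) {
  static_assert(std::is_trivially_copyable_v<Pair>,
                "memcpy-based extraction needs a trivially copyable pair");
  const size_t value_offset = 8 /*__next_*/ + 8 /*__hash_*/;
  if (bytes.size() < value_offset + sizeof(Pair))
    return std::nullopt; // short read; node bytes are incomplete
  Pair value;
  std::memcpy(&value, bytes.data() + value_offset, sizeof(Pair));
  return value;
}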
+ ValueObjectSP valobj_r_base_sp = + valobj_r_sp->GetChildAtIndex(0, /*can_create=*/true); + if (!valobj_r_base_sp) return {}; - ValueObjectSP layout_decider( - dataval_sp->GetChildAtIndexPath(llvm::ArrayRef<size_t>({0, 0}))); + ValueObjectSP valobj_rep_sp = valobj_r_base_sp->GetChildMemberWithName( + ConstString("__value_"), /*can_create=*/true); + if (!valobj_rep_sp) + return {}; - // this child should exist - if (!layout_decider) + ValueObjectSP l = valobj_rep_sp->GetChildMemberWithName(ConstString("__l"), + /*can_create=*/true); + if (!l) return {}; - ConstString g_data_name("__data_"); - ConstString g_size_name("__size_"); + StringLayout layout = l->GetIndexOfChildWithName(ConstString("__data_")) == 0 + ? StringLayout::DSC + : StringLayout::CSD; + bool short_mode = false; // this means the string is in short-mode and the // data is stored inline bool using_bitmasks = true; // Whether the class uses bitmasks for the mode // flag (pre-D123580). uint64_t size; - LibcxxStringLayoutMode layout = (layout_decider->GetName() == g_data_name) - ? eLibcxxStringLayoutModeDSC - : eLibcxxStringLayoutModeCSD; uint64_t size_mode_value = 0; - ValueObjectSP short_sp(dataval_sp->GetChildAtIndex(1, true)); + ValueObjectSP short_sp = valobj_rep_sp->GetChildMemberWithName( + ConstString("__s"), /*can_create=*/true); if (!short_sp) return {}; - ValueObjectSP short_fields_sp; ValueObjectSP is_long = short_sp->GetChildMemberWithName(ConstString("__is_long_"), true); - if (is_long) { - short_fields_sp = short_sp; - } else { - // After D128285, we need to access the `__is_long_` and `__size_` fields - // from a packed anonymous struct - short_fields_sp = short_sp->GetChildAtIndex(0, true); - is_long = short_sp->GetChildMemberWithName(ConstString("__is_long_"), true); - } + ValueObjectSP size_sp = + short_sp->GetChildAtNamePath({ConstString("__size_")}); + if (!size_sp) + return {}; if (is_long) { using_bitmasks = false; short_mode = !is_long->GetValueAsUnsigned(/*fail_value=*/0); - if (ValueObjectSP size_member = - dataval_sp->GetChildAtNamePath({ConstString("__s"), ConstString("__size_")})) - size = size_member->GetValueAsUnsigned(/*fail_value=*/0); - else - return {}; - } else if (layout == eLibcxxStringLayoutModeDSC) { - llvm::SmallVector<size_t, 3> size_mode_locations[] = { - {1, 2}, // Post-c3d0205ee771 layout. This was in use for only a brief - // period, so we can delete it if it becomes a burden. - {1, 1, 0}, - {1, 1, 1}, - }; - ValueObjectSP size_mode; - for (llvm::ArrayRef<size_t> loc : size_mode_locations) { - size_mode = dataval_sp->GetChildAtIndexPath(loc); - if (size_mode && size_mode->GetName() == g_size_name) - break; - } - - if (!size_mode) - return {}; - - size_mode_value = (size_mode->GetValueAsUnsigned(0)); - short_mode = ((size_mode_value & 0x80) == 0); + size = size_sp->GetValueAsUnsigned(/*fail_value=*/0); } else { - ValueObjectSP size_mode(dataval_sp->GetChildAtIndexPath({1, 0, 0})); - if (!size_mode) - return {}; - - size_mode_value = (size_mode->GetValueAsUnsigned(0)); - short_mode = ((size_mode_value & 1) == 0); + // The string mode is encoded in the size field. + size_mode_value = size_sp->GetValueAsUnsigned(0); + uint8_t mode_mask = layout == StringLayout::DSC ? 0x80 : 1; + short_mode = (size_mode_value & mode_mask) == 0; } if (short_mode) { ValueObjectSP location_sp = - short_sp->GetChildMemberWithName(g_data_name, true); + short_sp->GetChildMemberWithName(ConstString("__data_"), true); if (using_bitmasks) - size = (layout == eLibcxxStringLayoutModeDSC) - ? 
size_mode_value - : ((size_mode_value >> 1) % 256); + size = (layout == StringLayout::DSC) ? size_mode_value + : ((size_mode_value >> 1) % 256); // When the small-string optimization takes place, the data must fit in the // inline string buffer (23 bytes on x86_64/Darwin). If it doesn't, it's @@ -656,10 +799,6 @@ ExtractLibcxxStringInfo(ValueObject &valobj) { return std::make_pair(size, location_sp); } - ValueObjectSP l(dataval_sp->GetChildAtIndex(0, true)); - if (!l) - return {}; - // we can use the layout_decider object as the data pointer ValueObjectSP location_sp = l->GetChildMemberWithName(ConstString("__data_"), /*can_create=*/true); @@ -667,19 +806,11 @@ ExtractLibcxxStringInfo(ValueObject &valobj) { l->GetChildMemberWithName(ConstString("__size_"), /*can_create=*/true); ValueObjectSP capacity_vo = l->GetChildMemberWithName(ConstString("__cap_"), /*can_create=*/true); - if (!capacity_vo) { - // After D128285, we need to access the `__cap_` field from a packed - // anonymous struct - if (ValueObjectSP packed_fields_sp = l->GetChildAtIndex(0, true)) { - ValueObjectSP capacity_vo = packed_fields_sp->GetChildMemberWithName( - ConstString("__cap_"), /*can_create=*/true); - } - } if (!size_vo || !location_sp || !capacity_vo) return {}; size = size_vo->GetValueAsUnsigned(LLDB_INVALID_OFFSET); uint64_t capacity = capacity_vo->GetValueAsUnsigned(LLDB_INVALID_OFFSET); - if (!using_bitmasks && layout == eLibcxxStringLayoutModeCSD) + if (!using_bitmasks && layout == StringLayout::CSD) capacity *= 2; if (size == LLDB_INVALID_OFFSET || capacity == LLDB_INVALID_OFFSET || capacity < size) diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxx.h b/lldb/source/Plugins/Language/CPlusPlus/LibCxx.h index b4e789e65b51..b5ade4af8574 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibCxx.h +++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxx.h @@ -103,6 +103,56 @@ SyntheticChildrenFrontEnd * LibCxxMapIteratorSyntheticFrontEndCreator(CXXSyntheticChildren *, lldb::ValueObjectSP); +/// Formats libcxx's std::unordered_map iterators +/// +/// In raw form a std::unordered_map::iterator is represented as follows: +/// +/// (lldb) var it --raw --ptr-depth 1 +/// (std::__1::__hash_map_iterator< +/// std::__1::__hash_iterator< +/// std::__1::__hash_node< +/// std::__1::__hash_value_type< +/// std::__1::basic_string<char, std::__1::char_traits<char>, +/// std::__1::allocator<char> >, std::__1::basic_string<char, +/// std::__1::char_traits<char>, std::__1::allocator<char> > >, +/// void *> *> >) +/// it = { +/// __i_ = { +/// __node_ = 0x0000600001700040 { +/// __next_ = 0x0000600001704000 +/// } +/// } +/// } +class LibCxxUnorderedMapIteratorSyntheticFrontEnd + : public SyntheticChildrenFrontEnd { +public: + LibCxxUnorderedMapIteratorSyntheticFrontEnd(lldb::ValueObjectSP valobj_sp); + + ~LibCxxUnorderedMapIteratorSyntheticFrontEnd() override = default; + + size_t CalculateNumChildren() override; + + lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; + + bool Update() override; + + bool MightHaveChildren() override; + + size_t GetIndexOfChildWithName(ConstString name) override; + +private: + ValueObject *m_iter_ptr = nullptr; ///< Held, not owned. Child of iterator + ///< ValueObject supplied at construction. + + lldb::ValueObjectSP m_pair_sp; ///< ValueObject for the key/value pair + ///< that the iterator currently points + ///< to. 
+}; + +SyntheticChildrenFrontEnd * +LibCxxUnorderedMapIteratorSyntheticFrontEndCreator(CXXSyntheticChildren *, + lldb::ValueObjectSP); + SyntheticChildrenFrontEnd * LibCxxVectorIteratorSyntheticFrontEndCreator(CXXSyntheticChildren *, lldb::ValueObjectSP); diff --git a/lldb/source/Plugins/ObjectContainer/BSD-Archive/ObjectContainerBSDArchive.cpp b/lldb/source/Plugins/ObjectContainer/BSD-Archive/ObjectContainerBSDArchive.cpp index a2522372f5af..9fe222eceedc 100644 --- a/lldb/source/Plugins/ObjectContainer/BSD-Archive/ObjectContainerBSDArchive.cpp +++ b/lldb/source/Plugins/ObjectContainer/BSD-Archive/ObjectContainerBSDArchive.cpp @@ -40,6 +40,8 @@ typedef struct ar_hdr { using namespace lldb; using namespace lldb_private; +using namespace llvm::object; + LLDB_PLUGIN_DEFINE(ObjectContainerBSDArchive) ObjectContainerBSDArchive::Object::Object() : ar_name() {} @@ -55,6 +57,74 @@ void ObjectContainerBSDArchive::Object::Clear() { file_size = 0; } +lldb::offset_t ObjectContainerBSDArchive::Object::ExtractFromThin( + const DataExtractor &data, lldb::offset_t offset, + llvm::StringRef stringTable) { + size_t ar_name_len = 0; + std::string str; + char *err; + + // File header + // + // The common format is as follows. + // + // Offset Length Name Format + // 0 16 File name ASCII right padded with spaces (no spaces + // allowed in file name) + // 16 12 File mod Decimal as cstring right padded with + // spaces + // 28 6 Owner ID Decimal as cstring right padded with + // spaces + // 34 6 Group ID Decimal as cstring right padded with + // spaces + // 40 8 File mode Octal as cstring right padded with + // spaces + // 48 10 File byte size Decimal as cstring right padded with + // spaces + // 58 2 File magic 0x60 0x0A + + // Make sure there is enough data for the file header and bail if not + if (!data.ValidOffsetForDataOfSize(offset, 60)) + return LLDB_INVALID_OFFSET; + + str.assign((const char *)data.GetData(&offset, 16), 16); + if (!(llvm::StringRef(str).startswith("//") || stringTable.empty())) { + // Strip off any trailing spaces. 
+ const size_t last_pos = str.find_last_not_of(' '); + if (last_pos != std::string::npos) { + if (last_pos + 1 < 16) + str.erase(last_pos + 1); + } + int start = strtoul(str.c_str() + 1, &err, 10); + int end = stringTable.find('\n', start); + str.assign(stringTable.data() + start, end - start - 1); + ar_name.SetCString(str.c_str()); + } + + str.assign((const char *)data.GetData(&offset, 12), 12); + modification_time = strtoul(str.c_str(), &err, 10); + + str.assign((const char *)data.GetData(&offset, 6), 6); + uid = strtoul(str.c_str(), &err, 10); + + str.assign((const char *)data.GetData(&offset, 6), 6); + gid = strtoul(str.c_str(), &err, 10); + + str.assign((const char *)data.GetData(&offset, 8), 8); + mode = strtoul(str.c_str(), &err, 8); + + str.assign((const char *)data.GetData(&offset, 10), 10); + size = strtoul(str.c_str(), &err, 10); + + str.assign((const char *)data.GetData(&offset, 2), 2); + if (str == ARFMAG) { + file_offset = offset; + file_size = size - ar_name_len; + return offset; + } + return LLDB_INVALID_OFFSET; +} + lldb::offset_t ObjectContainerBSDArchive::Object::Extract(const DataExtractor &data, lldb::offset_t offset) { @@ -136,9 +206,10 @@ ObjectContainerBSDArchive::Object::Extract(const DataExtractor &data, ObjectContainerBSDArchive::Archive::Archive(const lldb_private::ArchSpec &arch, const llvm::sys::TimePoint<> &time, lldb::offset_t file_offset, - lldb_private::DataExtractor &data) + lldb_private::DataExtractor &data, + ArchiveType archive_type) : m_arch(arch), m_modification_time(time), m_file_offset(file_offset), - m_objects(), m_data(data) {} + m_objects(), m_data(data), m_archive_type(archive_type) {} ObjectContainerBSDArchive::Archive::~Archive() = default; @@ -163,6 +234,48 @@ size_t ObjectContainerBSDArchive::Archive::ParseObjects() { // Now sort all of the object name pointers m_object_name_to_index_map.Sort(); + } else if (str == ThinArchiveMagic) { + Object obj; + size_t obj_idx; + + // Retrieve symbol table + offset = obj.ExtractFromThin(data, offset, ""); + if (offset == LLDB_INVALID_OFFSET) + return m_objects.size(); + obj_idx = m_objects.size(); + m_objects.push_back(obj); + // Insert all of the C strings out of order for now... + m_object_name_to_index_map.Append(obj.ar_name, obj_idx); + offset += obj.file_size; + obj.Clear(); + + // Retrieve string table + offset = obj.ExtractFromThin(data, offset, ""); + if (offset == LLDB_INVALID_OFFSET) + return m_objects.size(); + obj_idx = m_objects.size(); + m_objects.push_back(obj); + // Insert all of the C strings out of order for now... + m_object_name_to_index_map.Append(obj.ar_name, obj_idx); + // Extract string table + llvm::StringRef strtab((const char *)data.GetData(&offset, obj.size), + obj.size); + obj.Clear(); + + // Retrieve object files + do { + offset = obj.ExtractFromThin(data, offset, strtab); + if (offset == LLDB_INVALID_OFFSET) + break; + obj_idx = m_objects.size(); + m_objects.push_back(obj); + // Insert all of the C strings out of order for now... 
+ m_object_name_to_index_map.Append(obj.ar_name, obj_idx); + obj.Clear(); + } while (data.ValidOffset(offset)); + + // Now sort all of the object name pointers + m_object_name_to_index_map.Sort(); } return m_objects.size(); } @@ -237,8 +350,9 @@ ObjectContainerBSDArchive::Archive::shared_ptr ObjectContainerBSDArchive::Archive::ParseAndCacheArchiveForFile( const FileSpec &file, const ArchSpec &arch, const llvm::sys::TimePoint<> &time, lldb::offset_t file_offset, - DataExtractor &data) { - shared_ptr archive_sp(new Archive(arch, time, file_offset, data)); + DataExtractor &data, ArchiveType archive_type) { + shared_ptr archive_sp( + new Archive(arch, time, file_offset, data, archive_type)); if (archive_sp) { const size_t num_objects = archive_sp->ParseObjects(); if (num_objects > 0) { @@ -288,7 +402,8 @@ ObjectContainer *ObjectContainerBSDArchive::CreateInstance( // contents for the archive and cache it DataExtractor data; data.SetData(data_sp, data_offset, length); - if (file && data_sp && ObjectContainerBSDArchive::MagicBytesMatch(data)) { + ArchiveType archive_type = ObjectContainerBSDArchive::MagicBytesMatch(data); + if (file && data_sp && archive_type != ArchiveType::Invalid) { LLDB_SCOPED_TIMERF( "ObjectContainerBSDArchive::CreateInstance (module = %s, file = " "%p, file_offset = 0x%8.8" PRIx64 ", file_size = 0x%8.8" PRIx64 ")", @@ -312,7 +427,7 @@ ObjectContainer *ObjectContainerBSDArchive::CreateInstance( std::unique_ptr<ObjectContainerBSDArchive> container_up( new ObjectContainerBSDArchive(module_sp, archive_data_sp, archive_data_offset, file, file_offset, - length)); + length, archive_type)); if (container_up) { if (archive_sp) { @@ -331,7 +446,8 @@ ObjectContainer *ObjectContainerBSDArchive::CreateInstance( if (archive_sp) { std::unique_ptr<ObjectContainerBSDArchive> container_up( new ObjectContainerBSDArchive(module_sp, data_sp, data_offset, file, - file_offset, length)); + file_offset, length, + archive_sp->GetArchiveType())); if (container_up) { // We already have this archive in our cache, use it @@ -343,23 +459,35 @@ ObjectContainer *ObjectContainerBSDArchive::CreateInstance( return nullptr; } -bool ObjectContainerBSDArchive::MagicBytesMatch(const DataExtractor &data) { +ArchiveType +ObjectContainerBSDArchive::MagicBytesMatch(const DataExtractor &data) { uint32_t offset = 0; const char *armag = (const char *)data.PeekData(offset, sizeof(ar_hdr)); - if (armag && ::strncmp(armag, ARMAG, SARMAG) == 0) { + if (armag == nullptr) + return ArchiveType::Invalid; + if (::strncmp(armag, ARMAG, SARMAG) == 0) { armag += offsetof(struct ar_hdr, ar_fmag) + SARMAG; if (strncmp(armag, ARFMAG, 2) == 0) - return true; + return ArchiveType::Archive; + } else if (::strncmp(armag, ThinArchiveMagic, strlen(ThinArchiveMagic)) == + 0) { + armag += offsetof(struct ar_hdr, ar_fmag) + strlen(ThinArchiveMagic); + if (strncmp(armag, ARFMAG, 2) == 0) { + return ArchiveType::ThinArchive; + } } - return false; + return ArchiveType::Invalid; } ObjectContainerBSDArchive::ObjectContainerBSDArchive( const lldb::ModuleSP &module_sp, DataBufferSP &data_sp, lldb::offset_t data_offset, const lldb_private::FileSpec *file, - lldb::offset_t file_offset, lldb::offset_t size) + lldb::offset_t file_offset, lldb::offset_t size, ArchiveType archive_type) : ObjectContainer(module_sp, file, file_offset, size, data_sp, data_offset), - m_archive_sp() {} + m_archive_sp() { + m_archive_type = archive_type; +} + void ObjectContainerBSDArchive::SetArchive(Archive::shared_ptr &archive_sp) { m_archive_sp = archive_sp; } @@ -373,7 
+501,7 @@ bool ObjectContainerBSDArchive::ParseHeader() { if (module_sp) { m_archive_sp = Archive::ParseAndCacheArchiveForFile( m_file, module_sp->GetArchitecture(), - module_sp->GetModificationTime(), m_offset, m_data); + module_sp->GetModificationTime(), m_offset, m_data, m_archive_type); } // Clear the m_data that contains the entire archive data and let our // m_archive_sp hold onto the data. @@ -407,6 +535,19 @@ void ObjectContainerBSDArchive::Dump(Stream *s) const { s->EOL(); } +FileSpec GetChildFileSpecificationsFromThin(llvm::StringRef childPath, + const FileSpec &parentFileSpec) { + llvm::SmallString<128> FullPath; + if (llvm::sys::path::is_absolute(childPath)) { + FullPath = childPath; + } else { + FullPath = parentFileSpec.GetDirectory().GetStringRef(); + llvm::sys::path::append(FullPath, childPath); + } + FileSpec child = FileSpec(FullPath.str(), llvm::sys::path::Style::posix); + return child; +} + ObjectFileSP ObjectContainerBSDArchive::GetObjectFile(const FileSpec *file) { ModuleSP module_sp(GetModule()); if (module_sp) { @@ -414,6 +555,22 @@ ObjectFileSP ObjectContainerBSDArchive::GetObjectFile(const FileSpec *file) { Object *object = m_archive_sp->FindObject( module_sp->GetObjectName(), module_sp->GetObjectModificationTime()); if (object) { + if (m_archive_type == ArchiveType::ThinArchive) { + // Set file to child object file + FileSpec child = GetChildFileSpecificationsFromThin( + object->ar_name.GetStringRef(), m_file); + lldb::offset_t file_offset = 0; + lldb::offset_t file_size = object->size; + std::shared_ptr<DataBuffer> child_data_sp = + FileSystem::Instance().CreateDataBuffer(child, file_size, + file_offset); + if (child_data_sp->GetByteSize() != object->file_size) + return ObjectFileSP(); + lldb::offset_t data_offset = 0; + return ObjectFile::FindPlugin( + module_sp, &child, m_offset + object->file_offset, + object->file_size, child_data_sp, data_offset); + } lldb::offset_t data_offset = object->file_offset; return ObjectFile::FindPlugin( module_sp, file, m_offset + object->file_offset, object->file_size, @@ -434,7 +591,8 @@ size_t ObjectContainerBSDArchive::GetModuleSpecifications( // contents for the archive and cache it DataExtractor data; data.SetData(data_sp, data_offset, data_sp->GetByteSize()); - if (!file || !data_sp || !ObjectContainerBSDArchive::MagicBytesMatch(data)) + ArchiveType archive_type = ObjectContainerBSDArchive::MagicBytesMatch(data); + if (!file || !data_sp || archive_type == ArchiveType::Invalid) return 0; const size_t initial_count = specs.GetSize(); @@ -449,7 +607,7 @@ size_t ObjectContainerBSDArchive::GetModuleSpecifications( if (data_sp) { data.SetData(data_sp, 0, data_sp->GetByteSize()); archive_sp = Archive::ParseAndCacheArchiveForFile( - file, ArchSpec(), file_mod_time, file_offset, data); + file, ArchSpec(), file_mod_time, file_offset, data, archive_type); } } @@ -458,6 +616,24 @@ size_t ObjectContainerBSDArchive::GetModuleSpecifications( for (size_t idx = 0; idx < num_objects; ++idx) { const Object *object = archive_sp->GetObjectAtIndex(idx); if (object) { + if (archive_sp->GetArchiveType() == ArchiveType::ThinArchive) { + if (object->ar_name.IsEmpty()) + continue; + FileSpec child = GetChildFileSpecificationsFromThin( + object->ar_name.GetStringRef(), file); + if (ObjectFile::GetModuleSpecifications(child, 0, object->file_size, + specs)) { + ModuleSpec &spec = + specs.GetModuleSpecRefAtIndex(specs.GetSize() - 1); + llvm::sys::TimePoint<> object_mod_time( + std::chrono::seconds(object->modification_time)); + spec.GetObjectName() = 
object->ar_name; + spec.SetObjectOffset(0); + spec.SetObjectSize(object->file_size); + spec.GetObjectModificationTime() = object_mod_time; + } + continue; + } const lldb::offset_t object_file_offset = file_offset + object->file_offset; if (object->file_offset < file_size && file_size > object_file_offset) { diff --git a/lldb/source/Plugins/ObjectContainer/BSD-Archive/ObjectContainerBSDArchive.h b/lldb/source/Plugins/ObjectContainer/BSD-Archive/ObjectContainerBSDArchive.h index 21106d7b8590..ace072cbe149 100644 --- a/lldb/source/Plugins/ObjectContainer/BSD-Archive/ObjectContainerBSDArchive.h +++ b/lldb/source/Plugins/ObjectContainer/BSD-Archive/ObjectContainerBSDArchive.h @@ -15,19 +15,24 @@ #include "lldb/Utility/ConstString.h" #include "lldb/Utility/FileSpec.h" +#include "llvm/Object/Archive.h" #include "llvm/Support/Chrono.h" +#include "llvm/Support/Path.h" #include <map> #include <memory> #include <mutex> +enum class ArchiveType { Invalid, Archive, ThinArchive }; + class ObjectContainerBSDArchive : public lldb_private::ObjectContainer { public: ObjectContainerBSDArchive(const lldb::ModuleSP &module_sp, lldb::DataBufferSP &data_sp, lldb::offset_t data_offset, const lldb_private::FileSpec *file, - lldb::offset_t offset, lldb::offset_t length); + lldb::offset_t offset, lldb::offset_t length, + ArchiveType archive_type); ~ObjectContainerBSDArchive() override; @@ -54,7 +59,7 @@ public: lldb::offset_t length, lldb_private::ModuleSpecList &specs); - static bool MagicBytesMatch(const lldb_private::DataExtractor &data); + static ArchiveType MagicBytesMatch(const lldb_private::DataExtractor &data); // Member Functions bool ParseHeader() override; @@ -78,6 +83,10 @@ protected: void Clear(); + lldb::offset_t ExtractFromThin(const lldb_private::DataExtractor &data, + lldb::offset_t offset, + llvm::StringRef stringTable); + lldb::offset_t Extract(const lldb_private::DataExtractor &data, lldb::offset_t offset); /// Object name in the archive. 
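The thin-archive handling above keys off the leading magic bytes: a regular BSD/System V archive begins with "!<arch>\n" while a thin archive begins with "!<thin>\n", and in both cases each member header ends with the two-byte ARFMAG terminator "`\n". A minimal standalone sketch of the classification MagicBytesMatch performs (plain C++ without the LLDB DataExtractor types; the helper name is illustrative, and unlike the real function it checks only the leading magic, not the ARFMAG of the first member header):

#include <cstddef>
#include <cstring>

enum class ArchiveType { Invalid, Archive, ThinArchive };

// Both magics are 8 bytes long (SARMAG).
ArchiveType ClassifyArchive(const char *data, size_t size) {
  if (data == nullptr || size < 8)
    return ArchiveType::Invalid;
  if (::memcmp(data, "!<arch>\n", 8) == 0)
    return ArchiveType::Archive; // regular archive: members carry their bytes
  if (::memcmp(data, "!<thin>\n", 8) == 0)
    return ArchiveType::ThinArchive; // thin archive: members reference files
  return ArchiveType::Invalid;
}

Members of a thin archive carry only a path in ar_name; their bytes live in separate files resolved relative to the archive's directory, which is why GetObjectFile and GetModuleSpecifications above build a child FileSpec instead of slicing the archive buffer.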
@@ -112,7 +121,7 @@ protected: Archive(const lldb_private::ArchSpec &arch, const llvm::sys::TimePoint<> &mod_time, lldb::offset_t file_offset, - lldb_private::DataExtractor &data); + lldb_private::DataExtractor &data, ArchiveType archive_type); ~Archive(); @@ -127,7 +136,7 @@ protected: static Archive::shared_ptr ParseAndCacheArchiveForFile( const lldb_private::FileSpec &file, const lldb_private::ArchSpec &arch, const llvm::sys::TimePoint<> &mod_time, lldb::offset_t file_offset, - lldb_private::DataExtractor &data); + lldb_private::DataExtractor &data, ArchiveType archive_type); size_t GetNumObjects() const { return m_objects.size(); } @@ -156,6 +165,8 @@ protected: lldb_private::DataExtractor &GetData() { return m_data; } + ArchiveType GetArchiveType() { return m_archive_type; } + protected: typedef lldb_private::UniqueCStringMap<uint32_t> ObjectNameToIndexMap; // Member Variables @@ -167,11 +178,14 @@ protected: lldb_private::DataExtractor m_data; ///< The data for this object container ///so we don't lose data if the .a files ///gets modified + ArchiveType m_archive_type; }; void SetArchive(Archive::shared_ptr &archive_sp); Archive::shared_ptr m_archive_sp; + + ArchiveType m_archive_type; }; #endif // LLDB_SOURCE_PLUGINS_OBJECTCONTAINER_BSD_ARCHIVE_OBJECTCONTAINERBSDARCHIVE_H diff --git a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp index 28ccfbe3d6e6..f9fb36890d5a 100644 --- a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp +++ b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp @@ -3386,8 +3386,7 @@ size_t ObjectFileELF::ReadSectionData(Section *section, auto buffer_sp = std::make_shared<DataBufferHeap>(Decompressor->getDecompressedSize(), 0); if (auto error = Decompressor->decompress( - {reinterpret_cast<char *>(buffer_sp->GetBytes()), - size_t(buffer_sp->GetByteSize())})) { + {buffer_sp->GetBytes(), size_t(buffer_sp->GetByteSize())})) { GetModule()->ReportWarning( "Decompression of section '%s' failed: %s", section->GetName().GetCString(), diff --git a/lldb/source/Plugins/ObjectFile/PDB/ObjectFilePDB.cpp b/lldb/source/Plugins/ObjectFile/PDB/ObjectFilePDB.cpp index 1c10efed9564..44c708676e52 100644 --- a/lldb/source/Plugins/ObjectFile/PDB/ObjectFilePDB.cpp +++ b/lldb/source/Plugins/ObjectFile/PDB/ObjectFilePDB.cpp @@ -137,8 +137,6 @@ size_t ObjectFilePDB::GetModuleSpecifications( case PDB_Machine::x86: module_arch.SetTriple("i386-pc-windows"); specs.Append(module_spec); - module_arch.SetTriple("i686-pc-windows"); - specs.Append(module_spec); break; case PDB_Machine::ArmNT: module_arch.SetTriple("armv7-pc-windows"); diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp index 700e6ebdf84c..c44ace96dd55 100644 --- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp @@ -2727,7 +2727,7 @@ bool GDBRemoteCommunicationClient::SetCurrentThread(uint64_t tid, m_curr_pid = ret->pid; m_curr_tid = ret->tid; } - return ret.hasValue(); + return ret.has_value(); } bool GDBRemoteCommunicationClient::SetCurrentThreadForRun(uint64_t tid, @@ -2742,7 +2742,7 @@ bool GDBRemoteCommunicationClient::SetCurrentThreadForRun(uint64_t tid, m_curr_pid_run = ret->pid; m_curr_tid_run = ret->tid; } - return ret.hasValue(); + return ret.has_value(); } bool GDBRemoteCommunicationClient::GetStopReply( diff --git 
a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.h b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.h index 58ed22187747..d367f75cee0e 100644 --- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.h +++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.h @@ -254,8 +254,6 @@ public: lldb::addr_t GetShlibInfoAddr(); - bool GetSupportsThreadSuffix(); - bool GetProcessInfo(lldb::pid_t pid, ProcessInstanceInfo &process_info); uint32_t FindProcesses(const ProcessInstanceInfoMatch &process_match_info, diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp index 2aacac3692be..4b9354371bda 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp @@ -2292,7 +2292,7 @@ DWARFASTParserClang::ParseFunctionFromDWARF(CompileUnit &comp_unit, int call_file = 0; int call_line = 0; int call_column = 0; - DWARFExpression frame_base; + DWARFExpressionList frame_base; const dw_tag_t tag = die.Tag(); diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.cpp index ec074be581b5..06cdd877f7dc 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.cpp @@ -441,7 +441,7 @@ bool DWARFDIE::GetDIENamesAndRanges( const char *&name, const char *&mangled, DWARFRangeList &ranges, int &decl_file, int &decl_line, int &decl_column, int &call_file, int &call_line, int &call_column, - lldb_private::DWARFExpression *frame_base) const { + lldb_private::DWARFExpressionList *frame_base) const { if (IsValid()) { return m_die->GetDIENamesAndRanges( GetCU(), name, mangled, ranges, decl_file, decl_line, decl_column, diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.h index 5ee44a763204..7ce9550a081e 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.h @@ -85,11 +85,12 @@ public: DWARFDIE GetAttributeValueAsReferenceDIE(const dw_attr_t attr) const; - bool GetDIENamesAndRanges(const char *&name, const char *&mangled, - DWARFRangeList &ranges, int &decl_file, - int &decl_line, int &decl_column, int &call_file, - int &call_line, int &call_column, - lldb_private::DWARFExpression *frame_base) const; + bool + GetDIENamesAndRanges(const char *&name, const char *&mangled, + DWARFRangeList &ranges, int &decl_file, int &decl_line, + int &decl_column, int &call_file, int &call_line, + int &call_column, + lldb_private::DWARFExpressionList *frame_base) const; /// The range of all the children of this DIE. 
llvm::iterator_range<child_iterator> children() const; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp index 95c0cb6472c5..c98953640a58 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp @@ -231,7 +231,7 @@ bool DWARFDebugInfoEntry::GetDIENamesAndRanges( DWARFUnit *cu, const char *&name, const char *&mangled, DWARFRangeList &ranges, int &decl_file, int &decl_line, int &decl_column, int &call_file, int &call_line, int &call_column, - DWARFExpression *frame_base) const { + DWARFExpressionList *frame_base) const { dw_addr_t lo_pc = LLDB_INVALID_ADDRESS; dw_addr_t hi_pc = LLDB_INVALID_ADDRESS; std::vector<DWARFDIE> dies; @@ -345,21 +345,22 @@ bool DWARFDebugInfoEntry::GetDIENamesAndRanges( uint32_t block_offset = form_value.BlockData() - data.GetDataStart(); uint32_t block_length = form_value.Unsigned(); - *frame_base = DWARFExpression( - module, DataExtractor(data, block_offset, block_length), cu); + *frame_base = + DWARFExpressionList(module, + DWARFExpression(DataExtractor( + data, block_offset, block_length)), + cu); } else { DataExtractor data = cu->GetLocationData(); const dw_offset_t offset = form_value.Unsigned(); if (data.ValidOffset(offset)) { data = DataExtractor(data, offset, data.GetByteSize() - offset); - *frame_base = DWARFExpression(module, data, cu); if (lo_pc != LLDB_INVALID_ADDRESS) { assert(lo_pc >= cu->GetBaseAddress()); - frame_base->SetLocationListAddresses(cu->GetBaseAddress(), - lo_pc); - } else { + DWARFExpression::ParseDWARFLocationList(cu, data, frame_base); + frame_base->SetFuncFileAddress(lo_pc); + } else set_frame_base_loclist_addr = true; - } } } } @@ -384,7 +385,7 @@ bool DWARFDebugInfoEntry::GetDIENamesAndRanges( if (set_frame_base_loclist_addr) { dw_addr_t lowest_range_pc = ranges.GetMinRangeBase(0); assert(lowest_range_pc >= cu->GetBaseAddress()); - frame_base->SetLocationListAddresses(cu->GetBaseAddress(), lowest_range_pc); + frame_base->SetFuncFileAddress(lowest_range_pc); } if (ranges.IsEmpty() || name == nullptr || mangled == nullptr) { diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.h index 64e86c71ac09..32f653e99a70 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.h @@ -104,7 +104,7 @@ public: DWARFUnit *cu, const char *&name, const char *&mangled, DWARFRangeList &rangeList, int &decl_file, int &decl_line, int &decl_column, int &call_file, int &call_line, int &call_column, - lldb_private::DWARFExpression *frame_base = nullptr) const; + lldb_private::DWARFExpressionList *frame_base = nullptr) const; const DWARFAbbreviationDeclaration * GetAbbreviationDeclarationPtr(const DWARFUnit *cu) const; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp index 903cd2e38f76..7b4a5d8eca3e 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp @@ -579,6 +579,17 @@ void DWARFUnit::SetStrOffsetsBase(dw_offset_t str_offsets_base) { m_str_offsets_base = str_offsets_base; } +dw_addr_t DWARFUnit::ReadAddressFromDebugAddrSection(uint32_t index) const { + uint32_t index_size = GetAddressByteSize(); + dw_offset_t addr_base = GetAddrBase(); + dw_addr_t offset = addr_base + index * index_size; + const 
DWARFDataExtractor &data = + m_dwarf.GetDWARFContext().getOrLoadAddrData(); + if (data.ValidOffsetForDataOfSize(offset, index_size)) + return data.GetMaxU64_unchecked(&offset, index_size); + return LLDB_INVALID_ADDRESS; +} + // It may be called only with m_die_array_mutex held R/W. void DWARFUnit::ClearDIEsRWLocked() { m_die_array.clear(); diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h index 265e28b51c99..40a1943b847a 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h @@ -166,6 +166,8 @@ public: void SetStrOffsetsBase(dw_offset_t str_offsets_base); virtual void BuildAddressRangeTable(DWARFDebugAranges *debug_aranges) = 0; + dw_addr_t ReadAddressFromDebugAddrSection(uint32_t index) const; + lldb::ByteOrder GetByteOrder() const; const DWARFDebugAranges &GetFunctionAranges(); diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp index c0bf13e0281d..cbc24b1550c7 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp @@ -9,6 +9,7 @@ #include "SymbolFileDWARF.h" #include "llvm/ADT/Optional.h" +#include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Threading.h" @@ -1885,11 +1886,14 @@ SymbolFileDWARF::GlobalVariableMap &SymbolFileDWARF::GetGlobalAranges() { for (size_t g = 0; g < num_globals; ++g) { VariableSP var_sp = globals_sp->GetVariableAtIndex(g); if (var_sp && !var_sp->GetLocationIsConstantValueData()) { - const DWARFExpression &location = var_sp->LocationExpression(); + const DWARFExpressionList &location = + var_sp->LocationExpressionList(); Value location_result; Status error; - if (location.Evaluate(nullptr, LLDB_INVALID_ADDRESS, nullptr, - nullptr, location_result, &error)) { + ExecutionContext exe_ctx; + if (location.Evaluate(&exe_ctx, nullptr, LLDB_INVALID_ADDRESS, + nullptr, nullptr, location_result, + &error)) { if (location_result.GetValueType() == Value::ValueType::FileAddress) { lldb::addr_t file_addr = @@ -3163,7 +3167,7 @@ VariableSP SymbolFileDWARF::ParseVariableDIE(const SymbolContext &sc, const char *mangled = nullptr; Declaration decl; DWARFFormValue type_die_form; - DWARFExpression location; + DWARFExpressionList location_list(module, DWARFExpression(), die.GetCU()); bool is_external = false; bool is_artificial = false; DWARFFormValue const_value_form, location_form; @@ -3229,16 +3233,15 @@ VariableSP SymbolFileDWARF::ParseVariableDIE(const SymbolContext &sc, // present in the class declaration and DW_AT_location in the DIE defining // the member. 
bool location_is_const_value_data = false; - bool has_explicit_location = false; + bool has_explicit_location = location_form.IsValid(); bool use_type_size_for_value = false; if (location_form.IsValid()) { - has_explicit_location = true; if (DWARFFormValue::IsBlockForm(location_form.Form())) { const DWARFDataExtractor &data = die.GetData(); uint32_t block_offset = location_form.BlockData() - data.GetDataStart(); uint32_t block_length = location_form.Unsigned(); - location = DWARFExpression( + location_list = DWARFExpressionList( module, DataExtractor(data, block_offset, block_length), die.GetCU()); } else { DataExtractor data = die.GetCU()->GetLocationData(); @@ -3247,10 +3250,10 @@ VariableSP SymbolFileDWARF::ParseVariableDIE(const SymbolContext &sc, offset = die.GetCU()->GetLoclistOffset(offset).value_or(-1); if (data.ValidOffset(offset)) { data = DataExtractor(data, offset, data.GetByteSize() - offset); - location = DWARFExpression(module, data, die.GetCU()); - assert(func_low_pc != LLDB_INVALID_ADDRESS); - location.SetLocationListAddresses( - location_form.GetUnit()->GetBaseAddress(), func_low_pc); + const DWARFUnit *dwarf_cu = location_form.GetUnit(); + if (DWARFExpression::ParseDWARFLocationList(dwarf_cu, data, + &location_list)) + location_list.SetFuncFileAddress(func_low_pc); } } } else if (const_value_form.IsValid()) { @@ -3263,7 +3266,7 @@ VariableSP SymbolFileDWARF::ParseVariableDIE(const SymbolContext &sc, uint32_t block_offset = const_value_form.BlockData() - debug_info_data.GetDataStart(); uint32_t block_length = const_value_form.Unsigned(); - location = DWARFExpression( + location_list = DWARFExpressionList( module, DataExtractor(debug_info_data, block_offset, block_length), die.GetCU()); } else if (DWARFFormValue::IsDataForm(const_value_form.Form())) { @@ -3273,7 +3276,7 @@ VariableSP SymbolFileDWARF::ParseVariableDIE(const SymbolContext &sc, use_type_size_for_value = true; } else if (const char *str = const_value_form.AsCString()) { uint32_t string_length = strlen(str) + 1; - location = DWARFExpression( + location_list = DWARFExpressionList( module, DataExtractor(str, string_length, die.GetCU()->GetByteOrder(), die.GetCU()->GetAddressByteSize()), @@ -3323,16 +3326,19 @@ VariableSP SymbolFileDWARF::ParseVariableDIE(const SymbolContext &sc, // with locations like: DW_OP_addr(0x1000), DW_OP_constu(2), DW_OP_plus // so we need to look through the whole expression. bool is_static_lifetime = - has_explicit_mangled || (has_explicit_location && !location.IsValid()); + has_explicit_mangled || + (has_explicit_location && !location_list.IsValid()); // Check if the location has a DW_OP_addr with any address value... 
lldb::addr_t location_DW_OP_addr = LLDB_INVALID_ADDRESS; if (!location_is_const_value_data) { bool op_error = false; - location_DW_OP_addr = location.GetLocation_DW_OP_addr(0, op_error); + const DWARFExpression* location = location_list.GetAlwaysValidExpr(); + if (location) + location_DW_OP_addr = location->GetLocation_DW_OP_addr( + location_form.GetUnit(), 0, op_error); if (op_error) { StreamString strm; - location.DumpLocationForAddress(&strm, eDescriptionLevelFull, 0, 0, - nullptr); + location->DumpLocation(&strm, eDescriptionLevelFull, nullptr); GetObjectFile()->GetModule()->ReportError( "0x%8.8x: %s has an invalid location: %s", die.GetOffset(), die.GetTagAsCString(), strm.GetData()); @@ -3345,7 +3351,7 @@ VariableSP SymbolFileDWARF::ParseVariableDIE(const SymbolContext &sc, // Set the module of the expression to the linked module // instead of the object file so the relocated address can be // found there. - location.SetModule(debug_map_symfile->GetObjectFile()->GetModule()); + location_list.SetModule(debug_map_symfile->GetObjectFile()->GetModule()); if (is_static_lifetime) { if (is_external) @@ -3386,7 +3392,9 @@ VariableSP SymbolFileDWARF::ParseVariableDIE(const SymbolContext &sc, const addr_t exe_file_addr = exe_symbol->GetAddressRef().GetFileAddress(); if (exe_file_addr != LLDB_INVALID_ADDRESS) { - if (location.Update_DW_OP_addr(exe_file_addr)) { + DWARFExpression *location = + location_list.GetMutableExpressionAtAddress(); + if (location->Update_DW_OP_addr(exe_file_addr)) { linked_oso_file_addr = true; symbol_context_scope = exe_symbol; } @@ -3404,7 +3412,9 @@ VariableSP SymbolFileDWARF::ParseVariableDIE(const SymbolContext &sc, debug_map_symfile->LinkOSOFileAddress(this, location_DW_OP_addr); if (exe_file_addr != LLDB_INVALID_ADDRESS) { // Update the file address for this variable - location.Update_DW_OP_addr(exe_file_addr); + DWARFExpression *location = + location_list.GetMutableExpressionAtAddress(); + location->Update_DW_OP_addr(exe_file_addr); } else { // Variable didn't make it into the final executable return nullptr; @@ -3419,8 +3429,8 @@ VariableSP SymbolFileDWARF::ParseVariableDIE(const SymbolContext &sc, scope = eValueTypeVariableLocal; if (debug_map_symfile) { // We need to check for TLS addresses that we need to fixup - if (location.ContainsThreadLocalStorage()) { - location.LinkThreadLocalStorage( + if (location_list.ContainsThreadLocalStorage()) { + location_list.LinkThreadLocalStorage( debug_map_symfile->GetObjectFile()->GetModule(), [this, debug_map_symfile]( lldb::addr_t unlinked_file_addr) -> lldb::addr_t { @@ -3463,14 +3473,17 @@ VariableSP SymbolFileDWARF::ParseVariableDIE(const SymbolContext &sc, auto type_sp = std::make_shared<SymbolFileType>( *this, GetUID(type_die_form.Reference())); - if (use_type_size_for_value && type_sp->GetType()) - location.UpdateValue(const_value_form.Unsigned(), - type_sp->GetType()->GetByteSize(nullptr).value_or(0), - die.GetCU()->GetAddressByteSize()); + if (use_type_size_for_value && type_sp->GetType()) { + DWARFExpression *location = location_list.GetMutableExpressionAtAddress(); + location->UpdateValue( + const_value_form.Unsigned(), + type_sp->GetType()->GetByteSize(nullptr).getValueOr(0), + die.GetCU()->GetAddressByteSize()); + } return std::make_shared<Variable>( die.GetID(), name, mangled, type_sp, scope, symbol_context_scope, - scope_ranges, &decl, location, is_external, is_artificial, + scope_ranges, &decl, location_list, is_external, is_artificial, location_is_const_value_data, is_static_member); } @@ -3755,8 +3768,8 
@@ CollectCallSiteParameters(ModuleSP module, DWARFDIE call_site_die) { child.Tag() != DW_TAG_GNU_call_site_parameter) continue; - llvm::Optional<DWARFExpression> LocationInCallee; - llvm::Optional<DWARFExpression> LocationInCaller; + llvm::Optional<DWARFExpressionList> LocationInCallee; + llvm::Optional<DWARFExpressionList> LocationInCaller; DWARFAttributes attributes; const size_t num_attributes = child.GetAttributes(attributes); @@ -3764,7 +3777,7 @@ CollectCallSiteParameters(ModuleSP module, DWARFDIE call_site_die) { // Parse the location at index \p attr_index within this call site parameter // DIE, or return None on failure. auto parse_simple_location = - [&](int attr_index) -> llvm::Optional<DWARFExpression> { + [&](int attr_index) -> llvm::Optional<DWARFExpressionList> { DWARFFormValue form_value; if (!attributes.ExtractFormValueAtIndex(attr_index, form_value)) return {}; @@ -3773,9 +3786,9 @@ CollectCallSiteParameters(ModuleSP module, DWARFDIE call_site_die) { auto data = child.GetData(); uint32_t block_offset = form_value.BlockData() - data.GetDataStart(); uint32_t block_length = form_value.Unsigned(); - return DWARFExpression(module, - DataExtractor(data, block_offset, block_length), - child.GetCU()); + return DWARFExpressionList( + module, DataExtractor(data, block_offset, block_length), + child.GetCU()); }; for (size_t i = 0; i < num_attributes; ++i) { @@ -3820,7 +3833,7 @@ SymbolFileDWARF::CollectCallEdges(ModuleSP module, DWARFDIE function_die) { continue; llvm::Optional<DWARFDIE> call_origin; - llvm::Optional<DWARFExpression> call_target; + llvm::Optional<DWARFExpressionList> call_target; addr_t return_pc = LLDB_INVALID_ADDRESS; addr_t call_inst_pc = LLDB_INVALID_ADDRESS; addr_t low_pc = LLDB_INVALID_ADDRESS; @@ -3881,7 +3894,7 @@ SymbolFileDWARF::CollectCallEdges(ModuleSP module, DWARFDIE function_die) { auto data = child.GetData(); uint32_t block_offset = form_value.BlockData() - data.GetDataStart(); uint32_t block_length = form_value.Unsigned(); - call_target = DWARFExpression( + call_target = DWARFExpressionList( module, DataExtractor(data, block_offset, block_length), child.GetCU()); } diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h index 2403ee2624ea..cfd18f02053b 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h @@ -21,6 +21,7 @@ #include "lldb/Core/UniqueCStringMap.h" #include "lldb/Core/dwarf.h" +#include "lldb/Expression/DWARFExpressionList.h" #include "lldb/Symbol/DebugMacros.h" #include "lldb/Symbol/SymbolContext.h" #include "lldb/Symbol/SymbolFile.h" diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/DWARFLocationExpression.cpp b/lldb/source/Plugins/SymbolFile/NativePDB/DWARFLocationExpression.cpp index 3ba0079c96e6..3166c8ae65c6 100644 --- a/lldb/source/Plugins/SymbolFile/NativePDB/DWARFLocationExpression.cpp +++ b/lldb/source/Plugins/SymbolFile/NativePDB/DWARFLocationExpression.cpp @@ -122,7 +122,7 @@ static DWARFExpression MakeLocationExpressionInternal(lldb::ModuleSP module, DataBufferSP buffer = std::make_shared<DataBufferHeap>(stream.GetData(), stream.GetSize()); DataExtractor extractor(buffer, byte_order, address_size, byte_size); - DWARFExpression result(module, extractor, nullptr); + DWARFExpression result(extractor); result.SetRegisterKind(register_kind); return result; @@ -247,7 +247,7 @@ DWARFExpression lldb_private::npdb::MakeConstantLocationExpression( .take_front(size); 
buffer->CopyData(bytes.data(), size); DataExtractor extractor(buffer, lldb::eByteOrderLittle, address_size); - DWARFExpression result(nullptr, extractor, nullptr); + DWARFExpression result(extractor); return result; } diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/PdbUtil.cpp b/lldb/source/Plugins/SymbolFile/NativePDB/PdbUtil.cpp index 7bb7c69eece7..6317b140f7e8 100644 --- a/lldb/source/Plugins/SymbolFile/NativePDB/PdbUtil.cpp +++ b/lldb/source/Plugins/SymbolFile/NativePDB/PdbUtil.cpp @@ -602,7 +602,7 @@ static RegisterId GetBaseFrameRegister(PdbIndex &index, } VariableInfo lldb_private::npdb::GetVariableLocationInfo( - PdbIndex &index, PdbCompilandSymId var_id, Block &block, + PdbIndex &index, PdbCompilandSymId var_id, Block &func_block, lldb::ModuleSP module) { CVSymbol sym = index.ReadSymbolRecord(var_id); @@ -642,14 +642,8 @@ VariableInfo lldb_private::npdb::GetVariableLocationInfo( Variable::RangeList ranges = MakeRangeList(index, loc.Range, loc.Gaps); - // TODO: may be better to pass function scope and not lookup it every - // time? find nearest parent function block - Block *cur = █ - while (cur->GetParent()) { - cur = cur->GetParent(); - } PdbCompilandSymId func_scope_id = - PdbSymUid(cur->GetID()).asCompilandSym(); + PdbSymUid(func_block.GetID()).asCompilandSym(); CVSymbol func_block_cvs = index.ReadSymbolRecord(func_scope_id); lldbassert(func_block_cvs.kind() == S_GPROC32 || func_block_cvs.kind() == S_LPROC32); diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/PdbUtil.h b/lldb/source/Plugins/SymbolFile/NativePDB/PdbUtil.h index 138c11aaeb43..066bcc89fd3b 100644 --- a/lldb/source/Plugins/SymbolFile/NativePDB/PdbUtil.h +++ b/lldb/source/Plugins/SymbolFile/NativePDB/PdbUtil.h @@ -142,8 +142,8 @@ LookThroughModifierRecord(llvm::codeview::CVType modifier); llvm::StringRef DropNameScope(llvm::StringRef name); VariableInfo GetVariableNameInfo(llvm::codeview::CVSymbol symbol); -VariableInfo GetVariableLocationInfo(PdbIndex &index, PdbCompilandSymId var_id, Block& block, - lldb::ModuleSP module); +VariableInfo GetVariableLocationInfo(PdbIndex &index, PdbCompilandSymId var_id, + Block &func_block, lldb::ModuleSP module); size_t GetTypeSizeForSimpleKind(llvm::codeview::SimpleTypeKind kind); lldb::BasicType diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp index 7dc99818c244..7e10e315be20 100644 --- a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp +++ b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp @@ -823,8 +823,10 @@ VariableSP SymbolFileNativePDB::CreateGlobalVariable(PdbGlobalSymId var_id) { m_ast->GetOrCreateVariableDecl(var_id); - DWARFExpression location = MakeGlobalLocationExpression( - section, offset, GetObjectFile()->GetModule()); + ModuleSP module_sp = GetObjectFile()->GetModule(); + DWARFExpressionList location( + module_sp, MakeGlobalLocationExpression(section, offset, module_sp), + nullptr); std::string global_name("::"); global_name += name; @@ -855,8 +857,10 @@ SymbolFileNativePDB::CreateConstantSymbol(PdbGlobalSymId var_id, Declaration decl; Variable::RangeList ranges; ModuleSP module = GetObjectFile()->GetModule(); - DWARFExpression location = MakeConstantLocationExpression( - constant.Type, tpi, constant.Value, module); + DWARFExpressionList location(module, + MakeConstantLocationExpression( + constant.Type, tpi, constant.Value, module), + nullptr); bool external = false; bool artificial = false; @@ -1689,8 +1693,15 @@ VariableSP 
SymbolFileNativePDB::CreateLocalVariable(PdbCompilandSymId scope_id, bool is_param) { ModuleSP module = GetObjectFile()->GetModule(); Block &block = GetOrCreateBlock(scope_id); + // Get function block. + Block *func_block = &block; + while (func_block->GetParent()) { + func_block = func_block->GetParent(); + } + Address addr; + func_block->GetStartAddress(addr); VariableInfo var_info = - GetVariableLocationInfo(*m_index, var_id, block, module); + GetVariableLocationInfo(*m_index, var_id, *func_block, module); if (!var_info.location || !var_info.ranges) return nullptr; @@ -1709,11 +1720,12 @@ VariableSP SymbolFileNativePDB::CreateLocalVariable(PdbCompilandSymId scope_id, bool artificial = false; bool location_is_constant_data = false; bool static_member = false; + DWARFExpressionList location_list = DWARFExpressionList( + module, *var_info.location, nullptr); VariableSP var_sp = std::make_shared<Variable>( toOpaqueUid(var_id), name.c_str(), name.c_str(), sftype, var_scope, - &block, *var_info.ranges, &decl, *var_info.location, external, - artificial, location_is_constant_data, static_member); - + &block, *var_info.ranges, &decl, location_list, external, artificial, + location_is_constant_data, static_member); if (!is_param) m_ast->GetOrCreateVariableDecl(scope_id, var_id); diff --git a/lldb/source/Plugins/SymbolFile/PDB/PDBLocationToDWARFExpression.cpp b/lldb/source/Plugins/SymbolFile/PDB/PDBLocationToDWARFExpression.cpp index 96e9de704e41..94023737b2a2 100644 --- a/lldb/source/Plugins/SymbolFile/PDB/PDBLocationToDWARFExpression.cpp +++ b/lldb/source/Plugins/SymbolFile/PDB/PDBLocationToDWARFExpression.cpp @@ -175,7 +175,7 @@ DWARFExpression ConvertPDBLocationToDWARFExpression( DataBufferSP buffer = std::make_shared<DataBufferHeap>(stream.GetData(), stream.GetSize()); DataExtractor extractor(buffer, byte_order, address_size, byte_size); - DWARFExpression result(module, extractor, nullptr); + DWARFExpression result(extractor); result.SetRegisterKind(register_kind); return result; diff --git a/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp b/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp index bd3d16aad6c2..baa48532864b 100644 --- a/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp +++ b/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp @@ -1023,8 +1023,11 @@ VariableSP SymbolFilePDB::ParseVariableForPDBData( auto mangled_cstr = mangled.empty() ?
nullptr : mangled.c_str(); bool is_constant; - DWARFExpression location = ConvertPDBLocationToDWARFExpression( - GetObjectFile()->GetModule(), pdb_data, ranges, is_constant); + ModuleSP module_sp = GetObjectFile()->GetModule(); + DWARFExpressionList location(module_sp, + ConvertPDBLocationToDWARFExpression( + module_sp, pdb_data, ranges, is_constant), + nullptr); var_sp = std::make_shared<Variable>( var_uid, var_name.c_str(), mangled_cstr, type_sp, scope, context_scope, diff --git a/lldb/source/Plugins/Trace/intel-pt/CommandObjectTraceStartIntelPT.cpp b/lldb/source/Plugins/Trace/intel-pt/CommandObjectTraceStartIntelPT.cpp index 6c386f6a83fa..91eafdaa11bc 100644 --- a/lldb/source/Plugins/Trace/intel-pt/CommandObjectTraceStartIntelPT.cpp +++ b/lldb/source/Plugins/Trace/intel-pt/CommandObjectTraceStartIntelPT.cpp @@ -32,13 +32,12 @@ Status CommandObjectThreadTraceStartIntelPT::CommandOptions::SetOptionValue( switch (short_option) { case 's': { - int64_t ipt_trace_size; - if (option_arg.empty() || option_arg.getAsInteger(0, ipt_trace_size) || - ipt_trace_size < 0) - error.SetErrorStringWithFormat("invalid integer value for option '%s'", - option_arg.str().c_str()); + if (Optional<uint64_t> bytes = + ParsingUtils::ParseUserFriendlySizeExpression(option_arg)) + m_ipt_trace_size = *bytes; else - m_ipt_trace_size = ipt_trace_size; + error.SetErrorStringWithFormat("invalid bytes expression for '%s'", + option_arg.str().c_str()); break; } case 't': { @@ -98,24 +97,21 @@ Status CommandObjectProcessTraceStartIntelPT::CommandOptions::SetOptionValue( switch (short_option) { case 's': { - int64_t ipt_trace_size; - if (option_arg.empty() || option_arg.getAsInteger(0, ipt_trace_size) || - ipt_trace_size < 0) - error.SetErrorStringWithFormat("invalid integer value for option '%s'", - option_arg.str().c_str()); + if (Optional<uint64_t> bytes = + ParsingUtils::ParseUserFriendlySizeExpression(option_arg)) + m_ipt_trace_size = *bytes; else - m_ipt_trace_size = ipt_trace_size; + error.SetErrorStringWithFormat("invalid bytes expression for '%s'", + option_arg.str().c_str()); break; } case 'l': { - int64_t process_buffer_size_limit; - if (option_arg.empty() || - option_arg.getAsInteger(0, process_buffer_size_limit) || - process_buffer_size_limit < 0) - error.SetErrorStringWithFormat("invalid integer value for option '%s'", - option_arg.str().c_str()); + if (Optional<uint64_t> bytes = + ParsingUtils::ParseUserFriendlySizeExpression(option_arg)) + m_process_buffer_size_limit = *bytes; else - m_process_buffer_size_limit = process_buffer_size_limit; + error.SetErrorStringWithFormat("invalid bytes expression for '%s'", + option_arg.str().c_str()); break; } case 't': { @@ -126,6 +122,10 @@ Status CommandObjectProcessTraceStartIntelPT::CommandOptions::SetOptionValue( m_per_cpu_tracing = true; break; } + case 'd': { + m_disable_cgroup_filtering = true; + break; + } case 'p': { int64_t psb_period; if (option_arg.empty() || option_arg.getAsInteger(0, psb_period) || @@ -149,6 +149,7 @@ void CommandObjectProcessTraceStartIntelPT::CommandOptions:: m_enable_tsc = kDefaultEnableTscValue; m_psb_period = kDefaultPsbPeriod; m_per_cpu_tracing = kDefaultPerCpuTracing; + m_disable_cgroup_filtering = kDefaultDisableCgroupFiltering; } llvm::ArrayRef<OptionDefinition> @@ -158,13 +159,55 @@ CommandObjectProcessTraceStartIntelPT::CommandOptions::GetDefinitions() { bool CommandObjectProcessTraceStartIntelPT::DoExecute( Args &command, CommandReturnObject &result) { - if (Error err = m_trace.Start(m_options.m_ipt_trace_size, - 
m_options.m_process_buffer_size_limit, - m_options.m_enable_tsc, m_options.m_psb_period, - m_options.m_per_cpu_tracing)) + if (Error err = m_trace.Start( + m_options.m_ipt_trace_size, m_options.m_process_buffer_size_limit, + m_options.m_enable_tsc, m_options.m_psb_period, + m_options.m_per_cpu_tracing, m_options.m_disable_cgroup_filtering)) result.SetError(Status(std::move(err))); else result.SetStatus(eReturnStatusSuccessFinishResult); return result.Succeeded(); } + +Optional<uint64_t> +ParsingUtils::ParseUserFriendlySizeExpression(llvm::StringRef size_expression) { + if (size_expression.empty()) { + return llvm::None; + } + const uint64_t kBytesMultiplier = 1; + const uint64_t kKibiBytesMultiplier = 1024; + const uint64_t kMebiBytesMultiplier = 1024 * 1024; + + DenseMap<StringRef, uint64_t> multipliers = { + {"mib", kMebiBytesMultiplier}, {"mb", kMebiBytesMultiplier}, + {"m", kMebiBytesMultiplier}, {"kib", kKibiBytesMultiplier}, + {"kb", kKibiBytesMultiplier}, {"k", kKibiBytesMultiplier}, + {"b", kBytesMultiplier}, {"", kBytesMultiplier}}; + + const auto non_digit_index = size_expression.find_first_not_of("0123456789"); + if (non_digit_index == 0) { // expression starts with a non-digit char. + return llvm::None; + } + + const llvm::StringRef number_part = + non_digit_index == llvm::StringRef::npos + ? size_expression + : size_expression.substr(0, non_digit_index); + uint64_t parsed_number; + if (number_part.getAsInteger(10, parsed_number)) { + return llvm::None; + } + + if (non_digit_index != llvm::StringRef::npos) { // if expression has units. + const auto multiplier = size_expression.substr(non_digit_index).lower(); + + auto it = multipliers.find(multiplier); + if (it == multipliers.end()) + return llvm::None; + + return parsed_number * it->second; + } else { + return parsed_number; + } +} diff --git a/lldb/source/Plugins/Trace/intel-pt/CommandObjectTraceStartIntelPT.h b/lldb/source/Plugins/Trace/intel-pt/CommandObjectTraceStartIntelPT.h index b5d6a0f24043..083184e4817c 100644 --- a/lldb/source/Plugins/Trace/intel-pt/CommandObjectTraceStartIntelPT.h +++ b/lldb/source/Plugins/Trace/intel-pt/CommandObjectTraceStartIntelPT.h @@ -79,6 +79,7 @@ public: bool m_enable_tsc; llvm::Optional<uint64_t> m_psb_period; bool m_per_cpu_tracing; + bool m_disable_cgroup_filtering; }; CommandObjectProcessTraceStartIntelPT(TraceIntelPT &trace, @@ -109,6 +110,23 @@ protected: CommandOptions m_options; }; +namespace ParsingUtils { +/// Convert an integral size expression like 12KiB or 4MB into bytes. The units +/// are taken loosely to help users input sizes into LLDB, e.g. KiB and KB are +/// considered the same (2^10 bytes) for simplicity. +/// +/// \param[in] size_expression
///     String expression which is an integral number plus a unit that can be
///     lower or upper case. Supported units: K, KB and KiB for 2^10 bytes; M, +/// MB and MiB for 2^20 bytes; and B for bytes. A single integral number is +/// considered bytes. +/// \return +/// The converted number of bytes or \a llvm::None if the expression is +/// invalid.
+llvm::Optional<uint64_t> +ParseUserFriendlySizeExpression(llvm::StringRef size_expression); +} // namespace ParsingUtils + } // namespace trace_intel_pt } // namespace lldb_private diff --git a/lldb/source/Plugins/Trace/intel-pt/DecodedThread.cpp b/lldb/source/Plugins/Trace/intel-pt/DecodedThread.cpp index 578828ff1633..0859c5a20b7e 100644 --- a/lldb/source/Plugins/Trace/intel-pt/DecodedThread.cpp +++ b/lldb/source/Plugins/Trace/intel-pt/DecodedThread.cpp @@ -63,11 +63,28 @@ DecodedThread::CreateNewTraceItem(lldb::TraceItemKind kind) { void DecodedThread::NotifyTsc(uint64_t tsc) { if (!m_last_tsc || *m_last_tsc != tsc) { - m_instruction_timestamps.emplace(m_item_kinds.size(), tsc); + m_timestamps.emplace(m_item_kinds.size(), tsc); m_last_tsc = tsc; } } +void DecodedThread::NotifyCPU(lldb::cpu_id_t cpu_id) { + if (!m_last_cpu || *m_last_cpu != cpu_id) { + m_cpus.emplace(m_item_kinds.size(), cpu_id); + m_last_cpu = cpu_id; + AppendEvent(lldb::eTraceEventCPUChanged); + } +} + +Optional<lldb::cpu_id_t> +DecodedThread::GetCPUByIndex(uint64_t insn_index) const { + // Could possibly optimize the search + auto it = m_cpus.upper_bound(insn_index); + if (it == m_cpus.begin()) + return None; + return prev(it)->second; +} + void DecodedThread::AppendEvent(lldb::TraceEvent event) { CreateNewTraceItem(lldb::eTraceItemKindEvent).event = event; m_events_stats.RecordEvent(event); @@ -136,8 +153,8 @@ Optional<DecodedThread::TscRange> DecodedThread::CalculateTscRange( return candidate_range; } // Now we do a more expensive lookup - auto it = m_instruction_timestamps.upper_bound(insn_index); - if (it == m_instruction_timestamps.begin()) + auto it = m_timestamps.upper_bound(insn_index); + if (it == m_timestamps.begin()) return None; return TscRange(--it, *this); @@ -160,7 +177,8 @@ lldb::TraceCursorUP DecodedThread::CreateNewCursor() { size_t DecodedThread::CalculateApproximateMemoryUsage() const { return sizeof(TraceItemStorage) * m_item_data.size() + sizeof(uint8_t) * m_item_kinds.size() + - (sizeof(size_t) + sizeof(uint64_t)) * m_instruction_timestamps.size(); + (sizeof(size_t) + sizeof(uint64_t)) * m_timestamps.size() + + (sizeof(size_t) + sizeof(lldb::cpu_id_t)) * m_cpus.size(); } DecodedThread::TscRange::TscRange(std::map<size_t, uint64_t>::const_iterator it, @@ -168,7 +186,7 @@ DecodedThread::TscRange::TscRange(std::map<size_t, uint64_t>::const_iterator it, : m_it(it), m_decoded_thread(&decoded_thread) { auto next_it = m_it; ++next_it; - m_end_index = (next_it == m_decoded_thread->m_instruction_timestamps.end()) + m_end_index = (next_it == m_decoded_thread->m_timestamps.end()) ? 
std::numeric_limits<uint64_t>::max() : next_it->first - 1; } @@ -191,13 +209,13 @@ bool DecodedThread::TscRange::InRange(size_t insn_index) const { Optional<DecodedThread::TscRange> DecodedThread::TscRange::Next() const { auto next_it = m_it; ++next_it; - if (next_it == m_decoded_thread->m_instruction_timestamps.end()) + if (next_it == m_decoded_thread->m_timestamps.end()) return None; return TscRange(next_it, *m_decoded_thread); } Optional<DecodedThread::TscRange> DecodedThread::TscRange::Prev() const { - if (m_it == m_decoded_thread->m_instruction_timestamps.begin()) + if (m_it == m_decoded_thread->m_timestamps.begin()) return None; auto prev_it = m_it; --prev_it; diff --git a/lldb/source/Plugins/Trace/intel-pt/DecodedThread.h b/lldb/source/Plugins/Trace/intel-pt/DecodedThread.h index b17e927fafe4..bd1a90aaf250 100644 --- a/lldb/source/Plugins/Trace/intel-pt/DecodedThread.h +++ b/lldb/source/Plugins/Trace/intel-pt/DecodedThread.h @@ -168,6 +168,15 @@ public: /// The underlying event type for the given trace item index. lldb::TraceEvent GetEventByIndex(int item_index) const; + /// Get the most recent CPU id before or at the given trace item index. + /// + /// \param[in] item_index + /// The trace item index to compare with. + /// + /// \return + /// The requested cpu id, or \a llvm::None if not available. + llvm::Optional<lldb::cpu_id_t> GetCPUByIndex(uint64_t item_index) const; + /// \return /// The load address of the instruction at the given index. lldb::addr_t GetInstructionLoadAddress(size_t item_index) const; @@ -204,8 +213,13 @@ public: lldb::ThreadSP GetThread(); /// Notify this object that a new tsc has been seen. + /// If this is a new TSC, it will be recorded. void NotifyTsc(uint64_t tsc); + /// Notify this object that a CPU has been seen. + /// If this is a new CPU, a CPU-changed event will be created. + void NotifyCPU(lldb::cpu_id_t cpu_id); + /// Append a decoding error. void AppendError(const IntelPTError &error); @@ -254,10 +268,17 @@ private: /// are sporadic and we can think of them as ranges. If TSCs are present in /// the trace, all instructions will have an associated TSC, including the /// first one. Otherwise, this map will be empty. - std::map<uint64_t, uint64_t> m_instruction_timestamps; + std::map<uint64_t, uint64_t> m_timestamps; /// This is the chronologically last TSC that has been added. llvm::Optional<uint64_t> m_last_tsc = llvm::None; + /// The CPU information is stored as a map from instruction index to CPU id. + /// A CPU id applies to all the instructions that follow until the next CPU + /// is seen. + std::map<uint64_t, lldb::cpu_id_t> m_cpus; + /// This is the chronologically last CPU ID. + llvm::Optional<uint64_t> m_last_cpu = llvm::None; + /// Statistics of all tracing events. EventsStats m_events_stats; /// Statistics of libipt errors when decoding TSCs.
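The m_cpus map introduced here follows the same sparse change-point scheme as the TSC map: an entry is stored only at the item index where the value changes, and GetCPUByIndex recovers the value in effect at any index via upper_bound plus one step back. A self-contained sketch of the pattern (std types only; free functions stand in for the DecodedThread members):

#include <cstdint>
#include <iterator>
#include <map>
#include <optional>

// Maps "item index at which the CPU changed" -> cpu id seen there.
static std::map<uint64_t, uint32_t> cpus;
static std::optional<uint32_t> last_cpu;

void NotifyCPU(uint64_t item_index, uint32_t cpu_id) {
  // Record an entry only when the CPU actually changes.
  if (!last_cpu || *last_cpu != cpu_id) {
    cpus.emplace(item_index, cpu_id);
    last_cpu = cpu_id;
  }
}

std::optional<uint32_t> GetCPUByIndex(uint64_t item_index) {
  // upper_bound yields the first change strictly after item_index; the
  // entry before it, if any, is the CPU in effect at item_index.
  auto it = cpus.upper_bound(item_index);
  if (it == cpus.begin())
    return std::nullopt; // no CPU recorded at or before this index
  return std::prev(it)->second;
}

Memory cost therefore grows with the number of context switches rather than the number of trace items, which is what the CalculateApproximateMemoryUsage change above accounts for.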
diff --git a/lldb/source/Plugins/Trace/intel-pt/LibiptDecoder.cpp b/lldb/source/Plugins/Trace/intel-pt/LibiptDecoder.cpp index dd34467e38b5..a98337a4e058 100644 --- a/lldb/source/Plugins/Trace/intel-pt/LibiptDecoder.cpp +++ b/lldb/source/Plugins/Trace/intel-pt/LibiptDecoder.cpp @@ -285,6 +285,8 @@ Error lldb_private::trace_intel_pt::DecodeSystemWideTraceForThread( for (size_t i = 0; i < executions.size(); i++) { const IntelPTThreadContinousExecution &execution = executions[i]; + decoded_thread.NotifyCPU(execution.thread_execution.cpu_id); + auto variant = execution.thread_execution.variant; // If we haven't seen a PSB yet, then it's fine not to show errors if (has_seen_psbs) { diff --git a/lldb/source/Plugins/Trace/intel-pt/PerfContextSwitchDecoder.cpp b/lldb/source/Plugins/Trace/intel-pt/PerfContextSwitchDecoder.cpp index 6b4251a0fcd9..0c468cf7852f 100644 --- a/lldb/source/Plugins/Trace/intel-pt/PerfContextSwitchDecoder.cpp +++ b/lldb/source/Plugins/Trace/intel-pt/PerfContextSwitchDecoder.cpp @@ -16,8 +16,13 @@ using namespace llvm; /// non-linux platforms. /// \{ #define PERF_RECORD_MISC_SWITCH_OUT (1 << 13) -#define PERF_RECORD_MAX 19 + +#define PERF_RECORD_LOST 2 +#define PERF_RECORD_THROTTLE 5 +#define PERF_RECORD_UNTHROTTLE 6 +#define PERF_RECORD_LOST_SAMPLES 13 #define PERF_RECORD_SWITCH_CPU_WIDE 15 +#define PERF_RECORD_MAX 19 struct perf_event_header { uint32_t type; @@ -54,6 +59,11 @@ struct perf_event_header { bool IsContextSwitchRecord() const { return type == PERF_RECORD_SWITCH_CPU_WIDE; } + + bool IsErrorRecord() const { + return type == PERF_RECORD_LOST || type == PERF_RECORD_THROTTLE || + type == PERF_RECORD_UNTHROTTLE || type == PERF_RECORD_LOST_SAMPLES; + } }; /// \} @@ -286,3 +296,36 @@ lldb_private::trace_intel_pt::DecodePerfContextSwitchTrace( return executions; } + +Expected<std::vector<uint8_t>> +lldb_private::trace_intel_pt::FilterProcessesFromContextSwitchTrace( + llvm::ArrayRef<uint8_t> data, const std::set<lldb::pid_t> &pids) { + size_t offset = 0; + std::vector<uint8_t> out_data; + + while (offset < data.size()) { + const perf_event_header &perf_record = + *reinterpret_cast<const perf_event_header *>(data.data() + offset); + if (Error err = perf_record.SanityCheck()) + return std::move(err); + bool should_copy = false; + if (perf_record.IsContextSwitchRecord()) { + const PerfContextSwitchRecord &context_switch_record = + *reinterpret_cast<const PerfContextSwitchRecord *>(data.data() + + offset); + if (pids.count(context_switch_record.pid)) + should_copy = true; + } else if (perf_record.IsErrorRecord()) { + should_copy = true; + } + + if (should_copy) { + for (size_t i = 0; i < perf_record.size; i++) { + out_data.push_back(data[offset + i]); + } + } + + offset += perf_record.size; + } + return out_data; +} diff --git a/lldb/source/Plugins/Trace/intel-pt/PerfContextSwitchDecoder.h b/lldb/source/Plugins/Trace/intel-pt/PerfContextSwitchDecoder.h index 721aa1d77481..a16a437e1888 100644 --- a/lldb/source/Plugins/Trace/intel-pt/PerfContextSwitchDecoder.h +++ b/lldb/source/Plugins/Trace/intel-pt/PerfContextSwitchDecoder.h @@ -14,6 +14,7 @@ #include "llvm/Support/Error.h" +#include <set> #include <vector> namespace lldb_private { @@ -139,6 +140,10 @@ DecodePerfContextSwitchTrace(llvm::ArrayRef<uint8_t> data, lldb::cpu_id_t cpu_id, const LinuxPerfZeroTscConversion &tsc_conversion); +llvm::Expected<std::vector<uint8_t>> +FilterProcessesFromContextSwitchTrace(llvm::ArrayRef<uint8_t> data, + const std::set<lldb::pid_t> &pids); + } // namespace trace_intel_pt } // namespace 
lldb_private diff --git a/lldb/source/Plugins/Trace/intel-pt/TaskTimer.h b/lldb/source/Plugins/Trace/intel-pt/TaskTimer.h index 92b563257437..2b85ed30334f 100644 --- a/lldb/source/Plugins/Trace/intel-pt/TaskTimer.h +++ b/lldb/source/Plugins/Trace/intel-pt/TaskTimer.h @@ -35,9 +35,9 @@ public: /// /// \return /// The return value of the task. - template <class R> R TimeTask(llvm::StringRef name, std::function<R()> task) { + template <typename C> auto TimeTask(llvm::StringRef name, C task) { auto start = std::chrono::steady_clock::now(); - R result = task(); + auto result = task(); auto end = std::chrono::steady_clock::now(); std::chrono::milliseconds duration = std::chrono::duration_cast<std::chrono::milliseconds>(end - start); diff --git a/lldb/source/Plugins/Trace/intel-pt/ThreadDecoder.cpp b/lldb/source/Plugins/Trace/intel-pt/ThreadDecoder.cpp index 8b90afb219af..d3ac61f7e658 100644 --- a/lldb/source/Plugins/Trace/intel-pt/ThreadDecoder.cpp +++ b/lldb/source/Plugins/Trace/intel-pt/ThreadDecoder.cpp @@ -35,9 +35,8 @@ Expected<DecodedThreadSP> ThreadDecoder::Decode() { } llvm::Expected<DecodedThreadSP> ThreadDecoder::DoDecode() { - return m_trace.GetTimer() - .ForThread(m_thread_sp->GetID()) - .TimeTask<Expected<DecodedThreadSP>>( + return m_trace.GetThreadTimer(m_thread_sp->GetID()) + .TimeTask( "Decoding instructions", [&]() -> Expected<DecodedThreadSP> { DecodedThreadSP decoded_thread_sp = std::make_shared<DecodedThread>(m_thread_sp); diff --git a/lldb/source/Plugins/Trace/intel-pt/TraceCursorIntelPT.cpp b/lldb/source/Plugins/Trace/intel-pt/TraceCursorIntelPT.cpp index da91ba7c13d8..185c02b6bcd9 100644 --- a/lldb/source/Plugins/Trace/intel-pt/TraceCursorIntelPT.cpp +++ b/lldb/source/Plugins/Trace/intel-pt/TraceCursorIntelPT.cpp @@ -84,6 +84,10 @@ TraceCursorIntelPT::GetCounter(lldb::TraceCounter counter_type) const { } } +Optional<lldb::cpu_id_t> TraceCursorIntelPT::GetCPU() const { + return m_decoded_thread_sp->GetCPUByIndex(m_pos); +} + lldb::TraceEvent TraceCursorIntelPT::GetEventType() const { return m_decoded_thread_sp->GetEventByIndex(m_pos); } diff --git a/lldb/source/Plugins/Trace/intel-pt/TraceCursorIntelPT.h b/lldb/source/Plugins/Trace/intel-pt/TraceCursorIntelPT.h index c90431de3bbc..2e0f67e67dfc 100644 --- a/lldb/source/Plugins/Trace/intel-pt/TraceCursorIntelPT.h +++ b/lldb/source/Plugins/Trace/intel-pt/TraceCursorIntelPT.h @@ -34,6 +34,8 @@ public: lldb::TraceEvent GetEventType() const override; + llvm::Optional<lldb::cpu_id_t> GetCPU() const override; + lldb::TraceItemKind GetItemKind() const override; bool GoToId(lldb::user_id_t id) override; diff --git a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.cpp b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.cpp index fc7a103fbe15..57433ffb14cb 100644 --- a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.cpp +++ b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.cpp @@ -55,9 +55,9 @@ StringRef TraceIntelPT::GetSchema() { void TraceIntelPT::Dump(Stream *s) const {} -llvm::Error TraceIntelPT::SaveLiveTraceToDisk(FileSpec directory) { +Expected<FileSpec> TraceIntelPT::SaveToDisk(FileSpec directory, bool compact) { RefreshLiveProcessState(); - return TraceIntelPTBundleSaver().SaveToDisk(*this, directory); + return TraceIntelPTBundleSaver().SaveToDisk(*this, directory, compact); } Expected<TraceSP> TraceIntelPT::CreateInstanceForTraceBundle( @@ -146,10 +146,16 @@ TraceIntelPT::CreateNewCursor(Thread &thread) { return decoded_thread.takeError(); } -void TraceIntelPT::DumpTraceInfo(Thread &thread, Stream &s, bool verbose) { +void 
TraceIntelPT::DumpTraceInfo(Thread &thread, Stream &s, bool verbose, + bool json) { Storage &storage = GetUpdatedStorage(); lldb::tid_t tid = thread.GetID(); + if (json) { + DumpTraceInfoAsJson(thread, s, verbose); + return; + } + s.Format("\nthread #{0}: tid = {1}", thread.GetIndexID(), thread.GetID()); if (!IsTraced(tid)) { s << ", not traced\n"; @@ -172,12 +178,14 @@ void TraceIntelPT::DumpTraceInfo(Thread &thread, Stream &s, bool verbose) { } Optional<uint64_t> raw_size = *raw_size_or_error; + s.Format("\n Trace technology: {0}\n", GetPluginName()); + /// Instruction stats { uint64_t items_count = decoded_thread_sp->GetItemsCount(); uint64_t mem_used = decoded_thread_sp->CalculateApproximateMemoryUsage(); - s.Format(" Total number of trace items: {0}\n", items_count); + s.Format("\n Total number of trace items: {0}\n", items_count); s << "\n Memory usage:\n"; if (raw_size) @@ -199,10 +207,10 @@ void TraceIntelPT::DumpTraceInfo(Thread &thread, Stream &s, bool verbose) { std::chrono::milliseconds duration) { s.Format(" {0}: {1:2}s\n", name, duration.count() / 1000.0); }; - GetTimer().ForThread(tid).ForEachTimedTask(print_duration); + GetThreadTimer(tid).ForEachTimedTask(print_duration); s << "\n Timing for global tasks:\n"; - GetTimer().ForGlobal().ForEachTimedTask(print_duration); + GetGlobalTimer().ForEachTimedTask(print_duration); } // Instruction events stats @@ -226,6 +234,12 @@ void TraceIntelPT::DumpTraceInfo(Thread &thread, Stream &s, bool verbose) { s.Format( " Number of continuous executions for this thread: {0}\n", storage.multicpu_decoder->GetNumContinuousExecutionsForThread(tid)); + s.Format(" Total number of PSB blocks found: {0}\n", + storage.multicpu_decoder->GetTotalPSBBlocksCount()); + s.Format(" Number of PSB blocks for this thread: {0}\n", + storage.multicpu_decoder->GePSBBlocksCountForThread(tid)); + s.Format(" Total number of unattributed PSB blocks found: {0}\n", + storage.multicpu_decoder->GetUnattributedPSBBlocksCount()); } // Errors @@ -243,6 +257,117 @@ void TraceIntelPT::DumpTraceInfo(Thread &thread, Stream &s, bool verbose) { } } +void TraceIntelPT::DumpTraceInfoAsJson(Thread &thread, Stream &s, + bool verbose) { + Storage &storage = GetUpdatedStorage(); + + lldb::tid_t tid = thread.GetID(); + json::OStream json_str(s.AsRawOstream(), 2); + if (!IsTraced(tid)) { + s << "error: thread not traced\n"; + return; + } + + Expected<Optional<uint64_t>> raw_size_or_error = GetRawTraceSize(thread); + if (!raw_size_or_error) { + s << "error: " << toString(raw_size_or_error.takeError()) << "\n"; + return; + } + + Expected<DecodedThreadSP> decoded_thread_sp_or_err = Decode(thread); + if (!decoded_thread_sp_or_err) { + s << "error: " << toString(decoded_thread_sp_or_err.takeError()) << "\n"; + return; + } + DecodedThreadSP &decoded_thread_sp = *decoded_thread_sp_or_err; + + json_str.object([&] { + json_str.attribute("traceTechnology", "intel-pt"); + json_str.attributeObject("threadStats", [&] { + json_str.attribute("tid", tid); + + uint64_t insn_len = decoded_thread_sp->GetItemsCount(); + json_str.attribute("traceItemsCount", insn_len); + + // Instruction stats + uint64_t mem_used = decoded_thread_sp->CalculateApproximateMemoryUsage(); + json_str.attributeObject("memoryUsage", [&] { + json_str.attribute("totalInBytes", std::to_string(mem_used)); + Optional<double> avg; + if (insn_len != 0) + avg = double(mem_used) / insn_len; + json_str.attribute("avgPerItemInBytes", avg); + }); + + // Timing + json_str.attributeObject("timingInSeconds", [&] { + 
GetTimer().ForThread(tid).ForEachTimedTask( + [&](const std::string &name, std::chrono::milliseconds duration) { + json_str.attribute(name, duration.count() / 1000.0); + }); + }); + + // Instruction events stats + const DecodedThread::EventsStats &events_stats = + decoded_thread_sp->GetEventsStats(); + json_str.attributeObject("events", [&] { + json_str.attribute("totalCount", events_stats.total_count); + json_str.attributeObject("individualCounts", [&] { + for (const auto &event_to_count : events_stats.events_counts) { + json_str.attribute( + TraceCursor::EventKindToString(event_to_count.first), + event_to_count.second); + } + }); + }); + + if (storage.multicpu_decoder) { + json_str.attribute( + "continuousExecutions", + storage.multicpu_decoder->GetNumContinuousExecutionsForThread(tid)); + json_str.attribute( + "PSBBlocks", + storage.multicpu_decoder->GePSBBlocksCountForThread(tid)); + } + + // Errors + const DecodedThread::LibiptErrorsStats &tsc_errors_stats = + decoded_thread_sp->GetTscErrorsStats(); + json_str.attributeObject("errorItems", [&] { + json_str.attribute("total", tsc_errors_stats.total_count); + json_str.attributeObject("individualErrors", [&] { + for (const auto &error_message_to_count : + tsc_errors_stats.libipt_errors_counts) { + json_str.attribute(error_message_to_count.first, + error_message_to_count.second); + } + }); + }); + }); + json_str.attributeObject("globalStats", [&] { + json_str.attributeObject("timingInSeconds", [&] { + GetTimer().ForGlobal().ForEachTimedTask( + [&](const std::string &name, std::chrono::milliseconds duration) { + json_str.attribute(name, duration.count() / 1000.0); + }); + }); + if (storage.multicpu_decoder) { + json_str.attribute( + "totalUnattributedPSBBlocks", + storage.multicpu_decoder->GetUnattributedPSBBlocksCount()); + json_str.attribute("totalPSBBlocks", + storage.multicpu_decoder->GetTotalPSBBlocksCount()); + json_str.attribute( + "totalContinuousExecutions", + storage.multicpu_decoder->GetTotalContinuousExecutionsCount()); + } + }); + }); +} + llvm::Expected<Optional<uint64_t>> TraceIntelPT::GetRawTraceSize(Thread &thread) { if (GetUpdatedStorage().multicpu_decoder) @@ -408,17 +533,22 @@ const char *TraceIntelPT::GetStartConfigurationHelp() { [process tracing only] - int processBufferSizeLimit (defaults to {4} MiB): + [process tracing only] + + - boolean disableCgroupFiltering (defaults to {5}): + [process tracing only])", kDefaultIptTraceSize, kDefaultEnableTscValue, kDefaultPsbPeriod, kDefaultPerCpuTracing, - kDefaultProcessBufferSizeLimit / 1024 / 1024)); + kDefaultProcessBufferSizeLimit / 1024 / 1024, + kDefaultDisableCgroupFiltering)); } return message->c_str(); } Error TraceIntelPT::Start(uint64_t ipt_trace_size, uint64_t total_buffer_size_limit, bool enable_tsc, - Optional<uint64_t> psb_period, bool per_cpu_tracing) { + Optional<uint64_t> psb_period, bool per_cpu_tracing, + bool disable_cgroup_filtering) { TraceIntelPTStartRequest request; request.ipt_trace_size = ipt_trace_size; request.process_buffer_size_limit = total_buffer_size_limit; @@ -426,6 +556,7 @@ Error TraceIntelPT::Start(uint64_t ipt_trace_size, request.psb_period = psb_period; request.type = GetPluginName().str(); request.per_cpu_tracing = per_cpu_tracing; + request.disable_cgroup_filtering = disable_cgroup_filtering; return Trace::Start(toJSON(request)); } @@ -435,6 +566,7 @@ Error TraceIntelPT::Start(StructuredData::ObjectSP configuration) {
bool enable_tsc = kDefaultEnableTscValue; Optional<uint64_t> psb_period = kDefaultPsbPeriod; bool per_cpu_tracing = kDefaultPerCpuTracing; + bool disable_cgroup_filtering = kDefaultDisableCgroupFiltering; if (configuration) { if (StructuredData::Dictionary *dict = configuration->GetAsDictionary()) { @@ -444,6 +576,8 @@ Error TraceIntelPT::Start(StructuredData::ObjectSP configuration) { dict->GetValueForKeyAsBoolean("enableTsc", enable_tsc); dict->GetValueForKeyAsInteger("psbPeriod", psb_period); dict->GetValueForKeyAsBoolean("perCpuTracing", per_cpu_tracing); + dict->GetValueForKeyAsBoolean("disableCgroupFiltering", + disable_cgroup_filtering); } else { return createStringError(inconvertibleErrorCode(), "configuration object is not a dictionary"); @@ -451,7 +585,7 @@ } return Start(ipt_trace_size, process_buffer_size_limit, enable_tsc, - psb_period, per_cpu_tracing); + psb_period, per_cpu_tracing, disable_cgroup_filtering); } llvm::Error TraceIntelPT::Start(llvm::ArrayRef<lldb::tid_t> tids, @@ -476,7 +610,20 @@ Error TraceIntelPT::Start(llvm::ArrayRef<lldb::tid_t> tids, if (configuration) { if (StructuredData::Dictionary *dict = configuration->GetAsDictionary()) { - dict->GetValueForKeyAsInteger("iptTraceSize", ipt_trace_size); + llvm::StringRef ipt_trace_size_not_parsed; + if (dict->GetValueForKeyAsString("iptTraceSize", + ipt_trace_size_not_parsed)) { + if (Optional<uint64_t> bytes = + ParsingUtils::ParseUserFriendlySizeExpression( + ipt_trace_size_not_parsed)) + ipt_trace_size = *bytes; + else + return createStringError(inconvertibleErrorCode(), + "iptTraceSize is not a valid bytes expression"); + } else { + dict->GetValueForKeyAsInteger("iptTraceSize", ipt_trace_size); + } + dict->GetValueForKeyAsBoolean("enableTsc", enable_tsc); dict->GetValueForKeyAsInteger("psbPeriod", psb_period); } else { @@ -494,3 +641,11 @@ Error TraceIntelPT::OnThreadBufferRead(lldb::tid_t tid, } TaskTimer &TraceIntelPT::GetTimer() { return GetUpdatedStorage().task_timer; } + +ScopedTaskTimer &TraceIntelPT::GetThreadTimer(lldb::tid_t tid) { + return GetTimer().ForThread(tid); +} + +ScopedTaskTimer &TraceIntelPT::GetGlobalTimer() { + return GetTimer().ForGlobal(); +} diff --git a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.h b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.h index 09ecbe7da61a..d3e58374867d 100644 --- a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.h +++ b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.h @@ -25,7 +25,8 @@ class TraceIntelPT : public Trace { public: void Dump(Stream *s) const override; - llvm::Error SaveLiveTraceToDisk(FileSpec directory) override; + llvm::Expected<FileSpec> SaveToDisk(FileSpec directory, + bool compact) override; ~TraceIntelPT() override = default; @@ -72,7 +73,8 @@ public: llvm::Expected<lldb::TraceCursorUP> CreateNewCursor(Thread &thread) override; - void DumpTraceInfo(Thread &thread, Stream &s, bool verbose) override; + void DumpTraceInfo(Thread &thread, Stream &s, bool verbose, + bool json) override; llvm::Expected<llvm::Optional<uint64_t>> GetRawTraceSize(Thread &thread); @@ -104,12 +106,16 @@ public: /// This value defines whether to have an intel pt trace buffer per thread /// or per cpu core. /// + /// \param[in] disable_cgroup_filtering + /// Disable the cgroup filtering that is automatically applied when doing + /// per cpu tracing. + /// /// \return /// \a llvm::Error::success if the operation was successful, or /// \a llvm::Error otherwise.
  llvm::Error Start(uint64_t ipt_trace_size, uint64_t total_buffer_size_limit,
                    bool enable_tsc, llvm::Optional<uint64_t> psb_period,
-                   bool m_per_cpu_tracing);
+                   bool m_per_cpu_tracing, bool disable_cgroup_filtering);

  /// \copydoc Trace::Start
  llvm::Error Start(StructuredData::ObjectSP configuration =
@@ -157,6 +163,14 @@ public:
  ///     The timer object for this trace.
  TaskTimer &GetTimer();

+  /// \return
+  ///     The ScopedTaskTimer object for the given thread in this trace.
+  ScopedTaskTimer &GetThreadTimer(lldb::tid_t tid);
+
+  /// \return
+  ///     The global ScopedTaskTimer object for this trace.
+  ScopedTaskTimer &GetGlobalTimer();
+
  TraceIntelPTSP GetSharedPtr();

private:
@@ -206,6 +220,9 @@ private:
  ///     returned if the decoder couldn't be properly set up.
  llvm::Expected<DecodedThreadSP> Decode(Thread &thread);

+  // Dump out trace info in JSON format
+  void DumpTraceInfoAsJson(Thread &thread, Stream &s, bool verbose);
+
  /// We package all the data that can change upon process stops to make sure
  /// this contract is very visible.
  /// This variable should only be accessed directly by constructors or live
diff --git a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTBundleSaver.cpp b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTBundleSaver.cpp
index b2ebaee732b8..8be70dc2139b 100644
--- a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTBundleSaver.cpp
+++ b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTBundleSaver.cpp
@@ -7,8 +7,11 @@
//===----------------------------------------------------------------------===//

#include "TraceIntelPTBundleSaver.h"
+
+#include "PerfContextSwitchDecoder.h"
#include "TraceIntelPT.h"
#include "TraceIntelPTJSONStructs.h"
+
#include "lldb/Core/Module.h"
#include "lldb/Core/ModuleList.h"
#include "lldb/Target/Process.h"
@@ -30,6 +33,13 @@ using namespace lldb_private;
using namespace lldb_private::trace_intel_pt;
using namespace llvm;

+/// Strip the \p directory component from the given \p path. It assumes that \p
+/// directory is a prefix of \p path.
+static std::string GetRelativePath(const FileSpec &directory,
+                                   const FileSpec &path) {
+  return path.GetPath().substr(directory.GetPath().size() + 1);
+}
+
/// Write a stream of bytes from \p data to the given output file.
/// It creates or overwrites the output file, but does not append.
static llvm::Error WriteBytesToDisk(FileSpec &output_file,
@@ -57,11 +67,11 @@ static llvm::Error WriteBytesToDisk(FileSpec &output_file,
///     The directory where the JSON file will be saved.
///
/// \return
-///     \a llvm::Success if the operation was successful, or an \a llvm::Error
-///     otherwise.
-static llvm::Error
+///     A \a FileSpec pointing to the bundle description file, or an \a
+///     llvm::Error otherwise.
+static Expected<FileSpec>
SaveTraceBundleDescription(const llvm::json::Value &trace_bundle_description,
-                           const FileSpec &directory) {
+                           const FileSpec &directory) {
  FileSpec trace_path = directory;
  trace_path.AppendPathComponent("trace.json");
  std::ofstream os(trace_path.GetPath());
@@ -71,7 +81,7 @@ SaveTraceBundleDescription(const llvm::json::Value &trace_bundle_description,
    return createStringError(inconvertibleErrorCode(),
                             formatv("couldn't write to the file {0}",
                                     trace_path.GetPath().c_str()));
-  return Error::success();
+  return trace_path;
}

/// Build the threads sub-section of the trace bundle description file.
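Since SaveToDisk now returns llvm::Expected<FileSpec> instead of llvm::Error, callers get back the path of the generated trace.json. A minimal caller sketch, as a fragment with illustrative names (saver, trace_ipt, and the directory path are assumptions, not part of this patch):

    TraceIntelPTBundleSaver saver;
    // "compact" filters the per-cpu data down to the traced processes
    // (see WriteContextSwitchTrace below); the call shape itself matches
    // the SaveToDisk signature introduced in this patch.
    if (llvm::Expected<FileSpec> desc =
            saver.SaveToDisk(trace_ipt, FileSpec("/tmp/bundle"), /*compact=*/true))
      printf("bundle description: %s\n", desc->GetPath().c_str());
    else
      return desc.takeError();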
@@ -106,7 +116,7 @@ BuildThreadsSection(Process &process, FileSpec directory) { if (trace_sp->GetTracedCpus().empty()) { FileSpec output_file = threads_dir; output_file.AppendPathComponent(std::to_string(tid) + ".intelpt_trace"); - json_thread.ipt_trace = output_file.GetPath(); + json_thread.ipt_trace = GetRelativePath(directory, output_file); llvm::Error err = process.GetTarget().GetTrace()->OnThreadBinaryDataRead( tid, IntelPTDataKinds::kIptTrace, @@ -122,8 +132,68 @@ BuildThreadsSection(Process &process, FileSpec directory) { return json_threads; } +/// \return +/// an \a llvm::Error in case of failures, \a None if the trace is not written +/// to disk because the trace is empty and the \p compact flag is present, or +/// the FileSpec of the trace file on disk. +static Expected<Optional<FileSpec>> +WriteContextSwitchTrace(TraceIntelPT &trace_ipt, lldb::cpu_id_t cpu_id, + const FileSpec &cpus_dir, bool compact) { + FileSpec output_context_switch_trace = cpus_dir; + output_context_switch_trace.AppendPathComponent(std::to_string(cpu_id) + + ".perf_context_switch_trace"); + + bool should_skip = false; + + Error err = trace_ipt.OnCpuBinaryDataRead( + cpu_id, IntelPTDataKinds::kPerfContextSwitchTrace, + [&](llvm::ArrayRef<uint8_t> data) -> llvm::Error { + if (!compact) + return WriteBytesToDisk(output_context_switch_trace, data); + + std::set<lldb::pid_t> pids; + for (Process *process : trace_ipt.GetAllProcesses()) + pids.insert(process->GetID()); + + Expected<std::vector<uint8_t>> compact_context_switch_trace = + FilterProcessesFromContextSwitchTrace(data, pids); + if (!compact_context_switch_trace) + return compact_context_switch_trace.takeError(); + + if (compact_context_switch_trace->empty()) { + should_skip = true; + return Error::success(); + } + + return WriteBytesToDisk(output_context_switch_trace, + *compact_context_switch_trace); + }); + if (err) + return std::move(err); + + if (should_skip) + return None; + return output_context_switch_trace; +} + +static Expected<FileSpec> WriteIntelPTTrace(TraceIntelPT &trace_ipt, + lldb::cpu_id_t cpu_id, + const FileSpec &cpus_dir) { + FileSpec output_trace = cpus_dir; + output_trace.AppendPathComponent(std::to_string(cpu_id) + ".intelpt_trace"); + + Error err = trace_ipt.OnCpuBinaryDataRead( + cpu_id, IntelPTDataKinds::kIptTrace, + [&](llvm::ArrayRef<uint8_t> data) -> llvm::Error { + return WriteBytesToDisk(output_trace, data); + }); + if (err) + return std::move(err); + return output_trace; +} + static llvm::Expected<llvm::Optional<std::vector<JSONCpu>>> -BuildCpusSection(TraceIntelPT &trace_ipt, FileSpec directory) { +BuildCpusSection(TraceIntelPT &trace_ipt, FileSpec directory, bool compact) { if (trace_ipt.GetTracedCpus().empty()) return None; @@ -135,36 +205,21 @@ BuildCpusSection(TraceIntelPT &trace_ipt, FileSpec directory) { for (lldb::cpu_id_t cpu_id : trace_ipt.GetTracedCpus()) { JSONCpu json_cpu; json_cpu.id = cpu_id; + Expected<Optional<FileSpec>> context_switch_trace_path = + WriteContextSwitchTrace(trace_ipt, cpu_id, cpus_dir, compact); + if (!context_switch_trace_path) + return context_switch_trace_path.takeError(); + if (!*context_switch_trace_path) + continue; + json_cpu.context_switch_trace = + GetRelativePath(directory, **context_switch_trace_path); - { - FileSpec output_trace = cpus_dir; - output_trace.AppendPathComponent(std::to_string(cpu_id) + - ".intelpt_trace"); - json_cpu.ipt_trace = output_trace.GetPath(); - - llvm::Error err = trace_ipt.OnCpuBinaryDataRead( - cpu_id, IntelPTDataKinds::kIptTrace, - 
[&](llvm::ArrayRef<uint8_t> data) -> llvm::Error { - return WriteBytesToDisk(output_trace, data); - }); - if (err) - return std::move(err); - } - - { - FileSpec output_context_switch_trace = cpus_dir; - output_context_switch_trace.AppendPathComponent( - std::to_string(cpu_id) + ".perf_context_switch_trace"); - json_cpu.context_switch_trace = output_context_switch_trace.GetPath(); + if (Expected<FileSpec> ipt_trace_path = + WriteIntelPTTrace(trace_ipt, cpu_id, cpus_dir)) + json_cpu.ipt_trace = GetRelativePath(directory, *ipt_trace_path); + else + return ipt_trace_path.takeError(); - llvm::Error err = trace_ipt.OnCpuBinaryDataRead( - cpu_id, IntelPTDataKinds::kPerfContextSwitchTrace, - [&](llvm::ArrayRef<uint8_t> data) -> llvm::Error { - return WriteBytesToDisk(output_context_switch_trace, data); - }); - if (err) - return std::move(err); - } json_cpus.push_back(std::move(json_cpu)); } return json_cpus; @@ -222,14 +277,14 @@ BuildModulesSection(Process &process, FileSpec directory) { path_to_copy_module.AppendPathComponent(system_path); sys::fs::create_directories(path_to_copy_module.GetDirectory().AsCString()); - if (std::error_code ec = llvm::sys::fs::copy_file( - system_path, path_to_copy_module.GetPath())) + if (std::error_code ec = + llvm::sys::fs::copy_file(file, path_to_copy_module.GetPath())) return createStringError( inconvertibleErrorCode(), formatv("couldn't write to the file. {0}", ec.message())); json_modules.push_back( - JSONModule{system_path, path_to_copy_module.GetPath(), + JSONModule{system_path, GetRelativePath(directory, path_to_copy_module), JSONUINT64{load_addr}, module_sp->GetUUID().GetAsString()}); } return json_modules; @@ -280,8 +335,9 @@ BuildProcessesSection(TraceIntelPT &trace_ipt, const FileSpec &directory) { return processes; } -Error TraceIntelPTBundleSaver::SaveToDisk(TraceIntelPT &trace_ipt, - FileSpec directory) { +Expected<FileSpec> TraceIntelPTBundleSaver::SaveToDisk(TraceIntelPT &trace_ipt, + FileSpec directory, + bool compact) { if (std::error_code ec = sys::fs::create_directories(directory.GetPath().c_str())) return llvm::errorCodeToError(ec); @@ -299,7 +355,7 @@ Error TraceIntelPTBundleSaver::SaveToDisk(TraceIntelPT &trace_ipt, return json_processes.takeError(); Expected<Optional<std::vector<JSONCpu>>> json_cpus = - BuildCpusSection(trace_ipt, directory); + BuildCpusSection(trace_ipt, directory, compact); if (!json_cpus) return json_cpus.takeError(); diff --git a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTBundleSaver.h b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTBundleSaver.h index c36677e1c00d..7224636f0c74 100644 --- a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTBundleSaver.h +++ b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTBundleSaver.h @@ -31,10 +31,16 @@ public: /// \param[in] directory /// The directory where the trace bundle will be created. /// + /// \param[in] compact + /// Filter out information irrelevant to the traced processes in the + /// context switch and intel pt traces when using per-cpu mode. This + /// effectively reduces the size of those traces. + /// /// \return - /// \a llvm::success if the operation was successful, or an \a llvm::Error - /// otherwise. - llvm::Error SaveToDisk(TraceIntelPT &trace_ipt, FileSpec directory); + /// A \a FileSpec pointing to the bundle description file, or an \a + /// llvm::Error otherwise. 
+ llvm::Expected<FileSpec> SaveToDisk(TraceIntelPT &trace_ipt, + FileSpec directory, bool compact); }; } // namespace trace_intel_pt diff --git a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTConstants.h b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTConstants.h index 61fdb4574d54..43c86fca3425 100644 --- a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTConstants.h +++ b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTConstants.h @@ -21,6 +21,7 @@ const size_t kDefaultProcessBufferSizeLimit = 5 * 1024 * 1024; // 500MB const bool kDefaultEnableTscValue = false; const llvm::Optional<size_t> kDefaultPsbPeriod = llvm::None; const bool kDefaultPerCpuTracing = false; +const bool kDefaultDisableCgroupFiltering = false; } // namespace trace_intel_pt } // namespace lldb_private diff --git a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTMultiCpuDecoder.cpp b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTMultiCpuDecoder.cpp index d2dbc049672c..e547032f739d 100644 --- a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTMultiCpuDecoder.cpp +++ b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTMultiCpuDecoder.cpp @@ -1,4 +1,4 @@ -//===-- TraceIntelPTMultiCpuDecoder.cpp ----0------------------------------===// +//===-- TraceIntelPTMultiCpuDecoder.cpp -----------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -39,30 +39,35 @@ Expected<DecodedThreadSP> TraceIntelPTMultiCpuDecoder::Decode(Thread &thread) { if (Error err = CorrelateContextSwitchesAndIntelPtTraces()) return std::move(err); - auto it = m_decoded_threads.find(thread.GetID()); - if (it != m_decoded_threads.end()) - return it->second; - - DecodedThreadSP decoded_thread_sp = - std::make_shared<DecodedThread>(thread.shared_from_this()); - TraceIntelPTSP trace_sp = GetTrace(); - Error err = trace_sp->OnAllCpusBinaryDataRead( - IntelPTDataKinds::kIptTrace, - [&](const DenseMap<cpu_id_t, ArrayRef<uint8_t>> &buffers) -> Error { - auto it = m_continuous_executions_per_thread->find(thread.GetID()); - if (it != m_continuous_executions_per_thread->end()) - return DecodeSystemWideTraceForThread(*decoded_thread_sp, *trace_sp, - buffers, it->second); - - return Error::success(); + return trace_sp + ->GetThreadTimer(thread.GetID()) + .TimeTask("Decoding instructions", [&]() -> Expected<DecodedThreadSP> { + auto it = m_decoded_threads.find(thread.GetID()); + if (it != m_decoded_threads.end()) + return it->second; + + DecodedThreadSP decoded_thread_sp = + std::make_shared<DecodedThread>(thread.shared_from_this()); + + Error err = trace_sp->OnAllCpusBinaryDataRead( + IntelPTDataKinds::kIptTrace, + [&](const DenseMap<cpu_id_t, ArrayRef<uint8_t>> &buffers) -> Error { + auto it = + m_continuous_executions_per_thread->find(thread.GetID()); + if (it != m_continuous_executions_per_thread->end()) + return DecodeSystemWideTraceForThread( + *decoded_thread_sp, *trace_sp, buffers, it->second); + + return Error::success(); + }); + if (err) + return std::move(err); + + m_decoded_threads.try_emplace(thread.GetID(), decoded_thread_sp); + return decoded_thread_sp; }); - if (err) - return std::move(err); - - m_decoded_threads.try_emplace(thread.GetID(), decoded_thread_sp); - return decoded_thread_sp; } static Expected<std::vector<IntelPTThreadSubtrace>> @@ -105,6 +110,7 @@ TraceIntelPTMultiCpuDecoder::DoCorrelateContextSwitchesAndIntelPtTraces() { if (!intel_pt_subtraces) return intel_pt_subtraces.takeError(); + m_total_psb_blocks += 
intel_pt_subtraces->size(); // We'll be iterating through the thread continuous executions and the intel // pt subtraces sorted by time. auto it = intel_pt_subtraces->begin(); @@ -118,7 +124,7 @@ TraceIntelPTMultiCpuDecoder::DoCorrelateContextSwitchesAndIntelPtTraces() { if (it->tsc > thread_execution.GetStartTSC()) { execution.intelpt_subtraces.push_back(*it); } else { - m_unattributed_intelpt_subtraces++; + m_unattributed_psb_blocks++; } } continuous_executions_per_thread[thread_execution.tid].push_back( @@ -137,6 +143,8 @@ TraceIntelPTMultiCpuDecoder::DoCorrelateContextSwitchesAndIntelPtTraces() { }); if (err) return std::move(err); + + m_unattributed_psb_blocks += intel_pt_subtraces->end() - it; } // We now sort the executions of each thread to have them ready for // instruction decoding @@ -153,7 +161,7 @@ Error TraceIntelPTMultiCpuDecoder::CorrelateContextSwitchesAndIntelPtTraces() { if (m_continuous_executions_per_thread) return Error::success(); - Error err = GetTrace()->GetTimer().ForGlobal().TimeTask<Error>( + Error err = GetTrace()->GetGlobalTimer().TimeTask( "Context switch and Intel PT traces correlation", [&]() -> Error { if (auto correlation = DoCorrelateContextSwitchesAndIntelPtTraces()) { m_continuous_executions_per_thread.emplace(std::move(*correlation)); @@ -187,3 +195,24 @@ size_t TraceIntelPTMultiCpuDecoder::GetTotalContinuousExecutionsCount() const { count += kv.second.size(); return count; } + +size_t +TraceIntelPTMultiCpuDecoder::GePSBBlocksCountForThread(lldb::tid_t tid) const { + if (!m_continuous_executions_per_thread) + return 0; + size_t count = 0; + auto it = m_continuous_executions_per_thread->find(tid); + if (it == m_continuous_executions_per_thread->end()) + return 0; + for (const IntelPTThreadContinousExecution &execution : it->second) + count += execution.intelpt_subtraces.size(); + return count; +} + +size_t TraceIntelPTMultiCpuDecoder::GetUnattributedPSBBlocksCount() const { + return m_unattributed_psb_blocks; +} + +size_t TraceIntelPTMultiCpuDecoder::GetTotalPSBBlocksCount() const { + return m_total_psb_blocks; +} diff --git a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTMultiCpuDecoder.h b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTMultiCpuDecoder.h index 11771e018f7b..3b7926760f3c 100644 --- a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTMultiCpuDecoder.h +++ b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTMultiCpuDecoder.h @@ -50,9 +50,22 @@ public: size_t GetNumContinuousExecutionsForThread(lldb::tid_t tid) const; /// \return + /// The number of PSB blocks for a given thread in all cores. + size_t GePSBBlocksCountForThread(lldb::tid_t tid) const; + + /// \return /// The total number of continuous executions found across CPUs. size_t GetTotalContinuousExecutionsCount() const; + /// \return + /// The number of psb blocks in all cores that couldn't be matched with a + /// thread execution coming from context switch traces. + size_t GetUnattributedPSBBlocksCount() const; + + /// \return + /// The total number of PSB blocks in all cores. + size_t GetTotalPSBBlocksCount() const; + private: /// Traverse the context switch traces and the basic intel pt continuous /// subtraces and produce a list of continuous executions for each process and @@ -80,7 +93,8 @@ private: /// This variable will be non-None if a severe error happened during the setup /// of the decoder and we don't want decoding to be reattempted. 
  llvm::Optional<std::string> m_setup_error;
-  uint64_t m_unattributed_intelpt_subtraces;
+  uint64_t m_unattributed_psb_blocks = 0;
+  uint64_t m_total_psb_blocks = 0;
};

} // namespace trace_intel_pt
diff --git a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTOptions.td b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTOptions.td
index 29aa1459306a..95bd5c3d1cce 100644
--- a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTOptions.td
+++ b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTOptions.td
@@ -11,7 +11,9 @@ let Command = "thread trace start intel pt" in {
    Arg<"Value">,
    Desc<"Trace size in bytes per thread. It must be a power of 2 greater "
         "than or equal to 4096 (2^12). The trace is circular keeping "
-        "the most recent data. Defaults to 4096 bytes.">;
+        "the most recent data. Defaults to 4096 bytes. It's possible to "
+        "specify size using multiples of unit bytes, e.g., 4KB, 1MB, 1MiB, "
+        "where 1K is 1024 bytes and 1M is 1048576 bytes.">;
  def thread_trace_start_intel_pt_tsc: Option<"tsc", "t">,
    Group<1>,
    Desc<"Enable the use of TSC timestamps. This is supported on all devices "
@@ -40,7 +42,8 @@ let Command = "process trace start intel pt" in {
    Arg<"Value">,
    Desc<"Size in bytes used by each individual per-thread or per-cpu trace "
         "buffer. It must be a power of 2 greater than or equal to 4096 (2^12) "
-        "bytes.">;
+        "bytes. It's possible to specify a unit for these bytes, like 4KB, "
+        "16KiB or 1MB. Lower case units are allowed for convenience.">;
  def process_trace_start_intel_pt_per_cpu_tracing:
    Option<"per-cpu-tracing", "c">, Group<1>,
@@ -53,7 +56,8 @@ let Command = "process trace start intel pt" in {
         "option forces the capture of TSC timestamps (see --tsc). Also, this "
         "option can't be used simultaneously with any other trace sessions "
         "because of its system-wide nature.">;
-  def process_trace_start_intel_pt_process_size_limit: Option<"total-size-limit", "l">,
+  def process_trace_start_intel_pt_process_size_limit:
+    Option<"total-size-limit", "l">,
    Group<1>,
    Arg<"Value">,
    Desc<"Maximum total trace size per process in bytes. This limit applies to "
@@ -62,7 +66,9 @@ let Command = "process trace start intel pt" in {
         "Whenever a thread is attempted to be traced due to this command and "
         "the limit would be reached, the process is stopped with a "
         "\"processor trace\" reason, so that the user can retrace the process "
-        "if needed. Defaults to 500MB.">;
+        "if needed. Defaults to 500MB. It's possible to specify a unit for "
+        "these bytes, like 4KB, 16KiB or 1MB. Lower case units are allowed "
+        "for convenience.">;
  def process_trace_start_intel_pt_tsc: Option<"tsc", "t">,
    Group<1>,
    Desc<"Enable the use of TSC timestamps. This is supported on all devices "
@@ -83,14 +89,9 @@ let Command = "process trace start intel pt" in {
         "converted to the approximate number of raw trace bytes between PSB "
         "packets as: 2 ^ (value + 11), e.g. value 3 means 16KiB between PSB "
         "packets. Defaults to 0 if supported.">;
-}
-
-let Command = "process trace save intel pt" in {
-  def process_trace_save_intel_directory: Option<"directory", "d">,
-    Group<1>,
-    Arg<"Value">, Required,
-    Desc<"This value defines the directory where the trace will be saved."
-         "It will be created if it does not exist.
It will also create a " - "trace files with the trace data and a trace.json with the main " - "properties of the trace session.">; + def process_trace_start_intel_pt_disable_cgroup_filtering: + Option<"disable-cgroup-filtering", "d">, + Desc<"Disable the automatic cgroup filtering that is applied if --per-cpu " + "is provided. Cgroup filtering allows collecting intel pt data " + "exclusively of processes of the same cgroup as the target.">; } diff --git a/lldb/source/Plugins/TraceExporter/common/TraceHTR.cpp b/lldb/source/Plugins/TraceExporter/common/TraceHTR.cpp index 7deeaf2bf10f..d8e44ee66de8 100644 --- a/lldb/source/Plugins/TraceExporter/common/TraceHTR.cpp +++ b/lldb/source/Plugins/TraceExporter/common/TraceHTR.cpp @@ -160,15 +160,15 @@ TraceHTR::TraceHTR(Thread &thread, TraceCursor &cursor) cursor.Next(); } else { lldb::addr_t current_instruction_load_address = cursor.GetLoadAddress(); - lldb::TraceInstructionControlFlowType current_instruction_type = - cursor.GetInstructionControlFlowType(); + lldb::InstructionControlFlowKind current_instruction_type = + cursor.GetInstructionControlFlowKind(); m_instruction_layer_up->AppendInstruction( current_instruction_load_address); cursor.Next(); bool more_data_in_trace = cursor.HasValue(); if (current_instruction_type & - lldb::eTraceInstructionControlFlowTypeCall) { + lldb::eInstructionControlFlowKindCall) { if (more_data_in_trace && !cursor.IsError()) { m_instruction_layer_up->AddCallInstructionMetadata( current_instruction_load_address, diff --git a/lldb/source/Plugins/UnwindAssembly/InstEmulation/UnwindAssemblyInstEmulation.cpp b/lldb/source/Plugins/UnwindAssembly/InstEmulation/UnwindAssemblyInstEmulation.cpp index ac5e316eecb0..03515a32ff86 100644 --- a/lldb/source/Plugins/UnwindAssembly/InstEmulation/UnwindAssemblyInstEmulation.cpp +++ b/lldb/source/Plugins/UnwindAssembly/InstEmulation/UnwindAssemblyInstEmulation.cpp @@ -83,6 +83,7 @@ bool UnwindAssemblyInstEmulation::GetNonCallSiteUnwindPlanFromAssembly( const uint32_t addr_byte_size = m_arch.GetAddressByteSize(); const bool show_address = true; const bool show_bytes = true; + const bool show_control_flow_kind = true; m_inst_emulator_up->GetRegisterInfo(unwind_plan.GetRegisterKind(), unwind_plan.GetInitialCFARegister(), m_cfa_reg_info); @@ -244,7 +245,8 @@ bool UnwindAssemblyInstEmulation::GetNonCallSiteUnwindPlanFromAssembly( lldb_private::FormatEntity::Entry format; FormatEntity::Parse("${frame.pc}: ", format); inst->Dump(&strm, inst_list.GetMaxOpcocdeByteSize(), show_address, - show_bytes, nullptr, nullptr, nullptr, &format, 0); + show_bytes, show_control_flow_kind, nullptr, nullptr, + nullptr, &format, 0); log->PutString(strm.GetString()); } diff --git a/lldb/source/Symbol/Function.cpp b/lldb/source/Symbol/Function.cpp index 648a12524aed..8ec4bc90cd13 100644 --- a/lldb/source/Symbol/Function.cpp +++ b/lldb/source/Symbol/Function.cpp @@ -196,11 +196,10 @@ Function *IndirectCallEdge::GetCallee(ModuleList &images, Log *log = GetLog(LLDBLog::Step); Status error; Value callee_addr_val; - if (!call_target.Evaluate(&exe_ctx, exe_ctx.GetRegisterContext(), - /*loclist_base_load_addr=*/LLDB_INVALID_ADDRESS, - /*initial_value_ptr=*/nullptr, - /*object_address_ptr=*/nullptr, callee_addr_val, - &error)) { + if (!call_target.Evaluate( + &exe_ctx, exe_ctx.GetRegisterContext(), LLDB_INVALID_ADDRESS, + /*initial_value_ptr=*/nullptr, + /*object_address_ptr=*/nullptr, callee_addr_val, &error)) { LLDB_LOGF(log, "IndirectCallEdge: Could not evaluate expression: %s", error.AsCString()); return nullptr; 
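The hunks on either side of this point all adapt callers of Instruction::Dump and InstructionList::Dump to the new show_control_flow_kind parameter, which slots in directly after show_bytes. The call shape, sketched with illustrative variable names (this fragment is not copied from the patch):

    const bool show_address = true;
    const bool show_bytes = false;
    // New in this API revision: when true, print the instruction's control
    // flow kind (call, return, jump, ...) next to the disassembly.
    const bool show_control_flow_kind = true;
    instruction_list.Dump(&strm, show_address, show_bytes,
                          show_control_flow_kind, &exe_ctx);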
@@ -440,8 +439,9 @@ bool Function::GetDisassembly(const ExecutionContext &exe_ctx, if (disassembler_sp) { const bool show_address = true; const bool show_bytes = false; - disassembler_sp->GetInstructionList().Dump(&strm, show_address, show_bytes, - &exe_ctx); + const bool show_control_flow_kind = false; + disassembler_sp->GetInstructionList().Dump( + &strm, show_address, show_bytes, show_control_flow_kind, &exe_ctx); return true; } return false; diff --git a/lldb/source/Symbol/Symbol.cpp b/lldb/source/Symbol/Symbol.cpp index 9ec7f2638f71..668276aa2500 100644 --- a/lldb/source/Symbol/Symbol.cpp +++ b/lldb/source/Symbol/Symbol.cpp @@ -558,8 +558,9 @@ bool Symbol::GetDisassembly(const ExecutionContext &exe_ctx, const char *flavor, if (disassembler_sp) { const bool show_address = true; const bool show_bytes = false; - disassembler_sp->GetInstructionList().Dump(&strm, show_address, show_bytes, - &exe_ctx); + const bool show_control_flow_kind = false; + disassembler_sp->GetInstructionList().Dump( + &strm, show_address, show_bytes, show_control_flow_kind, &exe_ctx); return true; } return false; diff --git a/lldb/source/Symbol/TypeList.cpp b/lldb/source/Symbol/TypeList.cpp index ace715d933ea..494e59e3a0fc 100644 --- a/lldb/source/Symbol/TypeList.cpp +++ b/lldb/source/Symbol/TypeList.cpp @@ -97,7 +97,7 @@ void TypeList::Dump(Stream *s, bool show_context) { } } -void TypeList::RemoveMismatchedTypes(const char *qualified_typename, +void TypeList::RemoveMismatchedTypes(llvm::StringRef qualified_typename, bool exact_match) { llvm::StringRef type_scope; llvm::StringRef type_basename; @@ -107,13 +107,12 @@ void TypeList::RemoveMismatchedTypes(const char *qualified_typename, type_basename = qualified_typename; type_scope = ""; } - return RemoveMismatchedTypes(std::string(type_scope), - std::string(type_basename), type_class, + return RemoveMismatchedTypes(type_scope, type_basename, type_class, exact_match); } -void TypeList::RemoveMismatchedTypes(const std::string &type_scope, - const std::string &type_basename, +void TypeList::RemoveMismatchedTypes(llvm::StringRef type_scope, + llvm::StringRef type_basename, TypeClass type_class, bool exact_match) { // Our "collection" type currently is a std::map which doesn't have any good // way to iterate and remove items from the map so we currently just make a diff --git a/lldb/source/Symbol/TypeMap.cpp b/lldb/source/Symbol/TypeMap.cpp index 2cda9b6c27d1..0d5f6d53e5a0 100644 --- a/lldb/source/Symbol/TypeMap.cpp +++ b/lldb/source/Symbol/TypeMap.cpp @@ -127,23 +127,8 @@ void TypeMap::Dump(Stream *s, bool show_context, lldb::DescriptionLevel level) { } } -void TypeMap::RemoveMismatchedTypes(const char *qualified_typename, - bool exact_match) { - llvm::StringRef type_scope; - llvm::StringRef type_basename; - TypeClass type_class = eTypeClassAny; - if (!Type::GetTypeScopeAndBasename(qualified_typename, type_scope, - type_basename, type_class)) { - type_basename = qualified_typename; - type_scope = ""; - } - return RemoveMismatchedTypes(std::string(type_scope), - std::string(type_basename), type_class, - exact_match); -} - -void TypeMap::RemoveMismatchedTypes(const std::string &type_scope, - const std::string &type_basename, +void TypeMap::RemoveMismatchedTypes(llvm::StringRef type_scope, + llvm::StringRef type_basename, TypeClass type_class, bool exact_match) { // Our "collection" type currently is a std::map which doesn't have any good // way to iterate and remove items from the map so we currently just make a @@ -214,25 +199,3 @@ void 
TypeMap::RemoveMismatchedTypes(const std::string &type_scope, } m_types.swap(matching_types); } - -void TypeMap::RemoveMismatchedTypes(TypeClass type_class) { - if (type_class == eTypeClassAny) - return; - - // Our "collection" type currently is a std::map which doesn't have any good - // way to iterate and remove items from the map so we currently just make a - // new list and add all of the matching types to it, and then swap it into - // m_types at the end - collection matching_types; - - iterator pos, end = m_types.end(); - - for (pos = m_types.begin(); pos != end; ++pos) { - Type *the_type = pos->second.get(); - TypeClass match_type_class = - the_type->GetForwardCompilerType().GetTypeClass(); - if (match_type_class & type_class) - matching_types.insert(*pos); - } - m_types.swap(matching_types); -} diff --git a/lldb/source/Symbol/Variable.cpp b/lldb/source/Symbol/Variable.cpp index b92c86654496..f65e73e5d049 100644 --- a/lldb/source/Symbol/Variable.cpp +++ b/lldb/source/Symbol/Variable.cpp @@ -39,13 +39,13 @@ Variable::Variable(lldb::user_id_t uid, const char *name, const char *mangled, const lldb::SymbolFileTypeSP &symfile_type_sp, ValueType scope, SymbolContextScope *context, const RangeList &scope_range, Declaration *decl_ptr, - const DWARFExpression &location, bool external, + const DWARFExpressionList &location_list, bool external, bool artificial, bool location_is_constant_data, bool static_member) : UserID(uid), m_name(name), m_mangled(ConstString(mangled)), m_symfile_type_sp(symfile_type_sp), m_scope(scope), m_owner_scope(context), m_scope_range(scope_range), - m_declaration(decl_ptr), m_location(location), m_external(external), + m_declaration(decl_ptr), m_location_list(location_list), m_external(external), m_artificial(artificial), m_loc_is_const_data(location_is_constant_data), m_static_member(static_member) {} @@ -145,7 +145,7 @@ void Variable::Dump(Stream *s, bool show_context) const { bool show_fullpaths = false; m_declaration.Dump(s, show_fullpaths); - if (m_location.IsValid()) { + if (m_location_list.IsValid()) { s->PutCString(", location = "); ABISP abi; if (m_owner_scope) { @@ -153,7 +153,7 @@ void Variable::Dump(Stream *s, bool show_context) const { if (module_sp) abi = ABI::FindPlugin(ProcessSP(), module_sp->GetArchitecture()); } - m_location.GetDescription(s, lldb::eDescriptionLevelBrief, abi.get()); + m_location_list.GetDescription(s, lldb::eDescriptionLevelBrief, abi.get()); } if (m_external) @@ -212,12 +212,6 @@ void Variable::CalculateSymbolContext(SymbolContext *sc) { } bool Variable::LocationIsValidForFrame(StackFrame *frame) { - // Is the variable is described by a single location? - if (!m_location.IsLocationList()) { - // Yes it is, the location is valid. - return true; - } - if (frame) { Function *function = frame->GetSymbolContext(eSymbolContextFunction).function; @@ -231,7 +225,7 @@ bool Variable::LocationIsValidForFrame(StackFrame *frame) { return false; // It is a location list. We just need to tell if the location list // contains the current address when converted to a load address - return m_location.LocationListContainsAddress( + return m_location_list.ContainsAddress( loclist_base_load_addr, frame->GetFrameCodeAddress().GetLoadAddress(target_sp.get())); } @@ -244,7 +238,7 @@ bool Variable::LocationIsValidForAddress(const Address &address) { // function. if (address.IsSectionOffset()) { // We need to check if the address is valid for both scope range and value - // range. + // range. // Empty scope range means block range. 
bool valid_in_scope_range = GetScopeRange().IsEmpty() || GetScopeRange().FindEntryThatContains( @@ -255,7 +249,7 @@ bool Variable::LocationIsValidForAddress(const Address &address) { CalculateSymbolContext(&sc); if (sc.module_sp == address.GetModule()) { // Is the variable is described by a single location? - if (!m_location.IsLocationList()) { + if (m_location_list.IsAlwaysValidSingleExpr()) { // Yes it is, the location is valid. return true; } @@ -267,8 +261,8 @@ bool Variable::LocationIsValidForAddress(const Address &address) { return false; // It is a location list. We just need to tell if the location list // contains the current address when converted to a load address - return m_location.LocationListContainsAddress(loclist_base_file_addr, - address.GetFileAddress()); + return m_location_list.ContainsAddress(loclist_base_file_addr, + address.GetFileAddress()); } } } @@ -459,9 +453,9 @@ bool Variable::DumpLocations(Stream *s, const Address &address) { sc.function->GetAddressRange().GetBaseAddress().GetFileAddress(); if (loclist_base_file_addr == LLDB_INVALID_ADDRESS) return false; - return m_location.DumpLocations(s, eDescriptionLevelBrief, - loclist_base_file_addr, file_addr, - abi.get()); + return m_location_list.DumpLocations(s, eDescriptionLevelBrief, + loclist_base_file_addr, file_addr, + abi.get()); } return false; } diff --git a/lldb/source/Target/RegisterContextUnwind.cpp b/lldb/source/Target/RegisterContextUnwind.cpp index e98aed7e1555..a0f97d7e7cff 100644 --- a/lldb/source/Target/RegisterContextUnwind.cpp +++ b/lldb/source/Target/RegisterContextUnwind.cpp @@ -11,7 +11,7 @@ #include "lldb/Core/AddressRange.h" #include "lldb/Core/Module.h" #include "lldb/Core/Value.h" -#include "lldb/Expression/DWARFExpression.h" +#include "lldb/Expression/DWARFExpressionList.h" #include "lldb/Symbol/ArmUnwindInfo.h" #include "lldb/Symbol/CallFrameInfo.h" #include "lldb/Symbol/DWARFCallFrameInfo.h" @@ -381,7 +381,7 @@ void RegisterContextUnwind::InitializeNonZerothFrame() { // symbol/function information - just stick in some reasonable defaults and // hope we can unwind past this frame. If we're above a trap handler, // we may be at a bogus address because we jumped through a bogus function - // pointer and trapped, so don't force the arch default unwind plan in that + // pointer and trapped, so don't force the arch default unwind plan in that // case. ModuleSP pc_module_sp(m_current_pc.GetModule()); if ((!m_current_pc.IsValid() || !pc_module_sp) && @@ -1286,7 +1286,7 @@ RegisterContextUnwind::SavedLocationForRegister( // arch default unwind plan is used as the Fast Unwind Plan, we // need to recognize this & switch over to the Full Unwind Plan // to see what unwind rule that (more knoweldgeable, probably) - // UnwindPlan has. If the full UnwindPlan says the register + // UnwindPlan has. If the full UnwindPlan says the register // location is Undefined, then it really is. if (active_row->GetRegisterInfo(regnum.GetAsKind(unwindplan_registerkind), unwindplan_regloc) && @@ -1335,13 +1335,13 @@ RegisterContextUnwind::SavedLocationForRegister( m_full_unwind_plan_sp->GetReturnAddressRegister() != LLDB_INVALID_REGNUM) { // If this is a trap handler frame, we should have access to - // the complete register context when the interrupt/async + // the complete register context when the interrupt/async // signal was received, we should fetch the actual saved $pc // value instead of the Return Address register. // If $pc is not available, fall back to the RA reg. 
UnwindPlan::Row::RegisterLocation scratch; if (m_frame_type == eTrapHandlerFrame && - active_row->GetRegisterInfo + active_row->GetRegisterInfo (pc_regnum.GetAsKind (unwindplan_registerkind), scratch)) { UnwindLogMsg("Providing pc register instead of rewriting to " "RA reg because this is a trap handler and there is " @@ -1642,8 +1642,9 @@ RegisterContextUnwind::SavedLocationForRegister( process->GetByteOrder(), process->GetAddressByteSize()); ModuleSP opcode_ctx; - DWARFExpression dwarfexpr(opcode_ctx, dwarfdata, nullptr); - dwarfexpr.SetRegisterKind(unwindplan_registerkind); + DWARFExpressionList dwarfexpr(opcode_ctx, dwarfdata, nullptr); + dwarfexpr.GetMutableExpressionAtAddress()->SetRegisterKind( + unwindplan_registerkind); Value cfa_val = Scalar(m_cfa); cfa_val.SetValueType(Value::ValueType::LoadAddress); Value result; @@ -2006,8 +2007,9 @@ bool RegisterContextUnwind::ReadFrameAddress( process->GetByteOrder(), process->GetAddressByteSize()); ModuleSP opcode_ctx; - DWARFExpression dwarfexpr(opcode_ctx, dwarfdata, nullptr); - dwarfexpr.SetRegisterKind(row_register_kind); + DWARFExpressionList dwarfexpr(opcode_ctx, dwarfdata, nullptr); + dwarfexpr.GetMutableExpressionAtAddress()->SetRegisterKind( + row_register_kind); Value result; Status error; if (dwarfexpr.Evaluate(&exe_ctx, this, 0, nullptr, nullptr, result, diff --git a/lldb/source/Target/StackFrame.cpp b/lldb/source/Target/StackFrame.cpp index 1e3dbc73a04e..e87cf5af3e39 100644 --- a/lldb/source/Target/StackFrame.cpp +++ b/lldb/source/Target/StackFrame.cpp @@ -1087,7 +1087,7 @@ bool StackFrame::GetFrameBaseValue(Scalar &frame_base, Status *error_ptr) { ExecutionContext exe_ctx(shared_from_this()); Value expr_value; addr_t loclist_base_addr = LLDB_INVALID_ADDRESS; - if (m_sc.function->GetFrameBaseExpression().IsLocationList()) + if (!m_sc.function->GetFrameBaseExpression().IsAlwaysValidSingleExpr()) loclist_base_addr = m_sc.function->GetAddressRange().GetBaseAddress().GetLoadAddress( exe_ctx.GetTargetPtr()); @@ -1116,7 +1116,7 @@ bool StackFrame::GetFrameBaseValue(Scalar &frame_base, Status *error_ptr) { return m_frame_base_error.Success(); } -DWARFExpression *StackFrame::GetFrameBaseExpression(Status *error_ptr) { +DWARFExpressionList *StackFrame::GetFrameBaseExpression(Status *error_ptr) { if (!m_sc.function) { if (error_ptr) { error_ptr->SetErrorString("No function in symbol context."); @@ -1200,7 +1200,7 @@ lldb::LanguageType StackFrame::GuessLanguage() { LanguageType lang_type = GetLanguage(); if (lang_type == eLanguageTypeUnknown) { - SymbolContext sc = GetSymbolContext(eSymbolContextFunction + SymbolContext sc = GetSymbolContext(eSymbolContextFunction | eSymbolContextSymbol); if (sc.function) { lang_type = sc.function->GetMangled().GuessLanguage(); @@ -1417,7 +1417,7 @@ ValueObjectSP GetValueForDereferincingOffset(StackFrame &frame, Status error; ValueObjectSP pointee = base->Dereference(error); - + if (!pointee) { return ValueObjectSP(); } @@ -1505,7 +1505,7 @@ lldb::ValueObjectSP DoGuessValueAt(StackFrame &frame, ConstString reg, Instruction::Operand::BuildRegister(reg)); for (VariableSP var_sp : variables) { - if (var_sp->LocationExpression().MatchesOperand(frame, op)) + if (var_sp->LocationExpressionList().MatchesOperand(frame, op)) return frame.GetValueObjectForFrameVariable(var_sp, eNoDynamicValues); } diff --git a/lldb/source/Target/ThreadPlanTracer.cpp b/lldb/source/Target/ThreadPlanTracer.cpp index f2346fc237ce..f5331428038b 100644 --- a/lldb/source/Target/ThreadPlanTracer.cpp +++ 
b/lldb/source/Target/ThreadPlanTracer.cpp @@ -170,13 +170,14 @@ void ThreadPlanAssemblyTracer::Log() { if (instruction_list.GetSize()) { const bool show_bytes = true; const bool show_address = true; + const bool show_control_flow_kind = true; Instruction *instruction = instruction_list.GetInstructionAtIndex(0).get(); const FormatEntity::Entry *disassemble_format = m_process.GetTarget().GetDebugger().GetDisassemblyFormat(); instruction->Dump(stream, max_opcode_byte_size, show_address, - show_bytes, nullptr, nullptr, nullptr, - disassemble_format, 0); + show_bytes, show_control_flow_kind, nullptr, nullptr, + nullptr, disassemble_format, 0); } } } diff --git a/lldb/source/Target/TraceCursor.cpp b/lldb/source/Target/TraceCursor.cpp index 1c3fabc4dec0..f99b0d28c154 100644 --- a/lldb/source/Target/TraceCursor.cpp +++ b/lldb/source/Target/TraceCursor.cpp @@ -48,5 +48,8 @@ const char *TraceCursor::EventKindToString(lldb::TraceEvent event_kind) { return "hardware disabled tracing"; case lldb::eTraceEventDisabledSW: return "software disabled tracing"; + case lldb::eTraceEventCPUChanged: + return "CPU core changed"; } + llvm_unreachable("Fully covered switch above"); } diff --git a/lldb/source/Target/TraceDumper.cpp b/lldb/source/Target/TraceDumper.cpp index 6a5fd0268e02..739105e9e9fb 100644 --- a/lldb/source/Target/TraceDumper.cpp +++ b/lldb/source/Target/TraceDumper.cpp @@ -129,32 +129,30 @@ public: m_s.Format(" {0}: ", item.id); if (m_options.show_tsc) { - m_s << "[tsc="; - - if (item.tsc) - m_s.Format("{0}", *item.tsc); - else - m_s << "unavailable"; - - m_s << "] "; + m_s.Format("[tsc={0}] ", + item.tsc ? std::to_string(*item.tsc) : "unavailable"); } if (item.event) { m_s << "(event) " << TraceCursor::EventKindToString(*item.event); + if (*item.event == eTraceEventCPUChanged) { + m_s.Format(" [new CPU={0}]", + item.cpu_id ? std::to_string(*item.cpu_id) : "unavailable"); + } } else if (item.error) { m_s << "(error) " << *item.error; } else { m_s.Format("{0:x+16}", item.load_address); - if (item.symbol_info) { + if (item.symbol_info && item.symbol_info->instruction) { m_s << " "; - item.symbol_info->instruction->Dump(&m_s, /*max_opcode_byte_size=*/0, - /*show_address=*/false, - /*show_bytes=*/false, - &item.symbol_info->exe_ctx, - &item.symbol_info->sc, - /*prev_sym_ctx=*/nullptr, - /*disassembly_addr_format=*/nullptr, - /*max_address_text_size=*/0); + item.symbol_info->instruction->Dump( + &m_s, /*max_opcode_byte_size=*/0, + /*show_address=*/false, + /*show_bytes=*/false, m_options.show_control_flow_kind, + &item.symbol_info->exe_ctx, &item.symbol_info->sc, + /*prev_sym_ctx=*/nullptr, + /*disassembly_addr_format=*/nullptr, + /*max_address_text_size=*/0); } } @@ -172,14 +170,16 @@ class OutputWriterJSON : public TraceDumper::OutputWriter { /* schema: error_message: string | { + "event": string, "id": decimal, "tsc"?: string decimal, - "event": string + "cpuId"? 
decimal, } | { + "error": string, "id": decimal, "tsc"?: string decimal, - "error": string, | { + "loadAddress": string decimal, "id": decimal, "tsc"?: string decimal, "module"?: string, @@ -200,6 +200,37 @@ public: ~OutputWriterJSON() { m_j.arrayEnd(); } + void DumpEvent(const TraceDumper::TraceItem &item) { + m_j.attribute("event", TraceCursor::EventKindToString(*item.event)); + if (item.event == eTraceEventCPUChanged) + m_j.attribute("cpuId", item.cpu_id); + } + + void DumpInstruction(const TraceDumper::TraceItem &item) { + m_j.attribute("loadAddress", formatv("{0:x}", item.load_address)); + if (item.symbol_info) { + m_j.attribute("module", ToOptionalString(GetModuleName(item))); + m_j.attribute( + "symbol", + ToOptionalString(item.symbol_info->sc.GetFunctionName().AsCString())); + + if (item.symbol_info->instruction) { + m_j.attribute("mnemonic", + ToOptionalString(item.symbol_info->instruction->GetMnemonic( + &item.symbol_info->exe_ctx))); + } + + if (IsLineEntryValid(item.symbol_info->sc.line_entry)) { + m_j.attribute( + "source", + ToOptionalString( + item.symbol_info->sc.line_entry.file.GetPath().c_str())); + m_j.attribute("line", item.symbol_info->sc.line_entry.line); + m_j.attribute("column", item.symbol_info->sc.line_entry.column); + } + } + } + void TraceItem(const TraceDumper::TraceItem &item) override { m_j.object([&] { m_j.attribute("id", item.id); @@ -209,37 +240,11 @@ public: item.tsc ? Optional<std::string>(std::to_string(*item.tsc)) : None); if (item.event) { - m_j.object([&] { - m_j.attribute("event", TraceCursor::EventKindToString(*item.event)); - }); - return; - } - - if (item.error) { + DumpEvent(item); + } else if (item.error) { m_j.attribute("error", *item.error); - return; - } - - // we know we are seeing an actual instruction - m_j.attribute("loadAddress", formatv("{0:x}", item.load_address)); - if (item.symbol_info) { - m_j.attribute("module", ToOptionalString(GetModuleName(item))); - m_j.attribute("symbol", - ToOptionalString( - item.symbol_info->sc.GetFunctionName().AsCString())); - m_j.attribute( - "mnemonic", - ToOptionalString(item.symbol_info->instruction->GetMnemonic( - &item.symbol_info->exe_ctx))); - - if (IsLineEntryValid(item.symbol_info->sc.line_entry)) { - m_j.attribute( - "source", - ToOptionalString( - item.symbol_info->sc.line_entry.file.GetPath().c_str())); - m_j.attribute("line", item.symbol_info->sc.line_entry.line); - m_j.attribute("column", item.symbol_info->sc.line_entry.column); - } + } else { + DumpInstruction(item); } }); } @@ -361,6 +366,8 @@ Optional<lldb::user_id_t> TraceDumper::DumpInstructions(size_t count) { if (!m_options.show_events) continue; item.event = m_cursor_up->GetEventType(); + if (*item.event == eTraceEventCPUChanged) + item.cpu_id = m_cursor_up->GetCPU(); } else if (m_cursor_up->IsError()) { item.error = m_cursor_up->GetError(); } else { diff --git a/lldb/source/Utility/Args.cpp b/lldb/source/Utility/Args.cpp index 3978f9422653..daccb91d8436 100644 --- a/lldb/source/Utility/Args.cpp +++ b/lldb/source/Utility/Args.cpp @@ -385,6 +385,7 @@ std::string Args::GetShellSafeArgument(const FileSpec &shell, }; static ShellDescriptor g_Shells[] = {{ConstString("bash"), " '\"<>()&;"}, + {ConstString("fish"), " '\"<>()&\\|;"}, {ConstString("tcsh"), " '\"<>()&;"}, {ConstString("zsh"), " '\"<>()&;\\|"}, {ConstString("sh"), " '\"<>()&;"}}; diff --git a/lldb/source/Utility/TraceIntelPTGDBRemotePackets.cpp b/lldb/source/Utility/TraceIntelPTGDBRemotePackets.cpp index 1ad74cacc4c3..7a0ed9c53c65 100644 --- 
a/lldb/source/Utility/TraceIntelPTGDBRemotePackets.cpp +++ b/lldb/source/Utility/TraceIntelPTGDBRemotePackets.cpp @@ -53,7 +53,8 @@ bool fromJSON(const json::Value &value, TraceIntelPTStartRequest &packet, if (packet.IsProcessTracing()) { if (!o.map("processBufferSizeLimit", packet.process_buffer_size_limit) || - !o.map("perCpuTracing", packet.per_cpu_tracing)) + !o.map("perCpuTracing", packet.per_cpu_tracing) || + !o.map("disableCgroupTracing", packet.disable_cgroup_filtering)) return false; } return true; @@ -67,6 +68,7 @@ json::Value toJSON(const TraceIntelPTStartRequest &packet) { obj.try_emplace("psbPeriod", packet.psb_period); obj.try_emplace("enableTsc", packet.enable_tsc); obj.try_emplace("perCpuTracing", packet.per_cpu_tracing); + obj.try_emplace("disableCgroupTracing", packet.disable_cgroup_filtering); return base; } @@ -108,13 +110,15 @@ bool fromJSON(const json::Value &value, TraceIntelPTGetStateResponse &packet, json::Path path) { ObjectMapper o(value, path); return o && fromJSON(value, (TraceGetStateResponse &)packet, path) && - o.map("tscPerfZeroConversion", packet.tsc_perf_zero_conversion); + o.map("tscPerfZeroConversion", packet.tsc_perf_zero_conversion) && + o.map("usingCgroupFiltering", packet.using_cgroup_filtering); } json::Value toJSON(const TraceIntelPTGetStateResponse &packet) { json::Value base = toJSON((const TraceGetStateResponse &)packet); - base.getAsObject()->insert( - {"tscPerfZeroConversion", packet.tsc_perf_zero_conversion}); + json::Object &obj = *base.getAsObject(); + obj.insert({"tscPerfZeroConversion", packet.tsc_perf_zero_conversion}); + obj.insert({"usingCgroupFiltering", packet.using_cgroup_filtering}); return base; } diff --git a/llvm/include/llvm-c/BitReader.h b/llvm/include/llvm-c/BitReader.h index 012c0e63d3bb..088107468d4f 100644 --- a/llvm/include/llvm-c/BitReader.h +++ b/llvm/include/llvm-c/BitReader.h @@ -61,8 +61,13 @@ LLVMBool LLVMGetBitcodeModuleInContext(LLVMContextRef ContextRef, LLVMMemoryBufferRef MemBuf, LLVMModuleRef *OutM, char **OutMessage); -/** Reads a module from the specified path, returning via the OutMP parameter a - * module provider which performs lazy deserialization. Returns 0 on success. */ +/** Reads a module from the given memory buffer, returning via the OutMP + * parameter a module provider which performs lazy deserialization. + * + * Returns 0 on success. + * + * Takes ownership of \p MemBuf if (and only if) the module was read + * successfully. 
 */
LLVMBool LLVMGetBitcodeModuleInContext2(LLVMContextRef ContextRef,
                                        LLVMMemoryBufferRef MemBuf,
                                        LLVMModuleRef *OutM);
diff --git a/llvm/include/llvm-c/Core.h b/llvm/include/llvm-c/Core.h
index 2abc29851cd9..bb9e872b6ec5 100644
--- a/llvm/include/llvm-c/Core.h
+++ b/llvm/include/llvm-c/Core.h
@@ -381,8 +381,14 @@ typedef enum {
                             the old one */
  LLVMAtomicRMWBinOpFAdd, /**< Add a floating point value and return the
                             old one */
-  LLVMAtomicRMWBinOpFSub /**< Subtract a floating point value and return the
-                            old one */
+  LLVMAtomicRMWBinOpFSub, /**< Subtract a floating point value and return the
+                             old one */
+  LLVMAtomicRMWBinOpFMax, /**< Sets the value if it's greater than the
+                             original using a floating point comparison and
+                             return the old one */
+  LLVMAtomicRMWBinOpFMin, /**< Sets the value if it's smaller than the
+                             original using a floating point comparison and
+                             return the old one */
} LLVMAtomicRMWBinOp;

typedef enum {
@@ -2161,23 +2167,12 @@ LLVMValueRef LLVMConstNot(LLVMValueRef ConstantVal);
LLVMValueRef LLVMConstAdd(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant);
LLVMValueRef LLVMConstNSWAdd(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant);
LLVMValueRef LLVMConstNUWAdd(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant);
-LLVMValueRef LLVMConstFAdd(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant);
LLVMValueRef LLVMConstSub(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant);
LLVMValueRef LLVMConstNSWSub(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant);
LLVMValueRef LLVMConstNUWSub(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant);
-LLVMValueRef LLVMConstFSub(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant);
LLVMValueRef LLVMConstMul(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant);
LLVMValueRef LLVMConstNSWMul(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant);
LLVMValueRef LLVMConstNUWMul(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant);
-LLVMValueRef LLVMConstFMul(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant);
-LLVMValueRef LLVMConstUDiv(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant);
-LLVMValueRef LLVMConstExactUDiv(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant);
-LLVMValueRef LLVMConstSDiv(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant);
-LLVMValueRef LLVMConstExactSDiv(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant);
-LLVMValueRef LLVMConstFDiv(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant);
-LLVMValueRef LLVMConstURem(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant);
-LLVMValueRef LLVMConstSRem(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant);
-LLVMValueRef LLVMConstFRem(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant);
LLVMValueRef LLVMConstAnd(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant);
LLVMValueRef LLVMConstOr(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant);
LLVMValueRef LLVMConstXor(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant);
@@ -2238,9 +2233,6 @@ LLVMValueRef LLVMConstInsertElement(LLVMValueRef VectorConstant,
LLVMValueRef LLVMConstShuffleVector(LLVMValueRef VectorAConstant,
                                    LLVMValueRef VectorBConstant,
                                    LLVMValueRef MaskConstant);
-LLVMValueRef LLVMConstInsertValue(LLVMValueRef AggConstant,
-                                  LLVMValueRef ElementValueConstant,
-                                  unsigned *IdxList, unsigned NumIdx);
LLVMValueRef LLVMBlockAddress(LLVMValueRef F, LLVMBasicBlockRef BB);

/** Deprecated: Use LLVMGetInlineAsm instead. */
@@ -3231,7 +3223,7 @@ LLVMValueRef LLVMGetNextInstruction(LLVMValueRef Inst);
LLVMValueRef LLVMGetPreviousInstruction(LLVMValueRef Inst);

/**
- * Remove and delete an instruction.
+ * Remove an instruction.
* * The instruction specified is removed from its containing building * block but is kept alive. @@ -3251,6 +3243,16 @@ void LLVMInstructionRemoveFromParent(LLVMValueRef Inst); void LLVMInstructionEraseFromParent(LLVMValueRef Inst); /** + * Delete an instruction. + * + * The instruction specified is deleted. It must have previously been + * removed from its containing building block. + * + * @see llvm::Value::deleteValue() + */ +void LLVMDeleteInstruction(LLVMValueRef Inst); + +/** * Obtain the code opcode for an individual instruction. * * @see llvm::Instruction::getOpCode() diff --git a/llvm/include/llvm/ADT/ScopedHashTable.h b/llvm/include/llvm/ADT/ScopedHashTable.h index 48544961d095..78d4df7d5684 100644 --- a/llvm/include/llvm/ADT/ScopedHashTable.h +++ b/llvm/include/llvm/ADT/ScopedHashTable.h @@ -147,7 +147,9 @@ public: }; template <typename K, typename V, typename KInfo, typename AllocatorTy> -class ScopedHashTable { +class ScopedHashTable : detail::AllocatorHolder<AllocatorTy> { + using AllocTy = detail::AllocatorHolder<AllocatorTy>; + public: /// ScopeTy - This is a helpful typedef that allows clients to get easy access /// to the name of the scope for this hash table. @@ -162,11 +164,9 @@ private: DenseMap<K, ValTy*, KInfo> TopLevelMap; ScopeTy *CurScope = nullptr; - AllocatorTy Allocator; - public: ScopedHashTable() = default; - ScopedHashTable(AllocatorTy A) : Allocator(A) {} + ScopedHashTable(AllocatorTy A) : AllocTy(A) {} ScopedHashTable(const ScopedHashTable &) = delete; ScopedHashTable &operator=(const ScopedHashTable &) = delete; @@ -175,8 +175,7 @@ public: } /// Access to the allocator. - AllocatorTy &getAllocator() { return Allocator; } - const AllocatorTy &getAllocator() const { return Allocator; } + using AllocTy::getAllocator; /// Return 1 if the specified key is in the table, 0 otherwise. size_type count(const K &Key) const { @@ -217,7 +216,7 @@ public: assert(S && "No scope active!"); ScopedHashTableVal<K, V> *&KeyEntry = TopLevelMap[Key]; KeyEntry = ValTy::Create(S->getLastValInScope(), KeyEntry, Key, Val, - Allocator); + getAllocator()); S->setLastValInScope(KeyEntry); } }; diff --git a/llvm/include/llvm/ADT/StringMap.h b/llvm/include/llvm/ADT/StringMap.h index 23248093c67e..81f2626eea72 100644 --- a/llvm/include/llvm/ADT/StringMap.h +++ b/llvm/include/llvm/ADT/StringMap.h @@ -107,8 +107,9 @@ public: /// funky memory allocation and hashing things to make it extremely efficient, /// storing the string data *after* the value in the map. 
template <typename ValueTy, typename AllocatorTy = MallocAllocator> -class StringMap : public StringMapImpl { - AllocatorTy Allocator; +class StringMap : public StringMapImpl, + private detail::AllocatorHolder<AllocatorTy> { + using AllocTy = detail::AllocatorHolder<AllocatorTy>; public: using MapEntryTy = StringMapEntry<ValueTy>; @@ -119,12 +120,11 @@ public: : StringMapImpl(InitialSize, static_cast<unsigned>(sizeof(MapEntryTy))) {} explicit StringMap(AllocatorTy A) - : StringMapImpl(static_cast<unsigned>(sizeof(MapEntryTy))), Allocator(A) { - } + : StringMapImpl(static_cast<unsigned>(sizeof(MapEntryTy))), AllocTy(A) {} StringMap(unsigned InitialSize, AllocatorTy A) : StringMapImpl(InitialSize, static_cast<unsigned>(sizeof(MapEntryTy))), - Allocator(A) {} + AllocTy(A) {} StringMap(std::initializer_list<std::pair<StringRef, ValueTy>> List) : StringMapImpl(List.size(), static_cast<unsigned>(sizeof(MapEntryTy))) { @@ -132,11 +132,11 @@ public: } StringMap(StringMap &&RHS) - : StringMapImpl(std::move(RHS)), Allocator(std::move(RHS.Allocator)) {} + : StringMapImpl(std::move(RHS)), AllocTy(std::move(RHS.getAllocator())) {} StringMap(const StringMap &RHS) : StringMapImpl(static_cast<unsigned>(sizeof(MapEntryTy))), - Allocator(RHS.Allocator) { + AllocTy(RHS.getAllocator()) { if (RHS.empty()) return; @@ -156,7 +156,7 @@ public: } TheTable[I] = MapEntryTy::Create( - static_cast<MapEntryTy *>(Bucket)->getKey(), Allocator, + static_cast<MapEntryTy *>(Bucket)->getKey(), getAllocator(), static_cast<MapEntryTy *>(Bucket)->getValue()); HashTable[I] = RHSHashTable[I]; } @@ -171,7 +171,7 @@ public: StringMap &operator=(StringMap RHS) { StringMapImpl::swap(RHS); - std::swap(Allocator, RHS.Allocator); + std::swap(getAllocator(), RHS.getAllocator()); return *this; } @@ -183,15 +183,14 @@ public: for (unsigned I = 0, E = NumBuckets; I != E; ++I) { StringMapEntryBase *Bucket = TheTable[I]; if (Bucket && Bucket != getTombstoneVal()) { - static_cast<MapEntryTy *>(Bucket)->Destroy(Allocator); + static_cast<MapEntryTy *>(Bucket)->Destroy(getAllocator()); } } } free(TheTable); } - AllocatorTy &getAllocator() { return Allocator; } - const AllocatorTy &getAllocator() const { return Allocator; } + using AllocTy::getAllocator; using key_type = const char *; using mapped_type = ValueTy; @@ -336,7 +335,8 @@ public: if (Bucket == getTombstoneVal()) --NumTombstones; - Bucket = MapEntryTy::Create(Key, Allocator, std::forward<ArgsTy>(Args)...); + Bucket = + MapEntryTy::Create(Key, getAllocator(), std::forward<ArgsTy>(Args)...); ++NumItems; assert(NumItems + NumTombstones <= NumBuckets); @@ -354,7 +354,7 @@ public: for (unsigned I = 0, E = NumBuckets; I != E; ++I) { StringMapEntryBase *&Bucket = TheTable[I]; if (Bucket && Bucket != getTombstoneVal()) { - static_cast<MapEntryTy *>(Bucket)->Destroy(Allocator); + static_cast<MapEntryTy *>(Bucket)->Destroy(getAllocator()); } Bucket = nullptr; } @@ -370,7 +370,7 @@ public: void erase(iterator I) { MapEntryTy &V = *I; remove(&V); - V.Destroy(Allocator); + V.Destroy(getAllocator()); } bool erase(StringRef Key) { diff --git a/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h b/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h index d8e524d7cb80..8addbde40c4f 100644 --- a/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h +++ b/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h @@ -1278,9 +1278,9 @@ bool BlockFrequencyInfoImpl<BT>::computeMassInLoop(LoopData &Loop) { } LLVM_DEBUG(dbgs() << getBlockName(HeaderNode) << " has irr loop header weight " - << HeaderWeight.getValue() << 
"\n"); + << HeaderWeight.value() << "\n"); NumHeadersWithWeight++; - uint64_t HeaderWeightValue = HeaderWeight.getValue(); + uint64_t HeaderWeightValue = HeaderWeight.value(); if (!MinHeaderWeight || HeaderWeightValue < MinHeaderWeight) MinHeaderWeight = HeaderWeightValue; if (HeaderWeightValue) { @@ -1732,10 +1732,10 @@ raw_ostream &BlockFrequencyInfoImpl<BT>::print(raw_ostream &OS) const { if (Optional<uint64_t> ProfileCount = BlockFrequencyInfoImplBase::getBlockProfileCount( F->getFunction(), getNode(&BB))) - OS << ", count = " << ProfileCount.getValue(); + OS << ", count = " << ProfileCount.value(); if (Optional<uint64_t> IrrLoopHeaderWeight = BB.getIrrLoopHeaderWeight()) - OS << ", irr_loop_header_weight = " << IrrLoopHeaderWeight.getValue(); + OS << ", irr_loop_header_weight = " << IrrLoopHeaderWeight.value(); OS << "\n"; } diff --git a/llvm/include/llvm/Analysis/GlobalsModRef.h b/llvm/include/llvm/Analysis/GlobalsModRef.h index 4d8ed10bb18e..62095a1d6ad2 100644 --- a/llvm/include/llvm/Analysis/GlobalsModRef.h +++ b/llvm/include/llvm/Analysis/GlobalsModRef.h @@ -102,16 +102,12 @@ public: ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc, AAQueryInfo &AAQI); + using AAResultBase::getModRefBehavior; /// getModRefBehavior - Return the behavior of the specified function if /// called from the specified call site. The call site may be null in which /// case the most generic behavior of this function should be returned. FunctionModRefBehavior getModRefBehavior(const Function *F); - /// getModRefBehavior - Return the behavior of the specified function if - /// called from the specified call site. The call site may be null in which - /// case the most generic behavior of this function should be returned. - FunctionModRefBehavior getModRefBehavior(const CallBase *Call); - private: FunctionInfo *getFunctionInfo(const Function *F); diff --git a/llvm/include/llvm/Analysis/InstSimplifyFolder.h b/llvm/include/llvm/Analysis/InstSimplifyFolder.h index d4ea7d73ec92..16bd9f765421 100644 --- a/llvm/include/llvm/Analysis/InstSimplifyFolder.h +++ b/llvm/include/llvm/Analysis/InstSimplifyFolder.h @@ -67,6 +67,11 @@ public: return simplifyBinOp(Opc, LHS, RHS, FMF, SQ); } + Value *FoldUnOpFMF(Instruction::UnaryOps Opc, Value *V, + FastMathFlags FMF) const override { + return simplifyUnOp(Opc, V, FMF, SQ); + } + Value *FoldICmp(CmpInst::Predicate P, Value *LHS, Value *RHS) const override { return simplifyICmpInst(P, LHS, RHS, SQ); } @@ -108,18 +113,6 @@ public: } //===--------------------------------------------------------------------===// - // Unary Operators - //===--------------------------------------------------------------------===// - - Value *CreateFNeg(Constant *C) const override { - return ConstFolder.CreateFNeg(C); - } - - Value *CreateUnOp(Instruction::UnaryOps Opc, Constant *C) const override { - return ConstFolder.CreateUnOp(Opc, C); - } - - //===--------------------------------------------------------------------===// // Cast/Conversion Operators //===--------------------------------------------------------------------===// diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h index de1cc299f062..5b49ab14286b 100644 --- a/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -535,8 +535,10 @@ public: /// Parse NSW/NUW flags from add/sub/mul IR binary operation \p Op into /// SCEV no-wrap flags, and deduce flag[s] that aren't known yet. 
- /// Does not mutate the original instruction. - std::pair<SCEV::NoWrapFlags, bool /*Deduced*/> + /// Does not mutate the original instruction. Returns None if it could not + /// deduce more precise flags than the instruction already has, otherwise + /// returns proven flags. + Optional<SCEV::NoWrapFlags> getStrengthenedNoWrapFlagsFromBinOp(const OverflowingBinaryOperator *OBO); /// Notify this ScalarEvolution that \p User directly uses SCEVs in \p Ops. diff --git a/llvm/include/llvm/Analysis/TargetFolder.h b/llvm/include/llvm/Analysis/TargetFolder.h index 3a7218b10b97..c42577330e9b 100644 --- a/llvm/include/llvm/Analysis/TargetFolder.h +++ b/llvm/include/llvm/Analysis/TargetFolder.h @@ -55,8 +55,11 @@ public: Value *RHS) const override { auto *LC = dyn_cast<Constant>(LHS); auto *RC = dyn_cast<Constant>(RHS); - if (LC && RC) - return Fold(ConstantExpr::get(Opc, LC, RC)); + if (LC && RC) { + if (ConstantExpr::isDesirableBinOp(Opc)) + return Fold(ConstantExpr::get(Opc, LC, RC)); + return ConstantFoldBinaryOpOperands(Opc, LC, RC, DL); + } return nullptr; } @@ -64,9 +67,12 @@ public: bool IsExact) const override { auto *LC = dyn_cast<Constant>(LHS); auto *RC = dyn_cast<Constant>(RHS); - if (LC && RC) - return Fold(ConstantExpr::get( - Opc, LC, RC, IsExact ? PossiblyExactOperator::IsExact : 0)); + if (LC && RC) { + if (ConstantExpr::isDesirableBinOp(Opc)) + return Fold(ConstantExpr::get( + Opc, LC, RC, IsExact ? PossiblyExactOperator::IsExact : 0)); + return ConstantFoldBinaryOpOperands(Opc, LC, RC, DL); + } return nullptr; } @@ -75,12 +81,15 @@ public: auto *LC = dyn_cast<Constant>(LHS); auto *RC = dyn_cast<Constant>(RHS); if (LC && RC) { - unsigned Flags = 0; - if (HasNUW) - Flags |= OverflowingBinaryOperator::NoUnsignedWrap; - if (HasNSW) - Flags |= OverflowingBinaryOperator::NoSignedWrap; - return Fold(ConstantExpr::get(Opc, LC, RC, Flags)); + if (ConstantExpr::isDesirableBinOp(Opc)) { + unsigned Flags = 0; + if (HasNUW) + Flags |= OverflowingBinaryOperator::NoUnsignedWrap; + if (HasNSW) + Flags |= OverflowingBinaryOperator::NoSignedWrap; + return Fold(ConstantExpr::get(Opc, LC, RC, Flags)); + } + return ConstantFoldBinaryOpOperands(Opc, LC, RC, DL); } return nullptr; } @@ -89,11 +98,19 @@ public: FastMathFlags FMF) const override { return FoldBinOp(Opc, LHS, RHS); } + Value *FoldICmp(CmpInst::Predicate P, Value *LHS, Value *RHS) const override { auto *LC = dyn_cast<Constant>(LHS); auto *RC = dyn_cast<Constant>(RHS); if (LC && RC) - return ConstantExpr::getCompare(P, LC, RC); + return Fold(ConstantExpr::getCompare(P, LC, RC)); + return nullptr; + } + + Value *FoldUnOpFMF(Instruction::UnaryOps Opc, Value *V, + FastMathFlags FMF) const override { + if (Constant *C = dyn_cast<Constant>(V)) + return Fold(ConstantExpr::get(Opc, C)); return nullptr; } @@ -165,18 +182,6 @@ public: } //===--------------------------------------------------------------------===// - // Unary Operators - //===--------------------------------------------------------------------===// - - Constant *CreateFNeg(Constant *C) const override { - return Fold(ConstantExpr::getFNeg(C)); - } - - Constant *CreateUnOp(Instruction::UnaryOps Opc, Constant *C) const override { - return Fold(ConstantExpr::get(Opc, C)); - } - - //===--------------------------------------------------------------------===// // Cast/Conversion Operators //===--------------------------------------------------------------------===// diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index 
372f17cfc7ff..c64cb51cc08e 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -160,6 +160,8 @@ public: bool skipScalarizationCost() const { return ScalarizationCost.isValid(); } }; +enum class PredicationStyle { None, Data, DataAndControlFlow }; + class TargetTransformInfo; typedef TargetTransformInfo TTI; @@ -531,8 +533,12 @@ public: const LoopAccessInfo *LAI) const; /// Query the target whether lowering of the llvm.get.active.lane.mask - /// intrinsic is supported. - bool emitGetActiveLaneMask() const; + /// intrinsic is supported and how the mask should be used. A return value + /// of PredicationStyle::Data indicates the mask is used as data only, + /// whereas PredicationStyle::DataAndControlFlow indicates we should also use + /// the mask for control flow in the loop. If unsupported the return value is + /// PredicationStyle::None. + PredicationStyle emitGetActiveLaneMask() const; // Parameters that control the loop peeling transformation struct PeelingPreferences { @@ -1553,7 +1559,7 @@ public: preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *TLI, DominatorTree *DT, const LoopAccessInfo *LAI) = 0; - virtual bool emitGetActiveLaneMask() = 0; + virtual PredicationStyle emitGetActiveLaneMask() = 0; virtual Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) = 0; virtual Optional<Value *> @@ -1932,7 +1938,7 @@ public: const LoopAccessInfo *LAI) override { return Impl.preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LAI); } - bool emitGetActiveLaneMask() override { + PredicationStyle emitGetActiveLaneMask() override { return Impl.emitGetActiveLaneMask(); } Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC, diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index a70c418974f5..af71fc9bffaf 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -167,8 +167,8 @@ public: return false; } - bool emitGetActiveLaneMask() const { - return false; + PredicationStyle emitGetActiveLaneMask() const { + return PredicationStyle::None; } Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC, diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h index 3b29bf1d53b4..7cb1a91d8c93 100644 --- a/llvm/include/llvm/Analysis/ValueTracking.h +++ b/llvm/include/llvm/Analysis/ValueTracking.h @@ -458,7 +458,7 @@ constexpr unsigned MaxAnalysisRecursionDepth = 6; /// /// This method can return true for instructions that read memory; /// for such instructions, moving them may change the resulting value. - bool isSafeToSpeculativelyExecute(const Value *V, + bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI = nullptr, const DominatorTree *DT = nullptr, const TargetLibraryInfo *TLI = nullptr); @@ -481,8 +481,8 @@ constexpr unsigned MaxAnalysisRecursionDepth = 6; /// This behavior is a shortcoming in the current implementation and not /// intentional. 
bool isSafeToSpeculativelyExecuteWithOpcode( - unsigned Opcode, const Operator *Inst, const Instruction *CtxI = nullptr, - const DominatorTree *DT = nullptr, + unsigned Opcode, const Instruction *Inst, + const Instruction *CtxI = nullptr, const DominatorTree *DT = nullptr, const TargetLibraryInfo *TLI = nullptr); /// Returns true if the result or effects of the given instructions \p I diff --git a/llvm/include/llvm/Analysis/VectorUtils.h b/llvm/include/llvm/Analysis/VectorUtils.h index 0005874ba040..fa0892788b43 100644 --- a/llvm/include/llvm/Analysis/VectorUtils.h +++ b/llvm/include/llvm/Analysis/VectorUtils.h @@ -236,10 +236,10 @@ class VFDatabase { // ensuring that the variant described in the attribute has a // corresponding definition or declaration of the vector // function in the Module M. - if (Shape && (Shape.getValue().ScalarName == ScalarName)) { - assert(CI.getModule()->getFunction(Shape.getValue().VectorName) && + if (Shape && (Shape.value().ScalarName == ScalarName)) { + assert(CI.getModule()->getFunction(Shape.value().VectorName) && "Vector function is missing."); - Mappings.push_back(Shape.getValue()); + Mappings.push_back(Shape.value()); } } } diff --git a/llvm/include/llvm/AsmParser/LLToken.h b/llvm/include/llvm/AsmParser/LLToken.h index 230a1662cc04..04235f0fdc4e 100644 --- a/llvm/include/llvm/AsmParser/LLToken.h +++ b/llvm/include/llvm/AsmParser/LLToken.h @@ -221,6 +221,8 @@ enum Kind { kw_min, kw_umax, kw_umin, + kw_fmax, + kw_fmin, // Instruction Opcodes (Opcode in UIntVal). kw_fneg, @@ -397,9 +399,6 @@ enum Kind { // GV's with __attribute__((no_sanitize("hwaddress"))), or things in // -fsanitize-ignorelist when built with HWASan. kw_no_sanitize_hwaddress, - // GV's with __attribute__((no_sanitize("memtag"))), or things in - // -fsanitize-ignorelist when built with memory tagging. - kw_no_sanitize_memtag, // GV's where the clang++ frontend (when ASan is used) notes that this is // dynamically initialized, and thus needs ODR detection. kw_sanitize_address_dyninit, diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h index 1e0ef613788d..1fd025761127 100644 --- a/llvm/include/llvm/BinaryFormat/ELF.h +++ b/llvm/include/llvm/BinaryFormat/ELF.h @@ -990,6 +990,7 @@ enum : unsigned { // backward-compatibility). SHT_LLVM_CALL_GRAPH_PROFILE = 0x6fff4c09, // LLVM Call Graph Profile. SHT_LLVM_BB_ADDR_MAP = 0x6fff4c0a, // LLVM Basic Block Address Map. + SHT_LLVM_OFFLOADING = 0x6fff4c0b, // LLVM device offloading data. // Android's experimental support for SHT_RELR sections. // https://android.googlesource.com/platform/bionic/+/b7feec74547f84559a1467aca02708ff61346d2a/libc/include/elf.h#512 SHT_ANDROID_RELR = 0x6fffff00, // Relocation entries; only offsets. 
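The kw_fmax and kw_fmin tokens added to LLToken.h above, together with the RMW_FMAX/RMW_FMIN bitcode records, the ATOMIC_LOAD_FMAX/ATOMIC_LOAD_FMIN SelectionDAG nodes, and the AtomicRMWInst::FMax/FMin opcodes that appear later in this diff, thread the new floating-point min/max atomicrmw operations through the whole stack. A minimal sketch of emitting one through IRBuilder, assuming the builder, pointer, and operand are set up by the caller; the function and variable names here are illustrative, not part of the diff:

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Instructions.h"

    using namespace llvm;

    // Emits: %old = atomicrmw fmax ptr %Ptr, float %Val seq_cst, align 4
    // The returned instruction yields the value that was in memory before
    // the update, like every other atomicrmw operation.
    static Value *emitAtomicFMax(IRBuilder<> &B, Value *Ptr, Value *Val) {
      return B.CreateAtomicRMW(AtomicRMWInst::FMax, Ptr, Val, MaybeAlign(4),
                               AtomicOrdering::SequentiallyConsistent);
    }

Per the Instructions.h hunk further down, FMax/FMin follow the semantics of llvm.maxnum.*/llvm.minnum.*, which pins down their NaN behavior.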
diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h index 5d96204ba42a..eee4c50cc13b 100644 --- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h +++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h @@ -458,7 +458,9 @@ enum RMWOperations { RMW_UMAX = 9, RMW_UMIN = 10, RMW_FADD = 11, - RMW_FSUB = 12 + RMW_FSUB = 12, + RMW_FMAX = 13, + RMW_FMIN = 14 }; /// OverflowingBinaryOperatorOptionalFlags - Flags for serializing @@ -686,6 +688,7 @@ enum AttributeKindCodes { ATTR_KIND_ALLOCATED_POINTER = 81, ATTR_KIND_ALLOC_KIND = 82, ATTR_KIND_PRESPLIT_COROUTINE = 83, + ATTR_KIND_FNRETTHUNK_EXTERN = 84, }; enum ComdatSelectionKindCodes { diff --git a/llvm/include/llvm/Bitstream/BitstreamWriter.h b/llvm/include/llvm/Bitstream/BitstreamWriter.h index be6bab5532bd..143f9ba17afe 100644 --- a/llvm/include/llvm/Bitstream/BitstreamWriter.h +++ b/llvm/include/llvm/Bitstream/BitstreamWriter.h @@ -386,12 +386,12 @@ private: const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i++); if (Op.isLiteral()) - EmitAbbreviatedLiteral(Op, Code.getValue()); + EmitAbbreviatedLiteral(Op, Code.value()); else { assert(Op.getEncoding() != BitCodeAbbrevOp::Array && Op.getEncoding() != BitCodeAbbrevOp::Blob && "Expected literal or scalar"); - EmitAbbreviatedField(Op, Code.getValue()); + EmitAbbreviatedField(Op, Code.value()); } } diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h b/llvm/include/llvm/CodeGen/AsmPrinter.h index fb4627c029b0..5e900e9162d8 100644 --- a/llvm/include/llvm/CodeGen/AsmPrinter.h +++ b/llvm/include/llvm/CodeGen/AsmPrinter.h @@ -41,6 +41,7 @@ class DIEAbbrev; class DwarfDebug; class GCMetadataPrinter; class GCStrategy; +class GlobalAlias; class GlobalObject; class GlobalValue; class GlobalVariable; @@ -474,7 +475,11 @@ public: virtual const MCExpr *lowerConstant(const Constant *CV); /// Print a general LLVM constant to the .s file. - void emitGlobalConstant(const DataLayout &DL, const Constant *CV); + /// On AIX, when an alias refers to a sub-element of a global variable, the + /// label of that alias needs to be emitted before the corresponding element. + using AliasMapTy = DenseMap<uint64_t, SmallVector<const GlobalAlias *, 1>>; + void emitGlobalConstant(const DataLayout &DL, const Constant *CV, + AliasMapTy *AliasList = nullptr); /// Unnamed constant global variables solely containing a pointer to /// another global variable act like a global variable "proxy", or GOT diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 46be8e030406..b5b766ff03f1 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -607,7 +607,7 @@ public: return BaseT::preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LAI); } - bool emitGetActiveLaneMask() { + PredicationStyle emitGetActiveLaneMask() { return BaseT::emitGetActiveLaneMask(); } diff --git a/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h b/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h index 8ea45e576e4d..44ba81223ec3 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h @@ -196,6 +196,10 @@ enum { /// - PredicateID - The ID of the predicate function to call GIM_CheckCxxInsnPredicate, /// Check if there's no use of the first result. 
+ /// - InsnID - Instruction ID + GIM_CheckHasNoUse, + /// Check the type for the specified operand /// - InsnID - Instruction ID /// - OpIdx - Operand index diff --git a/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h b/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h index c06b33d11170..1229dfcb2c31 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h @@ -379,6 +379,25 @@ bool InstructionSelector::executeMatchTable( return false; break; } + case GIM_CheckHasNoUse: { + int64_t InsnID = MatchTable[CurrentIdx++]; + + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIM_CheckHasNoUse(MIs[" + << InsnID << "]\n"); + + const MachineInstr *MI = State.MIs[InsnID]; + assert(MI && "Used insn before defined"); + assert(MI->getNumDefs() > 0 && "No defs"); + const Register Res = MI->getOperand(0).getReg(); + + if (!MRI.use_nodbg_empty(Res)) { + if (handleReject() == RejectAndGiveUp) + return false; + } + + break; + } case GIM_CheckAtomicOrdering: { int64_t InsnID = MatchTable[CurrentIdx++]; AtomicOrdering Ordering = (AtomicOrdering)MatchTable[CurrentIdx++]; @@ -675,7 +694,7 @@ bool InstructionSelector::executeMatchTable( (ISel.*ISelInfo.ComplexPredicates[ComplexPredicateID])( State.MIs[InsnID]->getOperand(OpIdx)); if (Renderer) - State.Renderers[RendererID] = Renderer.getValue(); + State.Renderers[RendererID] = Renderer.value(); else if (handleReject() == RejectAndGiveUp) return false; diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h index c6c57ac07f0e..caa6346a40db 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h @@ -327,9 +327,6 @@ public: LegalizeResult reduceLoadStoreWidth(GLoadStore &MI, unsigned TypeIdx, LLT NarrowTy); - LegalizeResult fewerElementsVectorSextInReg(MachineInstr &MI, unsigned TypeIdx, - LLT NarrowTy); - LegalizeResult narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt, LLT HalfTy, LLT ShiftAmtTy); diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h index 16ba568c1be9..01fd5d94d371 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h @@ -683,6 +683,13 @@ public: MachineInstrBuilder buildBoolExt(const DstOp &Res, const SrcOp &Op, bool IsFP); + // Build and insert \p Res = G_SEXT_INREG \p Op, 1 or \p Res = G_AND \p Op, 1, + // or COPY depending on how the target wants to extend boolean values, using + // the original register size. + MachineInstrBuilder buildBoolExtInReg(const DstOp &Res, const SrcOp &Op, + bool IsVector, + bool IsFP); + /// Build and insert \p Res = G_ZEXT \p Op /// /// G_ZEXT produces a register of the specified width, with bits 0 to @@ -1401,6 +1408,40 @@ public: const DstOp &OldValRes, const SrcOp &Addr, const SrcOp &Val, MachineMemOperand &MMO); + /// Build and insert `OldValRes<def> = G_ATOMICRMW_FMAX Addr, Val, MMO`. + /// + /// Atomically replace the value at \p Addr with the floating point maximum of + /// \p Val and the original value. Puts the original value from \p Addr in \p + /// OldValRes. + /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre \p OldValRes must be a generic virtual register. + /// \pre \p Addr must be a generic virtual register with pointer type. 
+ /// \pre \p OldValRes, and \p Val must be generic virtual registers of the + /// same type. + /// + /// \return a MachineInstrBuilder for the newly created instruction. + MachineInstrBuilder buildAtomicRMWFMax( + const DstOp &OldValRes, const SrcOp &Addr, const SrcOp &Val, + MachineMemOperand &MMO); + + /// Build and insert `OldValRes<def> = G_ATOMICRMW_FMIN Addr, Val, MMO`. + /// + /// Atomically replace the value at \p Addr with the floating point minimum of + /// \p Val and the original value. Puts the original value from \p Addr in \p + /// OldValRes. + /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre \p OldValRes must be a generic virtual register. + /// \pre \p Addr must be a generic virtual register with pointer type. + /// \pre \p OldValRes, and \p Val must be generic virtual registers of the + /// same type. + /// + /// \return a MachineInstrBuilder for the newly created instruction. + MachineInstrBuilder buildAtomicRMWFMin( + const DstOp &OldValRes, const SrcOp &Addr, const SrcOp &Val, + MachineMemOperand &MMO); + /// Build and insert `G_FENCE Ordering, Scope`. MachineInstrBuilder buildFence(unsigned Ordering, unsigned Scope); diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h index 120f89952a95..14bbcd24d04d 100644 --- a/llvm/include/llvm/CodeGen/ISDOpcodes.h +++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h @@ -1195,6 +1195,8 @@ enum NodeType { ATOMIC_LOAD_UMAX, ATOMIC_LOAD_FADD, ATOMIC_LOAD_FSUB, + ATOMIC_LOAD_FMAX, + ATOMIC_LOAD_FMIN, // Masked load and store - consecutive vector load and store operations // with additional mask operand that prevents memory accesses to the @@ -1285,6 +1287,11 @@ enum NodeType { VECREDUCE_UMAX, VECREDUCE_UMIN, + // The `llvm.experimental.stackmap` intrinsic. + // Operands: input chain, glue, <id>, <numShadowBytes>, [live0[, live1...]] + // Outputs: output chain, glue + STACKMAP, + // Vector Predication #define BEGIN_REGISTER_VP_SDNODE(VPSDID, ...) 
VPSDID, #include "llvm/IR/VPIntrinsics.def" diff --git a/llvm/include/llvm/CodeGen/MachineScheduler.h b/llvm/include/llvm/CodeGen/MachineScheduler.h index 0554eb1ab77e..8000c9db428d 100644 --- a/llvm/include/llvm/CodeGen/MachineScheduler.h +++ b/llvm/include/llvm/CodeGen/MachineScheduler.h @@ -103,8 +103,10 @@ extern cl::opt<bool> ForceBottomUp; extern cl::opt<bool> VerifyScheduling; #ifndef NDEBUG extern cl::opt<bool> ViewMISchedDAGs; +extern cl::opt<bool> PrintDAGs; #else extern const bool ViewMISchedDAGs; +extern const bool PrintDAGs; #endif class AAResults; diff --git a/llvm/include/llvm/CodeGen/SelectionDAGISel.h b/llvm/include/llvm/CodeGen/SelectionDAGISel.h index 35fb0bc80593..87df6d1b1604 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGISel.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGISel.h @@ -321,6 +321,7 @@ private: void Select_FREEZE(SDNode *N); void Select_ARITH_FENCE(SDNode *N); + void Select_STACKMAP(SDNode *N); private: void DoInstructionSelection(); diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h index 5974f13a296b..c531ddf8e906 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -1403,6 +1403,8 @@ public: case ISD::ATOMIC_LOAD_UMAX: case ISD::ATOMIC_LOAD_FADD: case ISD::ATOMIC_LOAD_FSUB: + case ISD::ATOMIC_LOAD_FMAX: + case ISD::ATOMIC_LOAD_FMIN: case ISD::ATOMIC_LOAD: case ISD::ATOMIC_STORE: case ISD::MLOAD: @@ -1468,6 +1470,8 @@ public: N->getOpcode() == ISD::ATOMIC_LOAD_UMAX || N->getOpcode() == ISD::ATOMIC_LOAD_FADD || N->getOpcode() == ISD::ATOMIC_LOAD_FSUB || + N->getOpcode() == ISD::ATOMIC_LOAD_FMAX || + N->getOpcode() == ISD::ATOMIC_LOAD_FMIN || N->getOpcode() == ISD::ATOMIC_LOAD || N->getOpcode() == ISD::ATOMIC_STORE; } diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index 98b9a416ea59..ab5d3ba0164d 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -546,6 +546,9 @@ public: return BypassSlowDivWidths; } + /// Return true only if vscale must be a power of two. + virtual bool isVScaleKnownToBeAPowerOfTwo() const { return false; } + /// Return true if Flow Control is an expensive operation that should be /// avoided. bool isJumpExpensive() const { return JumpIsExpensive; } diff --git a/llvm/include/llvm/DWARFLinker/DWARFLinker.h b/llvm/include/llvm/DWARFLinker/DWARFLinker.h index 0b2e033bd97a..b2b2e2e873be 100644 --- a/llvm/include/llvm/DWARFLinker/DWARFLinker.h +++ b/llvm/include/llvm/DWARFLinker/DWARFLinker.h @@ -692,9 +692,6 @@ private: bool getDIENames(const DWARFDie &Die, AttributesInfo &Info, OffsetsStringPool &StringPool, bool StripTemplate = false); - /// Create a copy of abbreviation Abbrev. 
- void copyAbbrev(const DWARFAbbreviationDeclaration &Abbrev, bool hasODR); - uint32_t hashFullyQualifiedName(DWARFDie DIE, CompileUnit &U, const DWARFFile &File, int RecurseDepth = 0); diff --git a/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h b/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h index 00c4bf0a615f..f233a183912b 100644 --- a/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h +++ b/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h @@ -243,6 +243,8 @@ private: std::function<void()> Evictor; }; +Optional<ArrayRef<uint8_t>> getBuildID(const ELFObjectFileBase *Obj); + } // end namespace symbolize } // end namespace llvm diff --git a/llvm/include/llvm/Debuginfod/Debuginfod.h b/llvm/include/llvm/Debuginfod/Debuginfod.h index 064cfa75b1a1..496b24cfa37e 100644 --- a/llvm/include/llvm/Debuginfod/Debuginfod.h +++ b/llvm/include/llvm/Debuginfod/Debuginfod.h @@ -7,23 +7,32 @@ //===----------------------------------------------------------------------===// /// /// \file -/// This file contains the declarations of getCachedOrDownloadArtifact and -/// several convenience functions for specific artifact types: -/// getCachedOrDownloadSource, getCachedOrDownloadExecutable, and -/// getCachedOrDownloadDebuginfo. This file also declares -/// getDefaultDebuginfodUrls and getDefaultDebuginfodCacheDirectory. -/// +/// This file contains several declarations for the debuginfod client and +/// server. The client functions are getDefaultDebuginfodUrls, +/// getCachedOrDownloadArtifact, and several convenience functions for specific +/// artifact types: getCachedOrDownloadSource, getCachedOrDownloadExecutable, +/// and getCachedOrDownloadDebuginfo. For the server, this file declares the +/// DebuginfodLogEntry and DebuginfodServer structs, as well as the +/// DebuginfodLog, DebuginfodCollection classes. /// //===----------------------------------------------------------------------===// #ifndef LLVM_DEBUGINFOD_DEBUGINFOD_H #define LLVM_DEBUGINFOD_DEBUGINFOD_H +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Debuginfod/HTTPServer.h" #include "llvm/Support/Error.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Mutex.h" +#include "llvm/Support/RWMutex.h" +#include "llvm/Support/Timer.h" #include <chrono> +#include <condition_variable> +#include <queue> namespace llvm { @@ -68,6 +77,68 @@ Expected<std::string> getCachedOrDownloadArtifact( StringRef UniqueKey, StringRef UrlPath, StringRef CacheDirectoryPath, ArrayRef<StringRef> DebuginfodUrls, std::chrono::milliseconds Timeout); +class ThreadPool; + +struct DebuginfodLogEntry { + std::string Message; + DebuginfodLogEntry() = default; + DebuginfodLogEntry(const Twine &Message); +}; + +class DebuginfodLog { + std::mutex QueueMutex; + std::condition_variable QueueCondition; + std::queue<DebuginfodLogEntry> LogEntryQueue; + +public: + // Adds a log entry to end of the queue. + void push(DebuginfodLogEntry Entry); + // Adds a log entry to end of the queue. + void push(const Twine &Message); + // Blocks until there are log entries in the queue, then pops and returns the + // first one. + DebuginfodLogEntry pop(); +}; + +/// Tracks a collection of debuginfod artifacts on the local filesystem. 
+class DebuginfodCollection { + SmallVector<std::string, 1> Paths; + sys::RWMutex BinariesMutex; + StringMap<std::string> Binaries; + sys::RWMutex DebugBinariesMutex; + StringMap<std::string> DebugBinaries; + Error findBinaries(StringRef Path); + Expected<Optional<std::string>> getDebugBinaryPath(BuildIDRef); + Expected<Optional<std::string>> getBinaryPath(BuildIDRef); + // If the collection has not been updated since MinInterval, call update() and + // return true. Otherwise return false. If update returns an error, return the + // error. + Expected<bool> updateIfStale(); + DebuginfodLog &Log; + ThreadPool &Pool; + Timer UpdateTimer; + sys::Mutex UpdateMutex; + + // Minimum update interval, in seconds, for on-demand updates triggered when a + // build-id is not found. + double MinInterval; + +public: + DebuginfodCollection(ArrayRef<StringRef> Paths, DebuginfodLog &Log, + ThreadPool &Pool, double MinInterval); + Error update(); + Error updateForever(std::chrono::milliseconds Interval); + Expected<std::string> findDebugBinaryPath(BuildIDRef); + Expected<std::string> findBinaryPath(BuildIDRef); +}; + +struct DebuginfodServer { + HTTPServer Server; + DebuginfodLog &Log; + DebuginfodCollection &Collection; + DebuginfodServer(DebuginfodLog &Log, DebuginfodCollection &Collection); +}; + } // end namespace llvm #endif diff --git a/llvm/include/llvm/Debuginfod/HTTPServer.h b/llvm/include/llvm/Debuginfod/HTTPServer.h new file mode 100644 index 000000000000..410ba32b3f2e --- /dev/null +++ b/llvm/include/llvm/Debuginfod/HTTPServer.h @@ -0,0 +1,123 @@ +//===-- llvm/Debuginfod/HTTPServer.h - HTTP server library ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains the declarations of the HTTPServer and HTTPServerRequest +/// classes, the HTTPResponse, and StreamingHTTPResponse structs, and the +/// streamFile function. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_HTTP_SERVER_H +#define LLVM_SUPPORT_HTTP_SERVER_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Error.h" + +#ifdef LLVM_ENABLE_HTTPLIB +// forward declarations +namespace httplib { +class Request; +class Response; +class Server; +} // namespace httplib +#endif + +namespace llvm { + +struct HTTPResponse; +struct StreamingHTTPResponse; +class HTTPServer; + +class HTTPServerRequest { + friend HTTPServer; + +#ifdef LLVM_ENABLE_HTTPLIB +private: + HTTPServerRequest(const httplib::Request &HTTPLibRequest, + httplib::Response &HTTPLibResponse); + httplib::Response &HTTPLibResponse; +#endif + +public: + std::string UrlPath; + /// The elements correspond to match groups in the url path matching regex. + SmallVector<std::string, 1> UrlPathMatches; + + // TODO bring in HTTP headers + + void setResponse(StreamingHTTPResponse Response); + void setResponse(HTTPResponse Response); +}; + +struct HTTPResponse { + unsigned Code; + const char *ContentType; + StringRef Body; +}; + +typedef std::function<void(HTTPServerRequest &)> HTTPRequestHandler; + +/// An HTTPContentProvider is called by the HTTPServer to obtain chunks of the +/// streaming response body. The returned chunk should be located at Offset +/// bytes and have Length bytes. 
+typedef std::function<StringRef(size_t /*Offset*/, size_t /*Length*/)> + HTTPContentProvider; + +/// Wraps the content provider with HTTP Status code and headers. +struct StreamingHTTPResponse { + unsigned Code; + const char *ContentType; + size_t ContentLength; + HTTPContentProvider Provider; + /// Called after the response transfer is complete with the success value of + /// the transfer. + std::function<void(bool)> CompletionHandler = [](bool Success) {}; +}; + +/// Sets the response to stream the file at FilePath, if available, and +/// otherwise an HTTP 404 error response. +bool streamFile(HTTPServerRequest &Request, StringRef FilePath); + +/// An HTTP server which can listen on a single TCP/IP port for HTTP +/// requests and delegate them to the appropriate registered handler. +class HTTPServer { +#ifdef LLVM_ENABLE_HTTPLIB + std::unique_ptr<httplib::Server> Server; + unsigned Port = 0; +#endif +public: + HTTPServer(); + ~HTTPServer(); + + /// Returns true only if LLVM has been compiled with a working HTTPServer. + static bool isAvailable(); + + /// Registers a URL pattern routing rule. When the server is listening, each + /// request is dispatched to the first registered handler whose UrlPathPattern + /// matches the UrlPath. + Error get(StringRef UrlPathPattern, HTTPRequestHandler Handler); + + /// Attempts to assign the requested port and interface, returning an Error + /// upon failure. + Error bind(unsigned Port, const char *HostInterface = "0.0.0.0"); + + /// Attempts to assign any available port and interface, returning either the + /// port number or an Error upon failure. + Expected<unsigned> bind(const char *HostInterface = "0.0.0.0"); + + /// Attempts to listen for requests on the bound port. Returns an Error if + /// called before binding a port. + Error listen(); + + /// If the server is listening, stop and unbind the socket. + void stop(); +}; +} // end namespace llvm + +#endif // LLVM_SUPPORT_HTTP_SERVER_H diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/COFF.h b/llvm/include/llvm/ExecutionEngine/JITLink/COFF.h new file mode 100644 index 000000000000..87d3648d37e8 --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/JITLink/COFF.h @@ -0,0 +1,39 @@ +//===------- COFF.h - Generic JIT link function for COFF ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Generic jit-link functions for COFF. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_JITLINK_COFF_H +#define LLVM_EXECUTIONENGINE_JITLINK_COFF_H + +#include "llvm/ExecutionEngine/JITLink/JITLink.h" + +namespace llvm { +namespace jitlink { + +/// Create a LinkGraph from a COFF relocatable object. +/// +/// Note: The graph does not take ownership of the underlying buffer, nor copy +/// its contents. The caller is responsible for ensuring that the object buffer +/// outlives the graph. +Expected<std::unique_ptr<LinkGraph>> +createLinkGraphFromCOFFObject(MemoryBufferRef ObjectBuffer); + +/// Link the given graph. +/// +/// Uses conservative defaults for GOT and stub handling based on the target +/// platform. 
+void link_COFF(std::unique_ptr<LinkGraph> G, + std::unique_ptr<JITLinkContext> Ctx); + +} // end namespace jitlink +} // end namespace llvm + +#endif // LLVM_EXECUTIONENGINE_JITLINK_COFF_H diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/COFF_x86_64.h b/llvm/include/llvm/ExecutionEngine/JITLink/COFF_x86_64.h new file mode 100644 index 000000000000..fff32d6d9609 --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/JITLink/COFF_x86_64.h @@ -0,0 +1,38 @@ +//===--- COFF_x86_64.h - JIT link functions for COFF/x86-64 ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// jit-link functions for COFF/x86-64. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_JITLINK_COFF_X86_64_H +#define LLVM_EXECUTIONENGINE_JITLINK_COFF_X86_64_H + +#include "llvm/ExecutionEngine/JITLink/JITLink.h" + +namespace llvm { +namespace jitlink { + +/// Create a LinkGraph from a COFF/x86-64 relocatable object. +/// +/// Note: The graph does not take ownership of the underlying buffer, nor copy +/// its contents. The caller is responsible for ensuring that the object buffer +/// outlives the graph. +Expected<std::unique_ptr<LinkGraph>> +createLinkGraphFromCOFFObject_x86_64(MemoryBufferRef ObjectBuffer); + +/// jit-link the given object buffer, which must be a COFF x86-64 object file. +void link_COFF_x86_64(std::unique_ptr<LinkGraph> G, + std::unique_ptr<JITLinkContext> Ctx); + +/// Return the string name of the given COFF x86-64 edge kind. +const char *getCOFFX86RelocationKindName(Edge::Kind R); +} // end namespace jitlink +} // end namespace llvm + +#endif // LLVM_EXECUTIONENGINE_JITLINK_COFF_X86_64_H diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/aarch64.h b/llvm/include/llvm/ExecutionEngine/JITLink/aarch64.h index 53ff6c7a219e..f2c3fba7bcde 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/aarch64.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/aarch64.h @@ -33,7 +33,9 @@ enum EdgeKind_aarch64 : Edge::Kind { GOTPageOffset12, TLVPage21, TLVPageOffset12, - PointerToGOT, + TLSDescPage21, + TLSDescPageOffset12, + Delta32ToGOT, PairedAddend, LDRLiteral19, Delta32, @@ -223,10 +225,12 @@ inline Error applyFixup(LinkGraph &G, Block &B, const Edge &E) { break; } case TLVPage21: - case GOTPage21: case TLVPageOffset12: + case TLSDescPage21: + case TLSDescPageOffset12: + case GOTPage21: case GOTPageOffset12: - case PointerToGOT: { + case Delta32ToGOT: { return make_error<JITLinkError>( "In graph " + G.getName() + ", section " + B.getSection().getName() + "GOT/TLV edge kinds not lowered: " + getEdgeKindName(E.getKind())); @@ -273,8 +277,8 @@ public: "RawInstr isn't a 64-bit LDR immediate"); break; } - case aarch64::PointerToGOT: { - KindToSet = aarch64::Delta64; + case aarch64::Delta32ToGOT: { + KindToSet = aarch64::Delta32; break; } default: diff --git a/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h b/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h index 141dd73548c8..57ffe250a19d 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h @@ -138,9 +138,6 @@ private: using InitSymbolDepMap = DenseMap<MaterializationResponsibility *, JITLinkSymbolSet>; - void 
addEHAndTLVSupportPasses(MaterializationResponsibility &MR, - jitlink::PassConfiguration &Config); - Error associateJITDylibHeaderSymbol(jitlink::LinkGraph &G, MaterializationResponsibility &MR); diff --git a/llvm/include/llvm/Frontend/OpenACC/ACC.td b/llvm/include/llvm/Frontend/OpenACC/ACC.td index c68330665b02..45d815894454 100644 --- a/llvm/include/llvm/Frontend/OpenACC/ACC.td +++ b/llvm/include/llvm/Frontend/OpenACC/ACC.td @@ -128,10 +128,8 @@ def ACCC_DeviceResident : Clause<"device_resident"> { // 2.4 def ACCC_DeviceType : Clause<"device_type"> { - let flangClass = "ScalarIntExpr"; + let flangClass = "AccDeviceTypeExprList"; let defaultValue = "*"; - let isValueOptional = true; - let isValueList = true; } // 2.6.6 @@ -218,6 +216,7 @@ def ACCC_Reduction : Clause<"reduction"> { // 2.5.6 def ACCC_Self : Clause<"self"> { let flangClass = "AccSelfClause"; + let isValueOptional = true; } // 2.9.5 diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index 8a6b1c7d412d..3dfcabffb58a 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -599,9 +599,9 @@ public: /// Add metadata to simd-ize a loop. /// - /// \param DL Debug location for instructions added by unrolling. - /// \param Loop The loop to simd-ize. - void applySimd(DebugLoc DL, CanonicalLoopInfo *Loop); + /// \param Loop The loop to simd-ize. + /// \param Simdlen The Simdlen length to apply to the simd loop. + void applySimd(CanonicalLoopInfo *Loop, ConstantInt *Simdlen); /// Generator for '#omp flush' /// @@ -821,6 +821,23 @@ public: omp::Directive CanceledDirective, FinalizeCallbackTy ExitCB = {}); + /// Generate a target region entry call. + /// + /// \param Loc The location at which the request originated and is fulfilled. + /// \param Return Return value of the created function returned by reference. + /// \param DeviceID Identifier for the device via the 'device' clause. + /// \param NumTeams Number of teams for the region via the 'num_teams' clause + /// or 0 if unspecified and -1 if there is no 'teams' clause. + /// \param NumThreads Number of threads via the 'thread_limit' clause. + /// \param HostPtr Pointer to the host-side pointer of the target kernel. + /// \param KernelArgs Array of arguments to the kernel. + /// \param NoWaitArgs Optional array of arguments to the nowait kernel. + InsertPointTy emitTargetKernel(const LocationDescription &Loc, Value *&Return, + Value *Ident, Value *DeviceID, Value *NumTeams, + Value *NumThreads, Value *HostPtr, + ArrayRef<Value *> KernelArgs, + ArrayRef<Value *> NoWaitArgs = {}); + /// Generate a barrier runtime call. /// /// \param Loc The location at which the request originated and is fulfilled. 
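The HTTPServer interface declared in the new Debuginfod/HTTPServer.h above is small enough to exercise directly. A minimal usage sketch, assuming LLVM was configured with LLVM_ENABLE_HTTPLIB so a working server implementation is compiled in; the route pattern and response body are illustrative only:

    #include "llvm/Debuginfod/HTTPServer.h"
    #include "llvm/Support/Error.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    int main() {
      HTTPServer Server;
      // Echo back the build ID captured by the first match group; match
      // groups arrive in HTTPServerRequest::UrlPathMatches.
      if (Error E = Server.get(R"(/buildid/(.*)/debuginfo)",
                               [](HTTPServerRequest &Request) {
                                 Request.setResponse({200, "text/plain",
                                                      Request.UrlPathMatches[0]});
                               }))
        logAllUnhandledErrors(std::move(E), errs());

      // Bind any available port on the default interface, then serve.
      Expected<unsigned> Port = Server.bind();
      if (!Port) {
        logAllUnhandledErrors(Port.takeError(), errs());
        return 1;
      }
      errs() << "listening on port " << *Port << "\n";
      if (Error E = Server.listen())
        logAllUnhandledErrors(std::move(E), errs());
      return 0;
    }

HTTPServer::isAvailable() reports whether LLVM was compiled with a working server, so callers can check it before attempting to bind.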
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index 14aa53a6b08d..9d1ab57729b7 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -88,6 +88,8 @@ __OMP_ARRAY_TYPE(KmpCriticalName, Int32, 8) __OMP_STRUCT_TYPE(Ident, ident_t, Int32, Int32, Int32, Int32, Int8Ptr) __OMP_STRUCT_TYPE(OffloadEntry, __tgt_offload_entry, Int8Ptr, Int8Ptr, SizeTy, Int32, Int32) +__OMP_STRUCT_TYPE(KernelArgs, __tgt_kernel_arguments, Int32, Int32, VoidPtrPtr, + VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, VoidPtrPtr, Int64) __OMP_STRUCT_TYPE(AsyncInfo, __tgt_async_info, Int8Ptr) #undef __OMP_STRUCT_TYPE @@ -412,6 +414,10 @@ __OMP_RTL(__tgt_target_teams_mapper, false, Int32, IdentPtr, Int64, VoidPtr, Int __OMP_RTL(__tgt_target_teams_nowait_mapper, false, Int32, IdentPtr, Int64, VoidPtr, Int32, VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, VoidPtrPtr, Int32, Int32, Int32, VoidPtr, Int32, VoidPtr) +__OMP_RTL(__tgt_target_kernel, false, Int32, IdentPtr, Int64, Int32, Int32, + VoidPtr, KernelArgsPtr) +__OMP_RTL(__tgt_target_kernel_nowait, false, Int32, IdentPtr, Int64, Int32, + Int32, VoidPtr, KernelArgsPtr, Int32, VoidPtr, Int32, VoidPtr) __OMP_RTL(__tgt_register_requires, false, Void, Int64) __OMP_RTL(__tgt_target_data_begin_mapper, false, Void, IdentPtr, Int64, Int32, VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, VoidPtrPtr) @@ -937,6 +943,10 @@ __OMP_RTL_ATTRS(__tgt_target_teams_mapper, ForkAttrs, AttributeSet(), ParamAttrs()) __OMP_RTL_ATTRS(__tgt_target_teams_nowait_mapper, ForkAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(__tgt_target_kernel, ForkAttrs, AttributeSet(), + ParamAttrs()) +__OMP_RTL_ATTRS(__tgt_target_kernel_nowait, ForkAttrs, AttributeSet(), + ParamAttrs()) __OMP_RTL_ATTRS(__tgt_register_requires, ForkAttrs, AttributeSet(), ParamAttrs()) __OMP_RTL_ATTRS(__tgt_target_data_begin_mapper, ForkAttrs, AttributeSet(), diff --git a/llvm/include/llvm/IR/Attributes.td b/llvm/include/llvm/IR/Attributes.td index 7b955b40b0a8..ea4bf80205f8 100644 --- a/llvm/include/llvm/IR/Attributes.td +++ b/llvm/include/llvm/IR/Attributes.td @@ -102,6 +102,10 @@ def DisableSanitizerInstrumentation: EnumAttr<"disable_sanitizer_instrumentation /// Provide pointer element type to intrinsic. def ElementType : TypeAttr<"elementtype", [ParamAttr]>; +/// Whether to keep return instructions, or replace with a jump to an external +/// symbol. +def FnRetThunkExtern : EnumAttr<"fn_ret_thunk_extern", [FnAttr]>; + /// Function may only access memory that is inaccessible from IR. def InaccessibleMemOnly : EnumAttr<"inaccessiblememonly", [FnAttr]>; diff --git a/llvm/include/llvm/IR/Constant.h b/llvm/include/llvm/IR/Constant.h index a97372ebbad2..09fb2c98bff4 100644 --- a/llvm/include/llvm/IR/Constant.h +++ b/llvm/include/llvm/IR/Constant.h @@ -115,10 +115,6 @@ public: /// any constant expressions. bool containsConstantExpression() const; - /// Return true if evaluation of this constant could trap. This is true for - /// things like constant expressions that could divide by zero. - bool canTrap() const; - /// Return true if the value can vary between threads. 
bool isThreadDependent() const; diff --git a/llvm/include/llvm/IR/ConstantFolder.h b/llvm/include/llvm/IR/ConstantFolder.h index 5e7ddb9aa673..bd28ff87965d 100644 --- a/llvm/include/llvm/IR/ConstantFolder.h +++ b/llvm/include/llvm/IR/ConstantFolder.h @@ -44,8 +44,11 @@ public: Value *RHS) const override { auto *LC = dyn_cast<Constant>(LHS); auto *RC = dyn_cast<Constant>(RHS); - if (LC && RC) - return ConstantExpr::get(Opc, LC, RC); + if (LC && RC) { + if (ConstantExpr::isDesirableBinOp(Opc)) + return ConstantExpr::get(Opc, LC, RC); + return ConstantFoldBinaryInstruction(Opc, LC, RC); + } return nullptr; } @@ -53,9 +56,12 @@ public: bool IsExact) const override { auto *LC = dyn_cast<Constant>(LHS); auto *RC = dyn_cast<Constant>(RHS); - if (LC && RC) - return ConstantExpr::get(Opc, LC, RC, - IsExact ? PossiblyExactOperator::IsExact : 0); + if (LC && RC) { + if (ConstantExpr::isDesirableBinOp(Opc)) + return ConstantExpr::get(Opc, LC, RC, + IsExact ? PossiblyExactOperator::IsExact : 0); + return ConstantFoldBinaryInstruction(Opc, LC, RC); + } return nullptr; } @@ -64,12 +70,15 @@ public: auto *LC = dyn_cast<Constant>(LHS); auto *RC = dyn_cast<Constant>(RHS); if (LC && RC) { - unsigned Flags = 0; - if (HasNUW) - Flags |= OverflowingBinaryOperator::NoUnsignedWrap; - if (HasNSW) - Flags |= OverflowingBinaryOperator::NoSignedWrap; - return ConstantExpr::get(Opc, LC, RC, Flags); + if (ConstantExpr::isDesirableBinOp(Opc)) { + unsigned Flags = 0; + if (HasNUW) + Flags |= OverflowingBinaryOperator::NoUnsignedWrap; + if (HasNSW) + Flags |= OverflowingBinaryOperator::NoSignedWrap; + return ConstantExpr::get(Opc, LC, RC, Flags); + } + return ConstantFoldBinaryInstruction(Opc, LC, RC); } return nullptr; } @@ -79,6 +88,13 @@ public: return FoldBinOp(Opc, LHS, RHS); } + Value *FoldUnOpFMF(Instruction::UnaryOps Opc, Value *V, + FastMathFlags FMF) const override { + if (Constant *C = dyn_cast<Constant>(V)) + return ConstantExpr::get(Opc, C); + return nullptr; + } + Value *FoldICmp(CmpInst::Predicate P, Value *LHS, Value *RHS) const override { auto *LC = dyn_cast<Constant>(LHS); auto *RC = dyn_cast<Constant>(RHS); @@ -155,18 +171,6 @@ public: } //===--------------------------------------------------------------------===// - // Unary Operators - //===--------------------------------------------------------------------===// - - Constant *CreateFNeg(Constant *C) const override { - return ConstantExpr::getFNeg(C); - } - - Constant *CreateUnOp(Instruction::UnaryOps Opc, Constant *C) const override { - return ConstantExpr::get(Opc, C); - } - - //===--------------------------------------------------------------------===// // Cast/Conversion Operators //===--------------------------------------------------------------------===// diff --git a/llvm/include/llvm/IR/Constants.h b/llvm/include/llvm/IR/Constants.h index b5445ff71b74..c50dff43dc74 100644 --- a/llvm/include/llvm/IR/Constants.h +++ b/llvm/include/llvm/IR/Constants.h @@ -1017,19 +1017,10 @@ public: static Constant *getNot(Constant *C); static Constant *getAdd(Constant *C1, Constant *C2, bool HasNUW = false, bool HasNSW = false); - static Constant *getFAdd(Constant *C1, Constant *C2); static Constant *getSub(Constant *C1, Constant *C2, bool HasNUW = false, bool HasNSW = false); - static Constant *getFSub(Constant *C1, Constant *C2); static Constant *getMul(Constant *C1, Constant *C2, bool HasNUW = false, bool HasNSW = false); - static Constant *getFMul(Constant *C1, Constant *C2); - static Constant *getUDiv(Constant *C1, Constant *C2, bool isExact = false); - 
static Constant *getSDiv(Constant *C1, Constant *C2, bool isExact = false); - static Constant *getFDiv(Constant *C1, Constant *C2); - static Constant *getURem(Constant *C1, Constant *C2); - static Constant *getSRem(Constant *C1, Constant *C2); - static Constant *getFRem(Constant *C1, Constant *C2); static Constant *getAnd(Constant *C1, Constant *C2); static Constant *getOr(Constant *C1, Constant *C2); static Constant *getXor(Constant *C1, Constant *C2); @@ -1093,14 +1084,6 @@ public: return getShl(C1, C2, true, false); } - static Constant *getExactSDiv(Constant *C1, Constant *C2) { - return getSDiv(C1, C2, true); - } - - static Constant *getExactUDiv(Constant *C1, Constant *C2) { - return getUDiv(C1, C2, true); - } - static Constant *getExactAShr(Constant *C1, Constant *C2) { return getAShr(C1, C2, true); } @@ -1201,10 +1184,6 @@ public: /// Return true if this is a compare constant expression bool isCompare() const; - /// Return true if this is an insertvalue or extractvalue expression, - /// and the getIndices() method may be used. - bool hasIndices() const; - /// Select constant expr /// /// \param OnlyIfReducedTy see \a getWithOperands() docs. @@ -1294,9 +1273,6 @@ public: static Constant *getShuffleVector(Constant *V1, Constant *V2, ArrayRef<int> Mask, Type *OnlyIfReducedTy = nullptr); - static Constant *getInsertValue(Constant *Agg, Constant *Val, - ArrayRef<unsigned> Idxs, - Type *OnlyIfReducedTy = nullptr); /// Return the opcode at the root of this constant expression unsigned getOpcode() const { return getSubclassDataFromValue(); } @@ -1305,10 +1281,6 @@ public: /// FCMP constant expression. unsigned getPredicate() const; - /// Assert that this is an insertvalue or exactvalue - /// expression and return the list of indices. - ArrayRef<unsigned> getIndices() const; - /// Assert that this is a shufflevector and return the mask. See class /// ShuffleVectorInst for a description of the mask representation. ArrayRef<int> getShuffleMask() const; @@ -1352,6 +1324,14 @@ public: /// would make it harder to remove ConstantExprs altogether. Instruction *getAsInstruction(Instruction *InsertBefore = nullptr) const; + /// Whether creating a constant expression for this binary operator is + /// desirable. + static bool isDesirableBinOp(unsigned Opcode); + + /// Whether creating a constant expression for this binary operator is + /// supported. + static bool isSupportedBinOp(unsigned Opcode); + /// Methods for support type inquiry through isa, cast, and dyn_cast: static bool classof(const Value *V) { return V->getValueID() == ConstantExprVal; diff --git a/llvm/include/llvm/IR/FixedMetadataKinds.def b/llvm/include/llvm/IR/FixedMetadataKinds.def index 7c32c5d13760..1d24f527df7b 100644 --- a/llvm/include/llvm/IR/FixedMetadataKinds.def +++ b/llvm/include/llvm/IR/FixedMetadataKinds.def @@ -44,3 +44,4 @@ LLVM_FIXED_MD_KIND(MD_noundef, "noundef", 29) LLVM_FIXED_MD_KIND(MD_annotation, "annotation", 30) LLVM_FIXED_MD_KIND(MD_nosanitize, "nosanitize", 31) LLVM_FIXED_MD_KIND(MD_func_sanitize, "func_sanitize", 32) +LLVM_FIXED_MD_KIND(MD_exclude, "exclude", 33) diff --git a/llvm/include/llvm/IR/GlobalValue.h b/llvm/include/llvm/IR/GlobalValue.h index a17423dd965b..06702d3cdf6b 100644 --- a/llvm/include/llvm/IR/GlobalValue.h +++ b/llvm/include/llvm/IR/GlobalValue.h @@ -295,26 +295,38 @@ public: void setPartition(StringRef Part); // ASan, HWASan and Memtag sanitizers have some instrumentation that applies - // specifically to global variables. 
This instrumentation is implicitly - applied to all global variables when built with -fsanitize=*. What we need - is a way to persist the information that a certain global variable should - *not* have sanitizers applied, which occurs if: - 1. The global variable is in the sanitizer ignore list, or - 2. The global variable is created by the sanitizers itself for internal - usage, or - 3. The global variable has __attribute__((no_sanitize("..."))) or - __attribute__((disable_sanitizer_instrumentation)). - // - // This is important, a some IR passes like GlobalMerge can delete global - // variables and replace them with new ones. If the old variables were marked - // to be unsanitized, then the new ones should also be. + // specifically to global variables. struct SanitizerMetadata { SanitizerMetadata() - : NoAddress(false), NoHWAddress(false), NoMemtag(false), - IsDynInit(false) {} + : NoAddress(false), NoHWAddress(false), + Memtag(false), IsDynInit(false) {} + // For ASan and HWASan, this instrumentation is implicitly applied to all + // global variables when built with -fsanitize=*. What we need is a way to + // persist the information that a certain global variable should *not* have + // sanitizers applied, which occurs if: + // 1. The global variable is in the sanitizer ignore list, or + // 2. The global variable is created by the sanitizers itself for internal + // usage, or + // 3. The global variable has __attribute__((no_sanitize("..."))) or + // __attribute__((disable_sanitizer_instrumentation)). + // + // This is important, as some IR passes like GlobalMerge can delete global + // variables and replace them with new ones. If the old variables were + // marked to be unsanitized, then the new ones should also be. unsigned NoAddress : 1; unsigned NoHWAddress : 1; - unsigned NoMemtag : 1; + + // Memtag sanitization works differently: sanitization is requested by clang + // when `-fsanitize=memtag-globals` is provided, and the request can be + // denied (and the attribute removed) by the AArch64 global tagging pass if + // it can't be fulfilled (e.g. the global variable is a TLS variable). + // Memtag sanitization has to interact with other parts of LLVM (like + // suppressing certain optimisations, emitting assembly directives, or + // creating special relocation sections). + // + // Use `GlobalValue::isTagged()` to check whether tagging should be enabled + // for a global variable. + unsigned Memtag : 1; // ASan-specific metadata. Is this global variable dynamically initialized // (from a C++ language perspective), and should therefore be checked for @@ -331,6 +343,10 @@ public: void setSanitizerMetadata(SanitizerMetadata Meta); void removeSanitizerMetadata(); + bool isTagged() const { + return hasSanitizerMetadata() && getSanitizerMetadata().Memtag; + } + static LinkageTypes getLinkOnceLinkage(bool ODR) { return ODR ? 
LinkOnceODRLinkage : LinkOnceAnyLinkage; } diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h index d8f08934b3d6..cec26e966b5c 100644 --- a/llvm/include/llvm/IR/IRBuilder.h +++ b/llvm/include/llvm/IR/IRBuilder.h @@ -1162,11 +1162,11 @@ private: RoundingMode UseRounding = DefaultConstrainedRounding; if (Rounding) - UseRounding = Rounding.getValue(); + UseRounding = Rounding.value(); Optional<StringRef> RoundingStr = convertRoundingModeToStr(UseRounding); assert(RoundingStr && "Garbage strict rounding mode!"); - auto *RoundingMDS = MDString::get(Context, RoundingStr.getValue()); + auto *RoundingMDS = MDString::get(Context, RoundingStr.value()); return MetadataAsValue::get(Context, RoundingMDS); } @@ -1175,11 +1175,11 @@ private: fp::ExceptionBehavior UseExcept = DefaultConstrainedExcept; if (Except) - UseExcept = Except.getValue(); + UseExcept = Except.value(); Optional<StringRef> ExceptStr = convertExceptionBehaviorToStr(UseExcept); assert(ExceptStr && "Garbage strict exception behavior!"); - auto *ExceptMDS = MDString::get(Context, ExceptStr.getValue()); + auto *ExceptMDS = MDString::get(Context, ExceptStr.value()); return MetadataAsValue::get(Context, ExceptMDS); } @@ -1588,8 +1588,8 @@ public: Value *CreateFNeg(Value *V, const Twine &Name = "", MDNode *FPMathTag = nullptr) { - if (auto *VC = dyn_cast<Constant>(V)) - return Insert(Folder.CreateFNeg(VC), Name); + if (Value *Res = Folder.FoldUnOpFMF(Instruction::FNeg, V, FMF)) + return Res; return Insert(setFPAttrs(UnaryOperator::CreateFNeg(V), FPMathTag, FMF), Name); } @@ -1598,10 +1598,10 @@ public: /// default FMF. Value *CreateFNegFMF(Value *V, Instruction *FMFSource, const Twine &Name = "") { - if (auto *VC = dyn_cast<Constant>(V)) - return Insert(Folder.CreateFNeg(VC), Name); - return Insert(setFPAttrs(UnaryOperator::CreateFNeg(V), nullptr, - FMFSource->getFastMathFlags()), + FastMathFlags FMF = FMFSource->getFastMathFlags(); + if (Value *Res = Folder.FoldUnOpFMF(Instruction::FNeg, V, FMF)) + return Res; + return Insert(setFPAttrs(UnaryOperator::CreateFNeg(V), nullptr, FMF), Name); } @@ -1612,8 +1612,8 @@ public: Value *CreateUnOp(Instruction::UnaryOps Opc, Value *V, const Twine &Name = "", MDNode *FPMathTag = nullptr) { - if (auto *VC = dyn_cast<Constant>(V)) - return Insert(Folder.CreateUnOp(Opc, VC), Name); + if (Value *Res = Folder.FoldUnOpFMF(Opc, V, FMF)) + return Res; Instruction *UnOp = UnaryOperator::Create(Opc, V); if (isa<FPMathOperator>(UnOp)) setFPAttrs(UnOp, FPMathTag, FMF); diff --git a/llvm/include/llvm/IR/IRBuilderFolder.h b/llvm/include/llvm/IR/IRBuilderFolder.h index 9505f1e3be2a..b2b27235a1e6 100644 --- a/llvm/include/llvm/IR/IRBuilderFolder.h +++ b/llvm/include/llvm/IR/IRBuilderFolder.h @@ -45,6 +45,9 @@ public: virtual Value *FoldBinOpFMF(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, FastMathFlags FMF) const = 0; + virtual Value *FoldUnOpFMF(Instruction::UnaryOps Opc, Value *V, + FastMathFlags FMF) const = 0; + virtual Value *FoldICmp(CmpInst::Predicate P, Value *LHS, Value *RHS) const = 0; @@ -68,13 +71,6 @@ public: ArrayRef<int> Mask) const = 0; //===--------------------------------------------------------------------===// - // Unary Operators - //===--------------------------------------------------------------------===// - - virtual Value *CreateFNeg(Constant *C) const = 0; - virtual Value *CreateUnOp(Instruction::UnaryOps Opc, Constant *C) const = 0; - - //===--------------------------------------------------------------------===// // Cast/Conversion Operators 
//===--------------------------------------------------------------------===// diff --git a/llvm/include/llvm/IR/InlineAsm.h b/llvm/include/llvm/IR/InlineAsm.h index 57f2da27e04e..032a70efdceb 100644 --- a/llvm/include/llvm/IR/InlineAsm.h +++ b/llvm/include/llvm/IR/InlineAsm.h @@ -24,6 +24,7 @@ namespace llvm { +class Error; class FunctionType; class PointerType; template <class ConstantClass> class ConstantUniqueMap; @@ -83,11 +84,9 @@ public: const std::string &getAsmString() const { return AsmString; } const std::string &getConstraintString() const { return Constraints; } - /// Verify - This static method can be used by the parser to check to see if - /// the specified constraint string is legal for the type. This returns true - /// if legal, false if not. - /// - static bool Verify(FunctionType *Ty, StringRef Constraints); + /// This static method can be used by the parser to check to see if the + /// specified constraint string is legal for the type. + static Error verify(FunctionType *Ty, StringRef Constraints); // Constraint String Parsing enum ConstraintPrefix { diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h index d152e86488e1..a14bc39cea65 100644 --- a/llvm/include/llvm/IR/Instructions.h +++ b/llvm/include/llvm/IR/Instructions.h @@ -753,8 +753,16 @@ public: /// *p = old - v FSub, + /// *p = maxnum(old, v) + /// \p maxnum matches the behavior of \p llvm.maxnum.*. + FMax, + + /// *p = minnum(old, v) + /// \p minnum matches the behavior of \p llvm.minnum.*. + FMin, + FIRST_BINOP = Xchg, - LAST_BINOP = FSub, + LAST_BINOP = FMin, BAD_BINOP }; @@ -797,6 +805,8 @@ public: switch (Op) { case AtomicRMWInst::FAdd: case AtomicRMWInst::FSub: + case AtomicRMWInst::FMax: + case AtomicRMWInst::FMin: return true; default: return false; diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index 0dceea13ea36..8bf8e9ca76ad 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1308,7 +1308,7 @@ def int_coro_noop : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>; def int_coro_size : Intrinsic<[llvm_anyint_ty], [], [IntrNoMem]>; def int_coro_align : Intrinsic<[llvm_anyint_ty], [], [IntrNoMem]>; -def int_coro_save : Intrinsic<[llvm_token_ty], [llvm_ptr_ty], []>; +def int_coro_save : Intrinsic<[llvm_token_ty], [llvm_ptr_ty], [IntrNoMerge]>; def int_coro_suspend : Intrinsic<[llvm_i8_ty], [llvm_token_ty, llvm_i1_ty], []>; def int_coro_suspend_retcon : Intrinsic<[llvm_any_ty], [llvm_vararg_ty], []>; def int_coro_prepare_retcon : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty], diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index 1256ab2c9f84..fc66bdfc35e0 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -931,6 +931,7 @@ def int_aarch64_st64bv0: Intrinsic<[llvm_i64_ty], !listconcat([llvm_ptr_ty], dat } +def llvm_nxv1i1_ty : LLVMType<nxv1i1>; def llvm_nxv2i1_ty : LLVMType<nxv2i1>; def llvm_nxv4i1_ty : LLVMType<nxv4i1>; def llvm_nxv8i1_ty : LLVMType<nxv8i1>; @@ -2592,27 +2593,27 @@ let TargetPrefix = "aarch64" in { // Loads def int_aarch64_sme_ld1b_horiz : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>; - def int_aarch64_sme_ld1h_horiz : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>; - def int_aarch64_sme_ld1w_horiz : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>; - def int_aarch64_sme_ld1d_horiz : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>; - def int_aarch64_sme_ld1q_horiz : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>; + 
def int_aarch64_sme_ld1h_horiz : SME_Load_Store_Intrinsic<llvm_nxv8i1_ty>; + def int_aarch64_sme_ld1w_horiz : SME_Load_Store_Intrinsic<llvm_nxv4i1_ty>; + def int_aarch64_sme_ld1d_horiz : SME_Load_Store_Intrinsic<llvm_nxv2i1_ty>; + def int_aarch64_sme_ld1q_horiz : SME_Load_Store_Intrinsic<llvm_nxv1i1_ty>; def int_aarch64_sme_ld1b_vert : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>; - def int_aarch64_sme_ld1h_vert : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>; - def int_aarch64_sme_ld1w_vert : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>; - def int_aarch64_sme_ld1d_vert : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>; - def int_aarch64_sme_ld1q_vert : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>; + def int_aarch64_sme_ld1h_vert : SME_Load_Store_Intrinsic<llvm_nxv8i1_ty>; + def int_aarch64_sme_ld1w_vert : SME_Load_Store_Intrinsic<llvm_nxv4i1_ty>; + def int_aarch64_sme_ld1d_vert : SME_Load_Store_Intrinsic<llvm_nxv2i1_ty>; + def int_aarch64_sme_ld1q_vert : SME_Load_Store_Intrinsic<llvm_nxv1i1_ty>; // Stores def int_aarch64_sme_st1b_horiz : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>; - def int_aarch64_sme_st1h_horiz : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>; - def int_aarch64_sme_st1w_horiz : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>; - def int_aarch64_sme_st1d_horiz : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>; - def int_aarch64_sme_st1q_horiz : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>; + def int_aarch64_sme_st1h_horiz : SME_Load_Store_Intrinsic<llvm_nxv8i1_ty>; + def int_aarch64_sme_st1w_horiz : SME_Load_Store_Intrinsic<llvm_nxv4i1_ty>; + def int_aarch64_sme_st1d_horiz : SME_Load_Store_Intrinsic<llvm_nxv2i1_ty>; + def int_aarch64_sme_st1q_horiz : SME_Load_Store_Intrinsic<llvm_nxv1i1_ty>; def int_aarch64_sme_st1b_vert : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>; - def int_aarch64_sme_st1h_vert : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>; - def int_aarch64_sme_st1w_vert : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>; - def int_aarch64_sme_st1d_vert : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>; - def int_aarch64_sme_st1q_vert : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>; + def int_aarch64_sme_st1h_vert : SME_Load_Store_Intrinsic<llvm_nxv8i1_ty>; + def int_aarch64_sme_st1w_vert : SME_Load_Store_Intrinsic<llvm_nxv4i1_ty>; + def int_aarch64_sme_st1d_vert : SME_Load_Store_Intrinsic<llvm_nxv2i1_ty>; + def int_aarch64_sme_st1q_vert : SME_Load_Store_Intrinsic<llvm_nxv1i1_ty>; // Spill + fill def int_aarch64_sme_ldr : DefaultAttrsIntrinsic< @@ -2663,6 +2664,16 @@ let TargetPrefix = "aarch64" in { def int_aarch64_sme_usmopa_wide : SME_OuterProduct_Intrinsic; def int_aarch64_sme_usmops_wide : SME_OuterProduct_Intrinsic; + class SME_AddVectorToTile_Intrinsic + : DefaultAttrsIntrinsic<[], + [llvm_i64_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_anyvector_ty]>; + + def int_aarch64_sme_addha : SME_AddVectorToTile_Intrinsic; + def int_aarch64_sme_addva : SME_AddVectorToTile_Intrinsic; + // // Counting elements // diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index c2dcfc254568..e81224d9b890 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -1338,7 +1338,7 @@ def int_amdgcn_exp : Intrinsic <[], [ LLVMMatchType<0>, // src2 LLVMMatchType<0>, // src3 llvm_i1_ty, // done - llvm_i1_ty // vm + llvm_i1_ty // vm (ignored on GFX11+) ], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<6>>, ImmArg<ArgIndex<7>>, IntrWriteMem, IntrInaccessibleMemOnly, diff --git 
a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td index 4a21cf1eb7fc..57c47a15bd70 100644 --- a/llvm/include/llvm/IR/IntrinsicsDirectX.td +++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td @@ -10,11 +10,11 @@ // //===----------------------------------------------------------------------===// -let TargetPrefix = "dxil" in { +let TargetPrefix = "dx" in { -def int_dxil_thread_id : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem, IntrWillReturn]>; -def int_dxil_group_id : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem, IntrWillReturn]>; -def int_dxil_thread_id_in_group : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem, IntrWillReturn]>; -def int_dxil_flattened_thread_id_in_group : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrWillReturn]>; +def int_dx_thread_id : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem, IntrWillReturn]>; +def int_dx_group_id : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem, IntrWillReturn]>; +def int_dx_thread_id_in_group : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem, IntrWillReturn]>; +def int_dx_flattened_thread_id_in_group : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrWillReturn]>; } diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td index 0930abcc0993..c274e3504250 100644 --- a/llvm/include/llvm/IR/IntrinsicsX86.td +++ b/llvm/include/llvm/IR/IntrinsicsX86.td @@ -72,6 +72,12 @@ let TargetPrefix = "x86" in { [ImmArg<ArgIndex<1>>]>; } +// Read Processor Register. +let TargetPrefix = "x86" in { + def int_x86_rdpru : ClangBuiltin<"__builtin_ia32_rdpru">, + Intrinsic<[llvm_i64_ty], [llvm_i32_ty], []>; +} + //===----------------------------------------------------------------------===// // CET SS let TargetPrefix = "x86" in { diff --git a/llvm/include/llvm/IR/Metadata.h b/llvm/include/llvm/IR/Metadata.h index be359d94f812..ec769ce95160 100644 --- a/llvm/include/llvm/IR/Metadata.h +++ b/llvm/include/llvm/IR/Metadata.h @@ -951,7 +951,9 @@ class MDNode : public Metadata { /// The operands are in turn located immediately before the header. /// For resizable MDNodes, the space for the storage vector is also allocated /// immediately before the header, overlapping with the operands. - struct Header { + /// Explicitly set alignment because bitfields by default have an + /// alignment of 1 on z/OS. + struct alignas(alignof(size_t)) Header { bool IsResizable : 1; bool IsLarge : 1; size_t SmallSize : 4; @@ -997,7 +999,13 @@ class MDNode : public Metadata { alignTo(getAllocSize(), alignof(uint64_t)); } - void *getLargePtr() const; + void *getLargePtr() const { + static_assert(alignof(LargeStorageVector) <= alignof(Header), + "LargeStorageVector too strongly aligned"); + return reinterpret_cast<char *>(const_cast<Header *>(this)) - + sizeof(LargeStorageVector); + } + void *getSmallPtr(); LargeStorageVector &getLarge() { @@ -1030,6 +1038,12 @@ class MDNode : public Metadata { return makeArrayRef(reinterpret_cast<const MDOperand *>(this) - SmallSize, SmallNumOps); } + + unsigned getNumOperands() const { + if (!IsLarge) + return SmallNumOps; + return getLarge().size(); + } }; Header &getHeader() { return *(reinterpret_cast<Header *>(this) - 1); } @@ -1281,7 +1295,7 @@ public: } /// Return number of MDNode operands.
- unsigned getNumOperands() const { return getHeader().operands().size(); } + unsigned getNumOperands() const { return getHeader().getNumOperands(); } /// Methods for support type inquiry through isa, cast, and dyn_cast: static bool classof(const Metadata *MD) { diff --git a/llvm/include/llvm/IR/Module.h b/llvm/include/llvm/IR/Module.h index fc2d60947118..24da08d70b72 100644 --- a/llvm/include/llvm/IR/Module.h +++ b/llvm/include/llvm/IR/Module.h @@ -911,6 +911,10 @@ public: StringRef getStackProtectorGuardReg() const; void setStackProtectorGuardReg(StringRef Reg); + /// Get/set a symbol to use as the stack protector guard. + StringRef getStackProtectorGuardSymbol() const; + void setStackProtectorGuardSymbol(StringRef Symbol); + /// Get/set what offset from the stack protector to use. int getStackProtectorGuardOffset() const; void setStackProtectorGuardOffset(int Offset); diff --git a/llvm/include/llvm/IR/ModuleSummaryIndex.h b/llvm/include/llvm/IR/ModuleSummaryIndex.h index b76bc879fb45..f1dd29926278 100644 --- a/llvm/include/llvm/IR/ModuleSummaryIndex.h +++ b/llvm/include/llvm/IR/ModuleSummaryIndex.h @@ -1153,8 +1153,8 @@ private: // Used in cases where we want to record the name of a global, but // don't have the string owned elsewhere (e.g. the Strtab on a module). - StringSaver Saver; BumpPtrAllocator Alloc; + StringSaver Saver; // The total number of basic blocks in the module in the per-module summary or // the total number of basic blocks in the LTO unit in the combined index. diff --git a/llvm/include/llvm/IR/NoFolder.h b/llvm/include/llvm/IR/NoFolder.h index 4e9f772dfdb6..56ccfc694c5f 100644 --- a/llvm/include/llvm/IR/NoFolder.h +++ b/llvm/include/llvm/IR/NoFolder.h @@ -65,6 +65,11 @@ public: return nullptr; } + Value *FoldUnOpFMF(Instruction::UnaryOps Opc, Value *V, + FastMathFlags FMF) const override { + return nullptr; + } + Value *FoldICmp(CmpInst::Predicate P, Value *LHS, Value *RHS) const override { return nullptr; } @@ -103,19 +108,6 @@ public: } //===--------------------------------------------------------------------===// - // Unary Operators - //===--------------------------------------------------------------------===// - - Instruction *CreateFNeg(Constant *C) const override { - return UnaryOperator::CreateFNeg(C); - } - - Instruction *CreateUnOp(Instruction::UnaryOps Opc, - Constant *C) const override { - return UnaryOperator::Create(Opc, C); - } - - //===--------------------------------------------------------------------===// // Cast/Conversion Operators //===--------------------------------------------------------------------===// diff --git a/llvm/include/llvm/IR/OptBisect.h b/llvm/include/llvm/IR/OptBisect.h index 63fd98073b51..14488bb1b37c 100644 --- a/llvm/include/llvm/IR/OptBisect.h +++ b/llvm/include/llvm/IR/OptBisect.h @@ -15,7 +15,6 @@ #define LLVM_IR_OPTBISECT_H #include "llvm/ADT/StringRef.h" -#include "llvm/Support/ManagedStatic.h" #include <limits> namespace llvm { @@ -90,7 +89,8 @@ private: /// Singleton instance of the OptBisect class, so multiple pass managers don't /// need to coordinate their uses of OptBisect. 
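The hunk below retires the ManagedStatic singleton in favor of an accessor function. As a minimal sketch of what such an accessor typically looks like (the out-of-line definition is not part of this header diff, so this assumes it uses a function-local static):

#include "llvm/IR/OptBisect.h"

OptBisect &llvm::getOptBisector() {
  // Constructed on first use; initialization is thread-safe since C++11,
  // which removes the need for ManagedStatic's lazy-init machinery.
  static OptBisect OptBisectSingleton;
  return OptBisectSingleton;
}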
-extern ManagedStatic<OptBisect> OptBisector; +OptBisect &getOptBisector(); + } // end namespace llvm #endif // LLVM_IR_OPTBISECT_H diff --git a/llvm/include/llvm/LTO/Config.h b/llvm/include/llvm/LTO/Config.h index 54bb82d84d96..b2ed8e60bd77 100644 --- a/llvm/include/llvm/LTO/Config.h +++ b/llvm/include/llvm/LTO/Config.h @@ -267,8 +267,12 @@ struct Config { /// the given output file name, and (2) creates a resolution file whose name /// is prefixed by the given output file name and sets ResolutionFile to its /// file handle. + /// + /// SaveTempsArgs can be specified to select which temps to save. + /// If SaveTempsArgs is not provided, all temps are saved. Error addSaveTemps(std::string OutputFileName, - bool UseInputModulePath = false); + bool UseInputModulePath = false, + const DenseSet<StringRef> &SaveTempsArgs = {}); }; struct LTOLLVMDiagnosticHandler : public DiagnosticHandler { diff --git a/llvm/include/llvm/MC/MCDwarf.h b/llvm/include/llvm/MC/MCDwarf.h index ce65b173b3d2..8b2ae84749b4 100644 --- a/llvm/include/llvm/MC/MCDwarf.h +++ b/llvm/include/llvm/MC/MCDwarf.h @@ -374,8 +374,8 @@ public: Header.RootFile.DirIndex = 0; Header.RootFile.Checksum = Checksum; Header.RootFile.Source = Source; - Header.trackMD5Usage(Checksum.hasValue()); - Header.HasSource = Source.hasValue(); + Header.trackMD5Usage(Checksum.has_value()); + Header.HasSource = Source.has_value(); } void resetFileTable() { Header.resetFileTable(); } diff --git a/llvm/include/llvm/MC/MCSymbolWasm.h b/llvm/include/llvm/MC/MCSymbolWasm.h index 5eab32cb5c12..33ec0db8fcab 100644 --- a/llvm/include/llvm/MC/MCSymbolWasm.h +++ b/llvm/include/llvm/MC/MCSymbolWasm.h @@ -89,7 +89,7 @@ public: bool hasImportModule() const { return ImportModule.has_value(); } StringRef getImportModule() const { if (ImportModule) - return ImportModule.getValue(); + return ImportModule.value(); // Use a default module name of "env" for now, for compatibility with // existing tools. 
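These MC-layer hunks continue a rename that recurs throughout this import: llvm::Optional adopts the std::optional spelling, so hasValue()/getValue() become has_value()/value(). A minimal sketch of the pattern (the helper name is hypothetical):

#include "llvm/ADT/Optional.h"

// Unwrap with the new std::optional-style spelling.
int valueOrZero(llvm::Optional<int> O) {
  return O.has_value() ? O.value() : 0; // was: O.hasValue() / O.getValue()
}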
// TODO(sbc): Find a way to specify a default value in the object format @@ -101,13 +101,13 @@ public: bool hasImportName() const { return ImportName.has_value(); } StringRef getImportName() const { if (ImportName) - return ImportName.getValue(); + return ImportName.value(); return getName(); } void setImportName(StringRef Name) { ImportName = Name; } bool hasExportName() const { return ExportName.has_value(); } - StringRef getExportName() const { return ExportName.getValue(); } + StringRef getExportName() const { return ExportName.value(); } void setExportName(StringRef Name) { ExportName = Name; } bool isFunctionTable() const { @@ -130,14 +130,14 @@ public: const wasm::WasmGlobalType &getGlobalType() const { assert(GlobalType); - return GlobalType.getValue(); + return GlobalType.value(); } void setGlobalType(wasm::WasmGlobalType GT) { GlobalType = GT; } bool hasTableType() const { return TableType.has_value(); } const wasm::WasmTableType &getTableType() const { assert(hasTableType()); - return TableType.getValue(); + return TableType.value(); } void setTableType(wasm::WasmTableType TT) { TableType = TT; } void setTableType(wasm::ValType VT) { diff --git a/llvm/include/llvm/MC/MCSymbolXCOFF.h b/llvm/include/llvm/MC/MCSymbolXCOFF.h index 2ec265e66300..cc19f882e6ad 100644 --- a/llvm/include/llvm/MC/MCSymbolXCOFF.h +++ b/llvm/include/llvm/MC/MCSymbolXCOFF.h @@ -40,7 +40,7 @@ public: XCOFF::StorageClass getStorageClass() const { assert(StorageClass && "StorageClass not set on XCOFF MCSymbol."); - return StorageClass.getValue(); + return StorageClass.value(); } StringRef getUnqualifiedName() const { return getUnqualifiedName(getName()); } diff --git a/llvm/include/llvm/ObjCopy/CommonConfig.h b/llvm/include/llvm/ObjCopy/CommonConfig.h index 24503caed342..4921f5281ca6 100644 --- a/llvm/include/llvm/ObjCopy/CommonConfig.h +++ b/llvm/include/llvm/ObjCopy/CommonConfig.h @@ -241,6 +241,7 @@ struct CommonConfig { StringMap<SectionRename> SectionsToRename; StringMap<uint64_t> SetSectionAlignment; StringMap<SectionFlagsUpdate> SetSectionFlags; + StringMap<uint64_t> SetSectionType; StringMap<StringRef> SymbolsToRename; // Symbol info specified by --add-symbol option. diff --git a/llvm/include/llvm/Object/Decompressor.h b/llvm/include/llvm/Object/Decompressor.h index e04ee3c3e4c0..00b6c2016742 100644 --- a/llvm/include/llvm/Object/Decompressor.h +++ b/llvm/include/llvm/Object/Decompressor.h @@ -33,12 +33,12 @@ public: /// @param Out Destination buffer. template <class T> Error resizeAndDecompress(T &Out) { Out.resize(DecompressedSize); - return decompress({Out.data(), (size_t)DecompressedSize}); + return decompress({(uint8_t *)Out.data(), (size_t)DecompressedSize}); } /// Uncompress section data to raw buffer provided. /// @param Buffer Destination buffer. - Error decompress(MutableArrayRef<char> Buffer); + Error decompress(MutableArrayRef<uint8_t> Buffer); /// Return memory buffer size required for decompression. 
uint64_t getDecompressedSize() { return DecompressedSize; } diff --git a/llvm/include/llvm/Support/ARMTargetParser.def b/llvm/include/llvm/Support/ARMTargetParser.def index 6a1ac7213dad..645c3b8963f5 100644 --- a/llvm/include/llvm/Support/ARMTargetParser.def +++ b/llvm/include/llvm/Support/ARMTargetParser.def @@ -303,6 +303,9 @@ ARM_CPU_NAME("cortex-m33", ARMV8MMainline, FK_FPV5_SP_D16, false, ARM::AEK_DSP) ARM_CPU_NAME("cortex-m35p", ARMV8MMainline, FK_FPV5_SP_D16, false, ARM::AEK_DSP) ARM_CPU_NAME("cortex-m55", ARMV8_1MMainline, FK_FP_ARMV8_FULLFP16_D16, false, (ARM::AEK_DSP | ARM::AEK_SIMD | ARM::AEK_FP | ARM::AEK_FP16)) +ARM_CPU_NAME("cortex-m85", ARMV8_1MMainline, FK_FP_ARMV8_FULLFP16_D16, false, + (ARM::AEK_DSP | ARM::AEK_SIMD | ARM::AEK_FP | ARM::AEK_FP16 | + ARM::AEK_RAS | ARM::AEK_PACBTI)) ARM_CPU_NAME("cortex-a32", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC) ARM_CPU_NAME("cortex-a35", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC) ARM_CPU_NAME("cortex-a53", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC) diff --git a/llvm/include/llvm/Support/Allocator.h b/llvm/include/llvm/Support/Allocator.h index 5ca0c9decac3..041729fa6594 100644 --- a/llvm/include/llvm/Support/Allocator.h +++ b/llvm/include/llvm/Support/Allocator.h @@ -63,7 +63,9 @@ template <typename AllocatorT = MallocAllocator, size_t SlabSize = 4096, class BumpPtrAllocatorImpl : public AllocatorBase<BumpPtrAllocatorImpl<AllocatorT, SlabSize, SizeThreshold, GrowthDelay>>, - private AllocatorT { + private detail::AllocatorHolder<AllocatorT> { + using AllocTy = detail::AllocatorHolder<AllocatorT>; + public: static_assert(SizeThreshold <= SlabSize, "The SizeThreshold must be at most the SlabSize to ensure " @@ -77,12 +79,12 @@ public: template <typename T> BumpPtrAllocatorImpl(T &&Allocator) - : AllocatorT(std::forward<T &&>(Allocator)) {} + : AllocTy(std::forward<T &&>(Allocator)) {} // Manually implement a move constructor as we must clear the old allocator's // slabs as a matter of correctness. BumpPtrAllocatorImpl(BumpPtrAllocatorImpl &&Old) - : AllocatorT(static_cast<AllocatorT &&>(Old)), CurPtr(Old.CurPtr), + : AllocTy(std::move(Old.getAllocator())), CurPtr(Old.CurPtr), End(Old.End), Slabs(std::move(Old.Slabs)), CustomSizedSlabs(std::move(Old.CustomSizedSlabs)), BytesAllocated(Old.BytesAllocated), RedZoneSize(Old.RedZoneSize) { @@ -107,7 +109,7 @@ public: RedZoneSize = RHS.RedZoneSize; Slabs = std::move(RHS.Slabs); CustomSizedSlabs = std::move(RHS.CustomSizedSlabs); - AllocatorT::operator=(static_cast<AllocatorT &&>(RHS)); + AllocTy::operator=(std::move(RHS.getAllocator())); RHS.CurPtr = RHS.End = nullptr; RHS.BytesAllocated = 0; @@ -175,7 +177,7 @@ public: size_t PaddedSize = SizeToAllocate + Alignment.value() - 1; if (PaddedSize > SizeThreshold) { void *NewSlab = - AllocatorT::Allocate(PaddedSize, alignof(std::max_align_t)); + this->getAllocator().Allocate(PaddedSize, alignof(std::max_align_t)); // We own the new slab and don't want anyone reading anything other than // pieces returned from this method. So poison the whole slab. __asan_poison_memory_region(NewSlab, PaddedSize); @@ -334,8 +336,8 @@ private: void StartNewSlab() { size_t AllocatedSlabSize = computeSlabSize(Slabs.size()); - void *NewSlab = - AllocatorT::Allocate(AllocatedSlabSize, alignof(std::max_align_t)); + void *NewSlab = this->getAllocator().Allocate(AllocatedSlabSize, + alignof(std::max_align_t)); // We own the new slab and don't want anyone reading anything other than // pieces returned from this method.
So poison the whole slab. __asan_poison_memory_region(NewSlab, AllocatedSlabSize); @@ -351,7 +353,8 @@ private: for (; I != E; ++I) { size_t AllocatedSlabSize = computeSlabSize(std::distance(Slabs.begin(), I)); - AllocatorT::Deallocate(*I, AllocatedSlabSize, alignof(std::max_align_t)); + this->getAllocator().Deallocate(*I, AllocatedSlabSize, + alignof(std::max_align_t)); } } @@ -360,7 +363,7 @@ private: for (auto &PtrAndSize : CustomSizedSlabs) { void *Ptr = PtrAndSize.first; size_t Size = PtrAndSize.second; - AllocatorT::Deallocate(Ptr, Size, alignof(std::max_align_t)); + this->getAllocator().Deallocate(Ptr, Size, alignof(std::max_align_t)); } } diff --git a/llvm/include/llvm/Support/AllocatorBase.h b/llvm/include/llvm/Support/AllocatorBase.h index eccced1d1ff4..5d05d3f8777b 100644 --- a/llvm/include/llvm/Support/AllocatorBase.h +++ b/llvm/include/llvm/Support/AllocatorBase.h @@ -99,6 +99,28 @@ public: void PrintStats() const {} }; +namespace detail { + +template <typename Alloc> class AllocatorHolder : Alloc { +public: + AllocatorHolder() = default; + AllocatorHolder(const Alloc &A) : Alloc(A) {} + AllocatorHolder(Alloc &&A) : Alloc(static_cast<Alloc &&>(A)) {} + Alloc &getAllocator() { return *this; } + const Alloc &getAllocator() const { return *this; } +}; + +template <typename Alloc> class AllocatorHolder<Alloc &> { + Alloc &A; + +public: + AllocatorHolder(Alloc &A) : A(A) {} + Alloc &getAllocator() { return A; } + const Alloc &getAllocator() const { return A; } +}; + +} // namespace detail + } // namespace llvm #endif // LLVM_SUPPORT_ALLOCATORBASE_H diff --git a/llvm/include/llvm/Support/Casting.h b/llvm/include/llvm/Support/Casting.h index 894c1f439b64..5444d777b749 100644 --- a/llvm/include/llvm/Support/Casting.h +++ b/llvm/include/llvm/Support/Casting.h @@ -638,9 +638,7 @@ template <typename T, typename Enable = void> struct ValueIsPresent { template <typename T> struct ValueIsPresent<Optional<T>> { using UnwrappedType = T; static inline bool isPresent(const Optional<T> &t) { return t.has_value(); } - static inline decltype(auto) unwrapValue(Optional<T> &t) { - return t.getValue(); - } + static inline decltype(auto) unwrapValue(Optional<T> &t) { return t.value(); } }; // If something is "nullable" then we just compare it to nullptr to see if it diff --git a/llvm/include/llvm/Support/CodeGen.h b/llvm/include/llvm/Support/CodeGen.h index 71d0ddbfe05e..425d3a3d95d4 100644 --- a/llvm/include/llvm/Support/CodeGen.h +++ b/llvm/include/llvm/Support/CodeGen.h @@ -103,6 +103,13 @@ namespace llvm { Async = 2, ///< "Asynchronous" unwind tables (instr precise) Default = 2, }; + + enum class FunctionReturnThunksKind : unsigned int { + Keep = 0, ///< No function return thunk. + Extern = 1, ///< Replace returns with jump to thunk, don't emit thunk. + Invalid = 2, ///< Not used. 
+ }; + } // namespace llvm #endif diff --git a/llvm/include/llvm/Support/Compression.h b/llvm/include/llvm/Support/Compression.h index e6f898229412..c99f811459ab 100644 --- a/llvm/include/llvm/Support/Compression.h +++ b/llvm/include/llvm/Support/Compression.h @@ -13,6 +13,7 @@ #ifndef LLVM_SUPPORT_COMPRESSION_H #define LLVM_SUPPORT_COMPRESSION_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/Support/DataTypes.h" namespace llvm { @@ -20,28 +21,30 @@ template <typename T> class SmallVectorImpl; class Error; class StringRef; +namespace compression { namespace zlib { -static constexpr int NoCompression = 0; -static constexpr int BestSpeedCompression = 1; -static constexpr int DefaultCompression = 6; -static constexpr int BestSizeCompression = 9; +constexpr int NoCompression = 0; +constexpr int BestSpeedCompression = 1; +constexpr int DefaultCompression = 6; +constexpr int BestSizeCompression = 9; bool isAvailable(); -void compress(StringRef InputBuffer, SmallVectorImpl<char> &CompressedBuffer, +void compress(ArrayRef<uint8_t> Input, + SmallVectorImpl<uint8_t> &CompressedBuffer, int Level = DefaultCompression); -Error uncompress(StringRef InputBuffer, char *UncompressedBuffer, +Error uncompress(ArrayRef<uint8_t> Input, uint8_t *UncompressedBuffer, size_t &UncompressedSize); -Error uncompress(StringRef InputBuffer, - SmallVectorImpl<char> &UncompressedBuffer, +Error uncompress(ArrayRef<uint8_t> Input, + SmallVectorImpl<uint8_t> &UncompressedBuffer, size_t UncompressedSize); -uint32_t crc32(StringRef Buffer); +} // End of namespace zlib -} // End of namespace zlib +} // End of namespace compression } // End of namespace llvm diff --git a/llvm/include/llvm/Support/ConvertUTF.h b/llvm/include/llvm/Support/ConvertUTF.h index 662f3aca5b54..1e05cfe1f424 100644 --- a/llvm/include/llvm/Support/ConvertUTF.h +++ b/llvm/include/llvm/Support/ConvertUTF.h @@ -181,6 +181,8 @@ Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd); Boolean isLegalUTF8String(const UTF8 **source, const UTF8 *sourceEnd); +unsigned getUTF8SequenceSize(const UTF8 *source, const UTF8 *sourceEnd); + unsigned getNumBytesForUTF8(UTF8 firstByte); /*************************************************************************/ diff --git a/llvm/include/llvm/Support/Error.h b/llvm/include/llvm/Support/Error.h index 1a801b6f2c7a..3c2c2c8b8ceb 100644 --- a/llvm/include/llvm/Support/Error.h +++ b/llvm/include/llvm/Support/Error.h @@ -1270,7 +1270,7 @@ public: assert(Err && "Trying to log after takeError()."); OS << "'" << FileName << "': "; if (Line) - OS << "line " << Line.getValue() << ": "; + OS << "line " << Line.value() << ": "; Err->log(OS); } diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def index 8df7ced0029d..5d6be0fe655e 100644 --- a/llvm/include/llvm/Support/TargetOpcodes.def +++ b/llvm/include/llvm/Support/TargetOpcodes.def @@ -384,6 +384,8 @@ HANDLE_TARGET_OPCODE(G_ATOMICRMW_UMAX) HANDLE_TARGET_OPCODE(G_ATOMICRMW_UMIN) HANDLE_TARGET_OPCODE(G_ATOMICRMW_FADD) HANDLE_TARGET_OPCODE(G_ATOMICRMW_FSUB) +HANDLE_TARGET_OPCODE(G_ATOMICRMW_FMAX) +HANDLE_TARGET_OPCODE(G_ATOMICRMW_FMIN) // Generic atomic fence HANDLE_TARGET_OPCODE(G_FENCE) diff --git a/llvm/include/llvm/Support/X86TargetParser.def b/llvm/include/llvm/Support/X86TargetParser.def index 58fa3b3842e7..1a928e5a9acc 100644 --- a/llvm/include/llvm/Support/X86TargetParser.def +++ b/llvm/include/llvm/Support/X86TargetParser.def @@ -178,6 +178,7 @@ X86_FEATURE (PREFETCHWT1, "prefetchwt1") X86_FEATURE (PRFCHW, "prfchw") 
X86_FEATURE (PTWRITE, "ptwrite") X86_FEATURE (RDPID, "rdpid") +X86_FEATURE (RDPRU, "rdpru") X86_FEATURE (RDRND, "rdrnd") X86_FEATURE (RDSEED, "rdseed") X86_FEATURE (RTM, "rtm") diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td index 3e2f18b57d1e..5652e60d081c 100644 --- a/llvm/include/llvm/Target/GenericOpcodes.td +++ b/llvm/include/llvm/Target/GenericOpcodes.td @@ -1126,6 +1126,8 @@ def G_ATOMICRMW_UMAX : G_ATOMICRMW_OP; def G_ATOMICRMW_UMIN : G_ATOMICRMW_OP; def G_ATOMICRMW_FADD : G_ATOMICRMW_OP; def G_ATOMICRMW_FSUB : G_ATOMICRMW_OP; +def G_ATOMICRMW_FMAX : G_ATOMICRMW_OP; +def G_ATOMICRMW_FMIN : G_ATOMICRMW_OP; def G_FENCE : GenericInstruction { let OutOperandList = (outs); diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td index 12eee24b578f..ef4fc85b245d 100644 --- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td +++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td @@ -184,6 +184,8 @@ def : GINodeEquiv<G_STORE, st> { let CheckMMOIsNonAtomic = true; } def : GINodeEquiv<G_LOAD, atomic_load> { let CheckMMOIsNonAtomic = false; let CheckMMOIsAtomic = true; + let IfSignExtend = G_SEXTLOAD; + let IfZeroExtend = G_ZEXTLOAD; } // Operands are swapped for atomic_store vs. regular store @@ -206,6 +208,8 @@ def : GINodeEquiv<G_ATOMICRMW_UMIN, atomic_load_umin>; def : GINodeEquiv<G_ATOMICRMW_UMAX, atomic_load_umax>; def : GINodeEquiv<G_ATOMICRMW_FADD, atomic_load_fadd>; def : GINodeEquiv<G_ATOMICRMW_FSUB, atomic_load_fsub>; +def : GINodeEquiv<G_ATOMICRMW_FMAX, atomic_load_fmax>; +def : GINodeEquiv<G_ATOMICRMW_FMIN, atomic_load_fmin>; def : GINodeEquiv<G_FENCE, atomic_fence>; // Specifies the GlobalISel equivalents for SelectionDAG's ComplexPattern. diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td index 47b686aca7b5..171fdb1b98e0 100644 --- a/llvm/include/llvm/Target/TargetSelectionDAG.td +++ b/llvm/include/llvm/Target/TargetSelectionDAG.td @@ -651,6 +651,10 @@ def atomic_load_fadd : SDNode<"ISD::ATOMIC_LOAD_FADD" , SDTFPAtomic2, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def atomic_load_fsub : SDNode<"ISD::ATOMIC_LOAD_FSUB" , SDTFPAtomic2, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; +def atomic_load_fmax : SDNode<"ISD::ATOMIC_LOAD_FMAX", SDTFPAtomic2, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; +def atomic_load_fmin : SDNode<"ISD::ATOMIC_LOAD_FMIN", SDTFPAtomic2, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def atomic_load : SDNode<"ISD::ATOMIC_LOAD", SDTAtomicLoad, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; @@ -805,6 +809,10 @@ class PatFrags<dag ops, list<dag> frags, code pred = [{}], // They will be tested prior to the code in pred and must not be used in // ImmLeaf and its subclasses. + // If set to true, a predicate is added that checks for the absence of use of + // the first result. + bit HasNoUse = ?; + // Is the desired pre-packaged predicate for a load? bit IsLoad = ?; // Is the desired pre-packaged predicate for a store? @@ -1619,18 +1627,34 @@ defm atomic_load_umax : binary_atomic_op<atomic_load_umax>; defm atomic_store : binary_atomic_op<atomic_store>; defm atomic_cmp_swap : ternary_atomic_op<atomic_cmp_swap>; +/// Atomic load which zeroes the excess high bits. +def atomic_load_zext : + PatFrag<(ops node:$ptr), (atomic_load node:$ptr)> { + let IsAtomic = true; // FIXME: Should be IsLoad and/or IsAtomic? 
+ let IsZeroExtLoad = true; +} + +/// Atomic load which sign extends the excess high bits. +def atomic_load_sext : + PatFrag<(ops node:$ptr), (atomic_load node:$ptr)> { + let IsAtomic = true; // FIXME: Should be IsLoad and/or IsAtomic? + let IsSignExtLoad = true; +} + def atomic_load_8 : PatFrag<(ops node:$ptr), (atomic_load node:$ptr)> { let IsAtomic = true; let MemoryVT = i8; } + def atomic_load_16 : PatFrag<(ops node:$ptr), (atomic_load node:$ptr)> { let IsAtomic = true; let MemoryVT = i16; } + def atomic_load_32 : PatFrag<(ops node:$ptr), (atomic_load node:$ptr)> { @@ -1644,6 +1668,40 @@ def atomic_load_64 : let MemoryVT = i64; } +def atomic_load_zext_8 : + PatFrag<(ops node:$ptr), (atomic_load_zext node:$ptr)> { + let IsAtomic = true; // FIXME: Should be IsLoad and/or IsAtomic? + let MemoryVT = i8; +} + +def atomic_load_zext_16 : + PatFrag<(ops node:$ptr), (atomic_load_zext node:$ptr)> { + let IsAtomic = true; // FIXME: Should be IsLoad and/or IsAtomic? + let MemoryVT = i16; +} + +def atomic_load_sext_8 : + PatFrag<(ops node:$ptr), (atomic_load_sext node:$ptr)> { + let IsAtomic = true; // FIXME: Should be IsLoad and/or IsAtomic? + let MemoryVT = i8; +} + +def atomic_load_sext_16 : + PatFrag<(ops node:$ptr), (atomic_load_sext node:$ptr)> { + let IsAtomic = true; // FIXME: Should be IsLoad and/or IsAtomic? + let MemoryVT = i16; +} + +// Atomic load which zeroes or anyextends the high bits. +def atomic_load_az_8 : PatFrags<(ops node:$op), + [(atomic_load_8 node:$op), + (atomic_load_zext_8 node:$op)]>; + +// Atomic load which zeroes or anyextends the high bits. +def atomic_load_az_16 : PatFrags<(ops node:$op), + [(atomic_load_16 node:$op), + (atomic_load_zext_16 node:$op)]>; + def nonext_masked_gather : PatFrag<(ops node:$def, node:$pred, node:$ptr, node:$idx), (masked_gather node:$def, node:$pred, node:$ptr, node:$idx), [{ diff --git a/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h b/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h index 2676f2705424..c8ea1f5b6624 100644 --- a/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h +++ b/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h @@ -211,8 +211,6 @@ private: void addExtensionsToPM(ExtensionPointTy ETy, legacy::PassManagerBase &PM) const; void addInitialAliasAnalysisPasses(legacy::PassManagerBase &PM) const; - void addLTOOptimizationPasses(legacy::PassManagerBase &PM); - void addLateLTOOptimizationPasses(legacy::PassManagerBase &PM); void addFunctionSimplificationPasses(legacy::PassManagerBase &MPM); void addVectorPasses(legacy::PassManagerBase &PM, bool IsFullLTO); diff --git a/llvm/include/llvm/Transforms/Utils/Debugify.h b/llvm/include/llvm/Transforms/Utils/Debugify.h index 405bbb8e0be8..24b9eeab6ee4 100644 --- a/llvm/include/llvm/Transforms/Utils/Debugify.h +++ b/llvm/include/llvm/Transforms/Utils/Debugify.h @@ -101,7 +101,18 @@ llvm::FunctionPass *createDebugifyFunctionPass( llvm::StringRef NameOfWrappedPass = "", DebugInfoPerPass *DebugInfoBeforePass = nullptr); -struct NewPMDebugifyPass : public llvm::PassInfoMixin<NewPMDebugifyPass> { +class NewPMDebugifyPass : public llvm::PassInfoMixin<NewPMDebugifyPass> { + llvm::StringRef NameOfWrappedPass; + DebugInfoPerPass *DebugInfoBeforePass = nullptr; + enum DebugifyMode Mode = DebugifyMode::NoDebugify; +public: + NewPMDebugifyPass( + enum DebugifyMode Mode = DebugifyMode::SyntheticDebugInfo, + llvm::StringRef NameOfWrappedPass = "", + DebugInfoPerPass *DebugInfoBeforePass = nullptr) + : NameOfWrappedPass(NameOfWrappedPass), + 
DebugInfoBeforePass(DebugInfoBeforePass), Mode(Mode) {} + llvm::PreservedAnalyses run(llvm::Module &M, llvm::ModuleAnalysisManager &AM); }; @@ -148,18 +159,65 @@ llvm::FunctionPass *createCheckDebugifyFunctionPass( DebugInfoPerPass *DebugInfoBeforePass = nullptr, llvm::StringRef OrigDIVerifyBugsReportFilePath = ""); -struct NewPMCheckDebugifyPass +class NewPMCheckDebugifyPass : public llvm::PassInfoMixin<NewPMCheckDebugifyPass> { + llvm::StringRef NameOfWrappedPass; + llvm::StringRef OrigDIVerifyBugsReportFilePath; + DebugifyStatsMap *StatsMap; + DebugInfoPerPass *DebugInfoBeforePass; + enum DebugifyMode Mode; + bool Strip; +public: + NewPMCheckDebugifyPass( + bool Strip = false, llvm::StringRef NameOfWrappedPass = "", + DebugifyStatsMap *StatsMap = nullptr, + enum DebugifyMode Mode = DebugifyMode::SyntheticDebugInfo, + DebugInfoPerPass *DebugInfoBeforePass = nullptr, + llvm::StringRef OrigDIVerifyBugsReportFilePath = "") + : NameOfWrappedPass(NameOfWrappedPass), + OrigDIVerifyBugsReportFilePath(OrigDIVerifyBugsReportFilePath), + StatsMap(StatsMap), DebugInfoBeforePass(DebugInfoBeforePass), Mode(Mode), + Strip(Strip) {} + llvm::PreservedAnalyses run(llvm::Module &M, llvm::ModuleAnalysisManager &AM); }; namespace llvm { void exportDebugifyStats(StringRef Path, const DebugifyStatsMap &Map); -struct DebugifyEachInstrumentation { - DebugifyStatsMap StatsMap; +class DebugifyEachInstrumentation { + llvm::StringRef OrigDIVerifyBugsReportFilePath = ""; + DebugInfoPerPass *DebugInfoBeforePass = nullptr; + enum DebugifyMode Mode = DebugifyMode::NoDebugify; + DebugifyStatsMap *DIStatsMap = nullptr; + +public: void registerCallbacks(PassInstrumentationCallbacks &PIC); + // Used within DebugifyMode::SyntheticDebugInfo mode. + void setDIStatsMap(DebugifyStatsMap &StatMap) { DIStatsMap = &StatMap; } + const DebugifyStatsMap &getDebugifyStatsMap() const { return *DIStatsMap; } + // Used within DebugifyMode::OriginalDebugInfo mode. 
+ void setDebugInfoBeforePass(DebugInfoPerPass &PerPassMap) { + DebugInfoBeforePass = &PerPassMap; + } + DebugInfoPerPass &getDebugInfoPerPass() { return *DebugInfoBeforePass; } + + void setOrigDIVerifyBugsReportFilePath(StringRef BugsReportFilePath) { + OrigDIVerifyBugsReportFilePath = BugsReportFilePath; + } + StringRef getOrigDIVerifyBugsReportFilePath() const { + return OrigDIVerifyBugsReportFilePath; + } + + void setDebugifyMode(enum DebugifyMode M) { Mode = M; } + + bool isSyntheticDebugInfo() const { + return Mode == DebugifyMode::SyntheticDebugInfo; + } + bool isOriginalDebugInfoMode() const { + return Mode == DebugifyMode::OriginalDebugInfo; + } }; /// DebugifyCustomPassManager wraps each pass with the debugify passes if diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h index 676c0c1487db..adb39a410b55 100644 --- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h +++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h @@ -435,7 +435,13 @@ bool cannotBeMaxInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE, bool cannotBeMinInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE, bool Signed); -enum ReplaceExitVal { NeverRepl, OnlyCheapRepl, NoHardUse, AlwaysRepl }; +enum ReplaceExitVal { + NeverRepl, + OnlyCheapRepl, + NoHardUse, + UnusedIndVarInLoop, + AlwaysRepl +}; /// If the final value of any expressions that are recurrent in the loop can /// be computed, substitute the exit values from the loop into any instructions diff --git a/llvm/include/llvm/Transforms/Utils/ModuleUtils.h b/llvm/include/llvm/Transforms/Utils/ModuleUtils.h index 85263fc00bc3..335cf7acc2f7 100644 --- a/llvm/include/llvm/Transforms/Utils/ModuleUtils.h +++ b/llvm/include/llvm/Transforms/Utils/ModuleUtils.h @@ -109,7 +109,8 @@ void filterDeadComdatFunctions( std::string getUniqueModuleId(Module *M); /// Embed the memory buffer \p Buf into the module \p M as a global using the -/// specified section name. +/// specified section name. Also provide a metadata entry to identify it in the +/// module using the same section name. 
void embedBufferInModule(Module &M, MemoryBufferRef Buf, StringRef SectionName, Align Alignment = Align(1)); diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h index b01bd222b252..2d69c2f86642 100644 --- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h +++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h @@ -219,16 +219,9 @@ public: ExactFPMathInst = I; } - void addRuntimePointerChecks(unsigned Num) { NumRuntimePointerChecks = Num; } - Instruction *getExactFPInst() { return ExactFPMathInst; } - unsigned getNumRuntimePointerChecks() const { - return NumRuntimePointerChecks; - } - private: - unsigned NumRuntimePointerChecks = 0; Instruction *ExactFPMathInst = nullptr; }; diff --git a/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/llvm/lib/Analysis/BranchProbabilityInfo.cpp index 1d880424e55c..428ae8975c30 100644 --- a/llvm/lib/Analysis/BranchProbabilityInfo.cpp +++ b/llvm/lib/Analysis/BranchProbabilityInfo.cpp @@ -15,6 +15,7 @@ #include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/TargetLibraryInfo.h" @@ -629,9 +630,10 @@ computeUnlikelySuccessors(const BasicBlock *BB, Loop *L, if (!CmpLHSConst || !llvm::is_contained(successors(BB), B)) continue; // First collapse InstChain + const DataLayout &DL = BB->getModule()->getDataLayout(); for (Instruction *I : llvm::reverse(InstChain)) { - CmpLHSConst = ConstantExpr::get(I->getOpcode(), CmpLHSConst, - cast<Constant>(I->getOperand(1)), true); + CmpLHSConst = ConstantFoldBinaryOpOperands( + I->getOpcode(), CmpLHSConst, cast<Constant>(I->getOperand(1)), DL); if (!CmpLHSConst) break; } @@ -826,9 +828,8 @@ void BranchProbabilityInfo::computeEestimateBlockWeight( if (auto BBWeight = getInitialEstimatedBlockWeight(BB)) // If we were able to find estimated weight for the block set it to this // block and propagate up the IR. - propagateEstimatedBlockWeight(getLoopBlock(BB), DT, PDT, - BBWeight.getValue(), BlockWorkList, - LoopWorkList); + propagateEstimatedBlockWeight(getLoopBlock(BB), DT, PDT, BBWeight.value(), + BlockWorkList, LoopWorkList); // BlockWorklist/LoopWorkList contains blocks/loops with at least one // successor/exit having estimated weight. 
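The BranchProbabilityInfo hunk above swaps ConstantExpr::get for ConstantFoldBinaryOpOperands, which, unlike the constant-expression constructor, can fail; that is why the loop bails out on a null CmpLHSConst. A minimal sketch of the calling convention (hypothetical helper and operands):

#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InstrTypes.h"

// Returns null when no constant fold is possible; callers must then keep
// the original, unfolded computation instead of assuming success.
llvm::Constant *tryFoldAdd(llvm::Constant *L, llvm::Constant *R,
                           const llvm::DataLayout &DL) {
  return llvm::ConstantFoldBinaryOpOperands(llvm::Instruction::Add, L, R, DL);
}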
Try to propagate weight to such diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp index a81041845052..aa4da27be4e5 100644 --- a/llvm/lib/Analysis/ConstantFolding.cpp +++ b/llvm/lib/Analysis/ConstantFolding.cpp @@ -30,6 +30,7 @@ #include "llvm/Analysis/VectorUtils.h" #include "llvm/Config/config.h" #include "llvm/IR/Constant.h" +#include "llvm/IR/ConstantFold.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" @@ -1142,8 +1143,12 @@ ConstantFoldConstantImpl(const Constant *C, const DataLayout &DL, Ops.push_back(NewC); } - if (auto *CE = dyn_cast<ConstantExpr>(C)) - return ConstantFoldInstOperandsImpl(CE, CE->getOpcode(), Ops, DL, TLI); + if (auto *CE = dyn_cast<ConstantExpr>(C)) { + if (Constant *Res = + ConstantFoldInstOperandsImpl(CE, CE->getOpcode(), Ops, DL, TLI)) + return Res; + return const_cast<Constant *>(C); + } assert(isa<ConstantVector>(C)); return ConstantVector::get(Ops); @@ -1339,7 +1344,9 @@ Constant *llvm::ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, if (Constant *C = SymbolicallyEvaluateBinop(Opcode, LHS, RHS, DL)) return C; - return ConstantExpr::get(Opcode, LHS, RHS); + if (ConstantExpr::isDesirableBinOp(Opcode)) + return ConstantExpr::get(Opcode, LHS, RHS); + return ConstantFoldBinaryInstruction(Opcode, LHS, RHS); } Constant *llvm::FlushFPConstant(Constant *Operand, const Instruction *I, @@ -1390,6 +1397,8 @@ Constant *llvm::ConstantFoldFPInstOperands(unsigned Opcode, Constant *LHS, // Calculate constant result. Constant *C = ConstantFoldBinaryOpOperands(Opcode, Op0, Op1, DL); + if (!C) + return nullptr; // Flush denormal output if needed. return FlushFPConstant(C, I, /* IsOutput */ true); diff --git a/llvm/lib/Analysis/GlobalsModRef.cpp b/llvm/lib/Analysis/GlobalsModRef.cpp index e82d2fae9356..db6eae0d962a 100644 --- a/llvm/lib/Analysis/GlobalsModRef.cpp +++ b/llvm/lib/Analysis/GlobalsModRef.cpp @@ -256,22 +256,6 @@ FunctionModRefBehavior GlobalsAAResult::getModRefBehavior(const Function *F) { return FunctionModRefBehavior(AAResultBase::getModRefBehavior(F) & Min); } -FunctionModRefBehavior -GlobalsAAResult::getModRefBehavior(const CallBase *Call) { - FunctionModRefBehavior Min = FMRB_UnknownModRefBehavior; - - if (!Call->hasOperandBundles()) - if (const Function *F = Call->getCalledFunction()) - if (FunctionInfo *FI = getFunctionInfo(F)) { - if (!isModOrRefSet(FI->getModRefInfo())) - Min = FMRB_DoesNotAccessMemory; - else if (!isModSet(FI->getModRefInfo())) - Min = FMRB_OnlyReadsMemory; - } - - return FunctionModRefBehavior(AAResultBase::getModRefBehavior(Call) & Min); -} - /// Returns the function info for the function, or null if we don't have /// anything useful to say about it. 
GlobalsAAResult::FunctionInfo * diff --git a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp index 3d51042f4da8..a681c528e690 100644 --- a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp +++ b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp @@ -184,8 +184,8 @@ CmpInst::Predicate IRInstructionData::getPredicate() const { "Can only get a predicate from a compare instruction"); if (RevisedPredicate) - return RevisedPredicate.getValue(); - + return RevisedPredicate.value(); + return cast<CmpInst>(Inst)->getPredicate(); } diff --git a/llvm/lib/Analysis/InlineAdvisor.cpp b/llvm/lib/Analysis/InlineAdvisor.cpp index cf8592c41eda..3fafc3057a13 100644 --- a/llvm/lib/Analysis/InlineAdvisor.cpp +++ b/llvm/lib/Analysis/InlineAdvisor.cpp @@ -56,10 +56,10 @@ static cl::opt<int> cl::desc("Scale to limit the cost of inline deferral"), cl::init(2), cl::Hidden); -static cl::opt<bool> AnnotateInlinePhase( - "annotate-inline-phase", cl::Hidden, cl::init(false), - cl::desc("If true, annotate inline advisor remarks " - "with LTO and pass information.")); +static cl::opt<bool> + AnnotateInlinePhase("annotate-inline-phase", cl::Hidden, cl::init(false), + cl::desc("If true, annotate inline advisor remarks " + "with LTO and pass information.")); extern cl::opt<InlinerFunctionImportStatsOpts> InlinerFunctionImportStats; @@ -514,8 +514,9 @@ void llvm::emitInlinedIntoBasedOnCost( InlineAdvisor::InlineAdvisor(Module &M, FunctionAnalysisManager &FAM, Optional<InlineContext> IC) : M(M), FAM(FAM), IC(IC), - AnnotatedInlinePassName((IC && AnnotateInlinePhase) ? llvm::AnnotateInlinePassName(*IC) - : DEBUG_TYPE) { + AnnotatedInlinePassName((IC && AnnotateInlinePhase) + ? llvm::AnnotateInlinePassName(*IC) + : DEBUG_TYPE) { if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) { ImportedFunctionsStats = std::make_unique<ImportedFunctionsInliningStatistics>(); diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp index e63497260e6e..9f8a5e472f01 100644 --- a/llvm/lib/Analysis/InlineCost.cpp +++ b/llvm/lib/Analysis/InlineCost.cpp @@ -131,6 +131,12 @@ static cl::opt<size_t> cl::desc("Do not inline functions with a stack size " "that exceeds the specified limit")); +static cl::opt<size_t> + RecurStackSizeThreshold("recursive-inline-max-stacksize", cl::Hidden, + cl::init(InlineConstants::TotalAllocaSizeRecursiveCaller), + cl::desc("Do not inline recursive functions with a stack " + "size that exceeds the specified limit")); + static cl::opt<bool> OptComputeFullInlineCost( "inline-cost-full", cl::Hidden, cl::desc("Compute the full inline cost of a call site even when the cost " @@ -702,7 +708,7 @@ class InlineCostCallAnalyzer final : public CallAnalyzer { assert(BFI && "BFI must be available"); auto ProfileCount = BFI->getBlockProfileCount(BB); assert(ProfileCount); - if (ProfileCount.getValue() == 0) + if (ProfileCount.value() == 0) ColdSize += Cost - CostAtBBStart; } @@ -827,7 +833,7 @@ class InlineCostCallAnalyzer final : public CallAnalyzer { auto ProfileCount = CalleeBFI->getBlockProfileCount(&BB); assert(ProfileCount); - CurrentSavings *= ProfileCount.getValue(); + CurrentSavings *= ProfileCount.value(); CycleSavings += CurrentSavings; } @@ -1781,12 +1787,12 @@ void InlineCostCallAnalyzer::updateThreshold(CallBase &Call, Function &Callee) { // return min(A, B) if B is valid. auto MinIfValid = [](int A, Optional<int> B) { - return B ? std::min(A, B.getValue()) : A; + return B ? std::min(A, B.value()) : A; }; // return max(A, B) if B is valid. 
auto MaxIfValid = [](int A, Optional<int> B) { - return B ? std::max(A, B.getValue()) : A; + return B ? std::max(A, B.value()) : A; }; // Various bonus percentages. These are multiplied by Threshold to get the @@ -2444,8 +2450,7 @@ CallAnalyzer::analyzeBlock(BasicBlock *BB, // If the caller is a recursive function then we don't want to inline // functions which allocate a lot of stack space because it would increase // the caller stack usage dramatically. - if (IsCallerRecursive && - AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller) { + if (IsCallerRecursive && AllocatedSize > RecurStackSizeThreshold) { auto IR = InlineResult::failure("recursive and allocates too much stack space"); if (ORE) diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 013e4d6489fa..4691aebbdfe1 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -4849,12 +4849,6 @@ static Value *simplifyPHINode(PHINode *PN, ArrayRef<Value *> IncomingValues, return UndefValue::get(PN->getType()); if (HasUndefInput) { - // We cannot start executing a trapping constant expression on more control - // flow paths. - auto *C = dyn_cast<Constant>(CommonValue); - if (C && C->canTrap()) - return nullptr; - // If we have a PHI node like phi(X, undef, X), where X is defined by some // instruction, we cannot return X as the result of the PHI node unless it // dominates the PHI block. @@ -6117,8 +6111,8 @@ static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q) { Value *Op2 = Call->getArgOperand(2); auto *FPI = cast<ConstrainedFPIntrinsic>(Call); if (Value *V = simplifyFPOp({Op0, Op1, Op2}, {}, Q, - FPI->getExceptionBehavior().getValue(), - FPI->getRoundingMode().getValue())) + FPI->getExceptionBehavior().value(), + FPI->getRoundingMode().value())) return V; return nullptr; } @@ -6182,38 +6176,33 @@ static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q) { } case Intrinsic::experimental_constrained_fadd: { auto *FPI = cast<ConstrainedFPIntrinsic>(Call); - return simplifyFAddInst(FPI->getArgOperand(0), FPI->getArgOperand(1), - FPI->getFastMathFlags(), Q, - FPI->getExceptionBehavior().getValue(), - FPI->getRoundingMode().getValue()); + return simplifyFAddInst( + FPI->getArgOperand(0), FPI->getArgOperand(1), FPI->getFastMathFlags(), + Q, FPI->getExceptionBehavior().value(), FPI->getRoundingMode().value()); } case Intrinsic::experimental_constrained_fsub: { auto *FPI = cast<ConstrainedFPIntrinsic>(Call); - return simplifyFSubInst(FPI->getArgOperand(0), FPI->getArgOperand(1), - FPI->getFastMathFlags(), Q, - FPI->getExceptionBehavior().getValue(), - FPI->getRoundingMode().getValue()); + return simplifyFSubInst( + FPI->getArgOperand(0), FPI->getArgOperand(1), FPI->getFastMathFlags(), + Q, FPI->getExceptionBehavior().value(), FPI->getRoundingMode().value()); } case Intrinsic::experimental_constrained_fmul: { auto *FPI = cast<ConstrainedFPIntrinsic>(Call); - return simplifyFMulInst(FPI->getArgOperand(0), FPI->getArgOperand(1), - FPI->getFastMathFlags(), Q, - FPI->getExceptionBehavior().getValue(), - FPI->getRoundingMode().getValue()); + return simplifyFMulInst( + FPI->getArgOperand(0), FPI->getArgOperand(1), FPI->getFastMathFlags(), + Q, FPI->getExceptionBehavior().value(), FPI->getRoundingMode().value()); } case Intrinsic::experimental_constrained_fdiv: { auto *FPI = cast<ConstrainedFPIntrinsic>(Call); - return simplifyFDivInst(FPI->getArgOperand(0), FPI->getArgOperand(1), - FPI->getFastMathFlags(), 
Q, - FPI->getExceptionBehavior().getValue(), - FPI->getRoundingMode().getValue()); + return simplifyFDivInst( + FPI->getArgOperand(0), FPI->getArgOperand(1), FPI->getFastMathFlags(), + Q, FPI->getExceptionBehavior().value(), FPI->getRoundingMode().value()); } case Intrinsic::experimental_constrained_frem: { auto *FPI = cast<ConstrainedFPIntrinsic>(Call); - return simplifyFRemInst(FPI->getArgOperand(0), FPI->getArgOperand(1), - FPI->getFastMathFlags(), Q, - FPI->getExceptionBehavior().getValue(), - FPI->getRoundingMode().getValue()); + return simplifyFRemInst( + FPI->getArgOperand(0), FPI->getArgOperand(1), FPI->getFastMathFlags(), + Q, FPI->getExceptionBehavior().value(), FPI->getRoundingMode().value()); } default: return nullptr; diff --git a/llvm/lib/Analysis/LazyValueInfo.cpp b/llvm/lib/Analysis/LazyValueInfo.cpp index 8a8e9e923b7c..d49b20798c82 100644 --- a/llvm/lib/Analysis/LazyValueInfo.cpp +++ b/llvm/lib/Analysis/LazyValueInfo.cpp @@ -921,7 +921,7 @@ Optional<ValueLatticeElement> LazyValueInfoImpl::solveBlockValueCast( if (!LHSRes) // More work to do before applying this transfer rule. return None; - const ConstantRange &LHSRange = LHSRes.getValue(); + const ConstantRange &LHSRange = LHSRes.value(); const unsigned ResultBitWidth = CI->getType()->getIntegerBitWidth(); @@ -946,8 +946,8 @@ Optional<ValueLatticeElement> LazyValueInfoImpl::solveBlockValueBinaryOpImpl( // More work to do before applying this transfer rule. return None; - const ConstantRange &LHSRange = LHSRes.getValue(); - const ConstantRange &RHSRange = RHSRes.getValue(); + const ConstantRange &LHSRange = LHSRes.value(); + const ConstantRange &RHSRange = RHSRes.value(); return ValueLatticeElement::getRange(OpFn(LHSRange, RHSRange)); } diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp index bc1d82cf1480..938d950e6da7 100644 --- a/llvm/lib/Analysis/Loads.cpp +++ b/llvm/lib/Analysis/Loads.cpp @@ -405,7 +405,10 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Type *Ty, Align Alignment, Instruction *ScanFrom, const DominatorTree *DT, const TargetLibraryInfo *TLI) { - APInt Size(DL.getIndexTypeSizeInBits(V->getType()), DL.getTypeStoreSize(Ty)); + TypeSize TySize = DL.getTypeStoreSize(Ty); + if (TySize.isScalable()) + return false; + APInt Size(DL.getIndexTypeSizeInBits(V->getType()), TySize.getFixedValue()); return isSafeToLoadUnconditionally(V, Alignment, Size, DL, ScanFrom, DT, TLI); } diff --git a/llvm/lib/Analysis/MemoryBuiltins.cpp b/llvm/lib/Analysis/MemoryBuiltins.cpp index 91501b04448e..f5b121c98ec4 100644 --- a/llvm/lib/Analysis/MemoryBuiltins.cpp +++ b/llvm/lib/Analysis/MemoryBuiltins.cpp @@ -501,10 +501,10 @@ Optional<StringRef> llvm::getAllocationFamily(const Value *I, return None; const auto AllocData = getAllocationDataForFunction(Callee, AnyAlloc, TLI); if (AllocData) - return mangledNameForMallocFamily(AllocData.getValue().Family); + return mangledNameForMallocFamily(AllocData.value().Family); const auto FreeData = getFreeFunctionDataForFunction(Callee, TLIFn); if (FreeData) - return mangledNameForMallocFamily(FreeData.getValue().Family); + return mangledNameForMallocFamily(FreeData.value().Family); return None; } diff --git a/llvm/lib/Analysis/MustExecute.cpp b/llvm/lib/Analysis/MustExecute.cpp index 5cff986245b9..ad8322d7bd79 100644 --- a/llvm/lib/Analysis/MustExecute.cpp +++ b/llvm/lib/Analysis/MustExecute.cpp @@ -493,7 +493,7 @@ static V getOrCreateCachedOptional(K Key, DenseMap<K, Optional<V>> &Map, Optional<V> &OptVal = Map[Key]; if (!OptVal) OptVal = 
Fn(std::forward<ArgsTy>(args)...); - return OptVal.getValue(); + return OptVal.value(); } const BasicBlock * diff --git a/llvm/lib/Analysis/ProfileSummaryInfo.cpp b/llvm/lib/Analysis/ProfileSummaryInfo.cpp index 9d5fa6d0a41b..64844f534332 100644 --- a/llvm/lib/Analysis/ProfileSummaryInfo.cpp +++ b/llvm/lib/Analysis/ProfileSummaryInfo.cpp @@ -279,19 +279,19 @@ ProfileSummaryInfo::computeThreshold(int PercentileCutoff) const { } bool ProfileSummaryInfo::hasHugeWorkingSetSize() const { - return HasHugeWorkingSetSize && HasHugeWorkingSetSize.getValue(); + return HasHugeWorkingSetSize && HasHugeWorkingSetSize.value(); } bool ProfileSummaryInfo::hasLargeWorkingSetSize() const { - return HasLargeWorkingSetSize && HasLargeWorkingSetSize.getValue(); + return HasLargeWorkingSetSize && HasLargeWorkingSetSize.value(); } bool ProfileSummaryInfo::isHotCount(uint64_t C) const { - return HotCountThreshold && C >= HotCountThreshold.getValue(); + return HotCountThreshold && C >= HotCountThreshold.value(); } bool ProfileSummaryInfo::isColdCount(uint64_t C) const { - return ColdCountThreshold && C <= ColdCountThreshold.getValue(); + return ColdCountThreshold && C <= ColdCountThreshold.value(); } template <bool isHot> @@ -299,9 +299,9 @@ bool ProfileSummaryInfo::isHotOrColdCountNthPercentile(int PercentileCutoff, uint64_t C) const { auto CountThreshold = computeThreshold(PercentileCutoff); if (isHot) - return CountThreshold && C >= CountThreshold.getValue(); + return CountThreshold && C >= CountThreshold.value(); else - return CountThreshold && C <= CountThreshold.getValue(); + return CountThreshold && C <= CountThreshold.value(); } bool ProfileSummaryInfo::isHotCountNthPercentile(int PercentileCutoff, diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 207f4df79e45..f61806bd1dad 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -2319,9 +2319,13 @@ bool ScalarEvolution::willNotOverflow(Instruction::BinaryOps BinOp, bool Signed, return A == B; } -std::pair<SCEV::NoWrapFlags, bool /*Deduced*/> +Optional<SCEV::NoWrapFlags> ScalarEvolution::getStrengthenedNoWrapFlagsFromBinOp( const OverflowingBinaryOperator *OBO) { + // It cannot be done any better. + if (OBO->hasNoUnsignedWrap() && OBO->hasNoSignedWrap()) + return None; + SCEV::NoWrapFlags Flags = SCEV::NoWrapFlags::FlagAnyWrap; if (OBO->hasNoUnsignedWrap()) @@ -2331,13 +2335,10 @@ ScalarEvolution::getStrengthenedNoWrapFlagsFromBinOp( bool Deduced = false; - if (OBO->hasNoUnsignedWrap() && OBO->hasNoSignedWrap()) - return {Flags, Deduced}; - if (OBO->getOpcode() != Instruction::Add && OBO->getOpcode() != Instruction::Sub && OBO->getOpcode() != Instruction::Mul) - return {Flags, Deduced}; + return None; const SCEV *LHS = getSCEV(OBO->getOperand(0)); const SCEV *RHS = getSCEV(OBO->getOperand(1)); @@ -2356,7 +2357,9 @@ ScalarEvolution::getStrengthenedNoWrapFlagsFromBinOp( Deduced = true; } - return {Flags, Deduced}; + if (Deduced) + return Flags; + return None; } // We're trying to construct a SCEV of type `Type' with `Ops' as operands and @@ -4835,7 +4838,7 @@ public: Optional<const SCEV *> Res = compareWithBackedgeCondition(SI->getCondition()); if (Res) { - bool IsOne = cast<SCEVConstant>(Res.getValue())->getValue()->isOne(); + bool IsOne = cast<SCEVConstant>(Res.value())->getValue()->isOne(); Result = SE.getSCEV(IsOne ? 
SI->getTrueValue() : SI->getFalseValue()); } break; @@ -4843,7 +4846,7 @@ public: default: { Optional<const SCEV *> Res = compareWithBackedgeCondition(I); if (Res) - Result = Res.getValue(); + Result = Res.value(); break; } } @@ -6583,8 +6586,8 @@ ScalarEvolution::getRangeRef(const SCEV *S, // Check if the IR explicitly contains !range metadata. Optional<ConstantRange> MDRange = GetRangeFromMetadata(U->getValue()); if (MDRange) - ConservativeResult = ConservativeResult.intersectWith(MDRange.getValue(), - RangeType); + ConservativeResult = + ConservativeResult.intersectWith(MDRange.value(), RangeType); // Use facts about recurrences in the underlying IR. Note that add // recurrences are AddRecExprs and thus don't hit this path. This @@ -7365,6 +7368,8 @@ ScalarEvolution::getOperandsToCreate(Value *V, SmallVectorImpl<Value *> &Ops) { Ops.push_back(II->getArgOperand(1)); return nullptr; case Intrinsic::start_loop_iterations: + case Intrinsic::annotation: + case Intrinsic::ptr_annotation: Ops.push_back(II->getArgOperand(0)); return nullptr; default: @@ -7816,8 +7821,10 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { return getAddExpr(ClampedX, Y, SCEV::FlagNUW); } case Intrinsic::start_loop_iterations: - // A start_loop_iterations is just equivalent to the first operand for - // SCEV purposes. + case Intrinsic::annotation: + case Intrinsic::ptr_annotation: + // A start_loop_iterations or llvm.annotation or llvm.ptr.annotation is + // just equivalent to the first operand for SCEV purposes. return getSCEV(II->getArgOperand(0)); default: break; @@ -9517,14 +9524,7 @@ static Constant *BuildConstantFromSCEV(const SCEV *V) { } return C; } - case scUDivExpr: { - const SCEVUDivExpr *SU = cast<SCEVUDivExpr>(V); - if (Constant *LHS = BuildConstantFromSCEV(SU->getLHS())) - if (Constant *RHS = BuildConstantFromSCEV(SU->getRHS())) - if (LHS->getType() == RHS->getType()) - return ConstantExpr::getUDiv(LHS, RHS); - return nullptr; - } + case scUDivExpr: case scSMaxExpr: case scUMaxExpr: case scSMinExpr: @@ -10632,7 +10632,7 @@ ScalarEvolution::getMonotonicPredicateType(const SCEVAddRecExpr *LHS, getMonotonicPredicateTypeImpl(LHS, ICmpInst::getSwappedPredicate(Pred)); assert(ResultSwapped && "should be able to analyze both!"); - assert(ResultSwapped.getValue() != Result.getValue() && + assert(ResultSwapped.value() != Result.value() && "monotonicity should flip as we flip the predicate"); } #endif @@ -11808,7 +11808,7 @@ bool ScalarEvolution::isImpliedViaMerge(ICmpInst::Predicate Pred, const SCEV *L = getSCEV(LPhi->getIncomingValueForBlock(IncBB)); // Make sure L does not refer to a value from a potentially previous // iteration of a loop.
- if (!properlyDominates(L, IncBB)) + if (!properlyDominates(L, LBB)) return false; if (!ProvedEasily(L, RHS)) return false; diff --git a/llvm/lib/Analysis/TFUtils.cpp b/llvm/lib/Analysis/TFUtils.cpp index 203858c1cf06..682fc095b0e9 100644 --- a/llvm/lib/Analysis/TFUtils.cpp +++ b/llvm/lib/Analysis/TFUtils.cpp @@ -18,7 +18,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/JSON.h" -#include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" @@ -49,19 +48,17 @@ using TFStatusPtr = std::unique_ptr<TF_Status, decltype(&TF_DeleteStatus)>; struct TFInitializer { TFInitializer() { - assert(!IsInitialized && "TFInitialized should be called only once"); int Argc = 1; const char *Name = ""; const char **NamePtr = &Name; TF_InitMain(Name, &Argc, const_cast<char ***>(&NamePtr)); - IsInitialized = true; } - bool IsInitialized = false; }; -llvm::ManagedStatic<TFInitializer> TFLibInitializer; - -bool ensureInitTF() { return TFLibInitializer->IsInitialized; } +bool ensureInitTF() { + static TFInitializer TFLibInitializer; + return true; +} TFGraphPtr createTFGraph() { return TFGraphPtr(TF_NewGraph(), &TF_DeleteGraph); diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index 66f61961d01b..6e34a8303c08 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -298,7 +298,7 @@ bool TargetTransformInfo::preferPredicateOverEpilogue( return TTIImpl->preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LAI); } -bool TargetTransformInfo::emitGetActiveLaneMask() const { +PredicationStyle TargetTransformInfo::emitGetActiveLaneMask() const { return TTIImpl->emitGetActiveLaneMask(); } diff --git a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp index 9bcbe4a4cc1e..560f46d39d0d 100644 --- a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp +++ b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp @@ -303,24 +303,27 @@ public: /// given offset. Update the offset to be relative to the field type. TBAAStructTypeNode getField(uint64_t &Offset) const { bool NewFormat = isNewFormat(); + const ArrayRef<MDOperand> Operands(Node->op_begin(), Node->op_end()); + const unsigned NumOperands = Operands.size(); + if (NewFormat) { // New-format root and scalar type nodes have no fields. - if (Node->getNumOperands() < 6) + if (NumOperands < 6) return TBAAStructTypeNode(); } else { // Parent can be omitted for the root node. - if (Node->getNumOperands() < 2) + if (NumOperands < 2) return TBAAStructTypeNode(); // Fast path for a scalar type node and a struct type node with a single // field. - if (Node->getNumOperands() <= 3) { - uint64_t Cur = Node->getNumOperands() == 2 - ? 0 - : mdconst::extract<ConstantInt>(Node->getOperand(2)) - ->getZExtValue(); + if (NumOperands <= 3) { + uint64_t Cur = + NumOperands == 2 + ? 0 + : mdconst::extract<ConstantInt>(Operands[2])->getZExtValue(); Offset -= Cur; - MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1)); + MDNode *P = dyn_cast_or_null<MDNode>(Operands[1]); if (!P) return TBAAStructTypeNode(); return TBAAStructTypeNode(P); @@ -332,10 +335,11 @@ public: unsigned FirstFieldOpNo = NewFormat ? 3 : 1; unsigned NumOpsPerField = NewFormat ? 
3 : 2; unsigned TheIdx = 0; - for (unsigned Idx = FirstFieldOpNo; Idx < Node->getNumOperands(); + + for (unsigned Idx = FirstFieldOpNo; Idx < NumOperands; Idx += NumOpsPerField) { - uint64_t Cur = mdconst::extract<ConstantInt>(Node->getOperand(Idx + 1)) - ->getZExtValue(); + uint64_t Cur = + mdconst::extract<ConstantInt>(Operands[Idx + 1])->getZExtValue(); if (Cur > Offset) { assert(Idx >= FirstFieldOpNo + NumOpsPerField && "TBAAStructTypeNode::getField should have an offset match!"); @@ -345,11 +349,11 @@ public: } // Move along the last field. if (TheIdx == 0) - TheIdx = Node->getNumOperands() - NumOpsPerField; - uint64_t Cur = mdconst::extract<ConstantInt>(Node->getOperand(TheIdx + 1)) - ->getZExtValue(); + TheIdx = NumOperands - NumOpsPerField; + uint64_t Cur = + mdconst::extract<ConstantInt>(Operands[TheIdx + 1])->getZExtValue(); Offset -= Cur; - MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(TheIdx)); + MDNode *P = dyn_cast_or_null<MDNode>(Operands[TheIdx]); if (!P) return TBAAStructTypeNode(); return TBAAStructTypeNode(P); diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 05d5e47bb8d7..add2d427e05b 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -4679,27 +4679,22 @@ bool llvm::mustSuppressSpeculation(const LoadInst &LI) { F.hasFnAttribute(Attribute::SanitizeHWAddress); } - -bool llvm::isSafeToSpeculativelyExecute(const Value *V, +bool llvm::isSafeToSpeculativelyExecute(const Instruction *Inst, const Instruction *CtxI, const DominatorTree *DT, const TargetLibraryInfo *TLI) { - const Operator *Inst = dyn_cast<Operator>(V); - if (!Inst) - return false; - return isSafeToSpeculativelyExecuteWithOpcode(Inst->getOpcode(), Inst, CtxI, DT, TLI); + return isSafeToSpeculativelyExecuteWithOpcode(Inst->getOpcode(), Inst, CtxI, + DT, TLI); } -bool llvm::isSafeToSpeculativelyExecuteWithOpcode(unsigned Opcode, - const Operator *Inst, - const Instruction *CtxI, - const DominatorTree *DT, - const TargetLibraryInfo *TLI) { +bool llvm::isSafeToSpeculativelyExecuteWithOpcode( + unsigned Opcode, const Instruction *Inst, const Instruction *CtxI, + const DominatorTree *DT, const TargetLibraryInfo *TLI) { #ifndef NDEBUG if (Inst->getOpcode() != Opcode) { // Check that the operands are actually compatible with the Opcode override. 
auto hasEqualReturnAndLeadingOperandTypes = - [](const Operator *Inst, unsigned NumLeadingOperands) { + [](const Instruction *Inst, unsigned NumLeadingOperands) { if (Inst->getNumOperands() < NumLeadingOperands) return false; const Type *ExpectedType = Inst->getType(); @@ -4715,11 +4710,6 @@ bool llvm::isSafeToSpeculativelyExecuteWithOpcode(unsigned Opcode, } #endif - for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i) - if (Constant *C = dyn_cast<Constant>(Inst->getOperand(i))) - if (C->canTrap()) - return false; - switch (Opcode) { default: return true; diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp index f863a1ffad3a..894680cda1fc 100644 --- a/llvm/lib/Analysis/VectorUtils.cpp +++ b/llvm/lib/Analysis/VectorUtils.cpp @@ -1502,7 +1502,7 @@ void VFABI::getVectorVariantNames( LLVM_DEBUG(dbgs() << "VFABI: adding mapping '" << S << "'\n"); Optional<VFInfo> Info = VFABI::tryDemangleForVFABI(S, *(CI.getModule())); assert(Info && "Invalid name for a VFABI variant."); - assert(CI.getModule()->getFunction(Info.getValue().VectorName) && + assert(CI.getModule()->getFunction(Info.value().VectorName) && "Vector function is missing."); #endif VariantMappings.push_back(std::string(S)); diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp index 30e6f8599208..c9a982693fa7 100644 --- a/llvm/lib/AsmParser/LLLexer.cpp +++ b/llvm/lib/AsmParser/LLLexer.cpp @@ -582,7 +582,6 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(no_sanitize_address); KEYWORD(no_sanitize_hwaddress); - KEYWORD(no_sanitize_memtag); KEYWORD(sanitize_address_dyninit); KEYWORD(ccc); @@ -661,7 +660,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(oge); KEYWORD(ord); KEYWORD(uno); KEYWORD(ueq); KEYWORD(une); KEYWORD(xchg); KEYWORD(nand); KEYWORD(max); KEYWORD(min); KEYWORD(umax); - KEYWORD(umin); + KEYWORD(umin); KEYWORD(fmax); KEYWORD(fmin); KEYWORD(vscale); KEYWORD(x); diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp index a1cdeac2b47f..fd502eded0a0 100644 --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -456,10 +456,15 @@ bool LLParser::parseTargetDefinition() { return false; case lltok::kw_datalayout: Lex.Lex(); - if (parseToken(lltok::equal, "expected '=' after target datalayout") || - parseStringConstant(Str)) + if (parseToken(lltok::equal, "expected '=' after target datalayout")) + return true; + LocTy Loc = Lex.getLoc(); + if (parseStringConstant(Str)) return true; - M->setDataLayout(Str); + Expected<DataLayout> MaybeDL = DataLayout::parse(Str); + if (!MaybeDL) + return error(Loc, toString(MaybeDL.takeError())); + M->setDataLayout(MaybeDL.get()); return false; } } @@ -1107,7 +1112,7 @@ static bool isSanitizer(lltok::Kind Kind) { switch (Kind) { case lltok::kw_no_sanitize_address: case lltok::kw_no_sanitize_hwaddress: - case lltok::kw_no_sanitize_memtag: + case lltok::kw_sanitize_memtag: case lltok::kw_sanitize_address_dyninit: return true; default: @@ -1128,8 +1133,8 @@ bool LLParser::parseSanitizer(GlobalVariable *GV) { case lltok::kw_no_sanitize_hwaddress: Meta.NoHWAddress = true; break; - case lltok::kw_no_sanitize_memtag: - Meta.NoMemtag = true; + case lltok::kw_sanitize_memtag: + Meta.Memtag = true; break; case lltok::kw_sanitize_address_dyninit: Meta.IsDynInit = true; @@ -3474,32 +3479,26 @@ bool LLParser::parseValID(ValID &ID, PerFunctionState *PFS, Type *ExpectedTy) { } case lltok::kw_extractvalue: return error(ID.Loc, "extractvalue constexprs are no longer supported"); - case 
lltok::kw_insertvalue: { - Lex.Lex(); - Constant *Val0, *Val1; - SmallVector<unsigned, 4> Indices; - if (parseToken(lltok::lparen, "expected '(' in insertvalue constantexpr") || - parseGlobalTypeAndValue(Val0) || - parseToken(lltok::comma, - "expected comma in insertvalue constantexpr") || - parseGlobalTypeAndValue(Val1) || parseIndexList(Indices) || - parseToken(lltok::rparen, "expected ')' in insertvalue constantexpr")) - return true; - if (!Val0->getType()->isAggregateType()) - return error(ID.Loc, "insertvalue operand must be aggregate type"); - Type *IndexedType = - ExtractValueInst::getIndexedType(Val0->getType(), Indices); - if (!IndexedType) - return error(ID.Loc, "invalid indices for insertvalue"); - if (IndexedType != Val1->getType()) - return error(ID.Loc, "insertvalue operand and field disagree in type: '" + - getTypeString(Val1->getType()) + - "' instead of '" + getTypeString(IndexedType) + - "'"); - ID.ConstantVal = ConstantExpr::getInsertValue(Val0, Val1, Indices); - ID.Kind = ValID::t_Constant; - return false; - } + case lltok::kw_insertvalue: + return error(ID.Loc, "insertvalue constexprs are no longer supported"); + case lltok::kw_udiv: + return error(ID.Loc, "udiv constexprs are no longer supported"); + case lltok::kw_sdiv: + return error(ID.Loc, "sdiv constexprs are no longer supported"); + case lltok::kw_urem: + return error(ID.Loc, "urem constexprs are no longer supported"); + case lltok::kw_srem: + return error(ID.Loc, "srem constexprs are no longer supported"); + case lltok::kw_fadd: + return error(ID.Loc, "fadd constexprs are no longer supported"); + case lltok::kw_fsub: + return error(ID.Loc, "fsub constexprs are no longer supported"); + case lltok::kw_fmul: + return error(ID.Loc, "fmul constexprs are no longer supported"); + case lltok::kw_fdiv: + return error(ID.Loc, "fdiv constexprs are no longer supported"); + case lltok::kw_frem: + return error(ID.Loc, "frem constexprs are no longer supported"); case lltok::kw_icmp: case lltok::kw_fcmp: { unsigned PredVal, Opc = Lex.getUIntVal(); @@ -3559,17 +3558,8 @@ bool LLParser::parseValID(ValID &ID, PerFunctionState *PFS, Type *ExpectedTy) { } // Binary Operators. case lltok::kw_add: - case lltok::kw_fadd: case lltok::kw_sub: - case lltok::kw_fsub: case lltok::kw_mul: - case lltok::kw_fmul: - case lltok::kw_udiv: - case lltok::kw_sdiv: - case lltok::kw_fdiv: - case lltok::kw_urem: - case lltok::kw_srem: - case lltok::kw_frem: case lltok::kw_shl: case lltok::kw_lshr: case lltok::kw_ashr: { @@ -5398,8 +5388,10 @@ bool LLParser::convertValIDToValue(Type *Ty, ValID &ID, Value *&V, V = PFS->getVal(ID.StrVal, Ty, ID.Loc); return V == nullptr; case ValID::t_InlineAsm: { - if (!ID.FTy || !InlineAsm::Verify(ID.FTy, ID.StrVal2)) + if (!ID.FTy) return error(ID.Loc, "invalid type for inline asm constraint string"); + if (Error Err = InlineAsm::verify(ID.FTy, ID.StrVal2)) + return error(ID.Loc, toString(std::move(Err))); V = InlineAsm::get( ID.FTy, ID.StrVal, ID.StrVal2, ID.UIntVal & 1, (ID.UIntVal >> 1) & 1, InlineAsm::AsmDialect((ID.UIntVal >> 2) & 1), (ID.UIntVal >> 3) & 1); @@ -7483,6 +7475,14 @@ int LLParser::parseAtomicRMW(Instruction *&Inst, PerFunctionState &PFS) { Operation = AtomicRMWInst::FSub; IsFP = true; break; + case lltok::kw_fmax: + Operation = AtomicRMWInst::FMax; + IsFP = true; + break; + case lltok::kw_fmin: + Operation = AtomicRMWInst::FMin; + IsFP = true; + break; } Lex.Lex(); // Eat the operation. 
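The rejected constant expressions and the new fmax/fmin parsing above are the textual-IR half of a change that recurs through this commit: the bitcode reader/writer, GlobalISel and AtomicExpand below all grow matching FMax/FMin cases. A hedged sketch of the new atomicrmw operations built programmatically; Builder, Ptr and Val are assumed to exist and are not from the patch:

    // Emits: %old = atomicrmw fmax ptr %P, float %V seq_cst
    Value *Old = Builder.CreateAtomicRMW(
        AtomicRMWInst::FMax, Ptr, Val, MaybeAlign(),
        AtomicOrdering::SequentiallyConsistent);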
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index 93b07fc0db30..8d5a2555f9af 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -69,7 +69,6 @@ #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ErrorOr.h" -#include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" @@ -1243,6 +1242,8 @@ static AtomicRMWInst::BinOp getDecodedRMWOperation(unsigned Val) { case bitc::RMW_UMIN: return AtomicRMWInst::UMin; case bitc::RMW_FADD: return AtomicRMWInst::FAdd; case bitc::RMW_FSUB: return AtomicRMWInst::FSub; + case bitc::RMW_FMAX: return AtomicRMWInst::FMax; + case bitc::RMW_FMIN: return AtomicRMWInst::FMin; } } @@ -1384,6 +1385,9 @@ static bool isConstExprSupported(uint8_t Opcode) { if (Opcode >= BitcodeConstant::FirstSpecialOpcode) return true; + if (Instruction::isBinaryOp(Opcode)) + return ConstantExpr::isSupportedBinOp(Opcode); + return !ExpandConstantExprs; } @@ -1851,6 +1855,8 @@ static Attribute::AttrKind getAttrFromCode(uint64_t Code) { return Attribute::DisableSanitizerInstrumentation; case bitc::ATTR_KIND_ELEMENTTYPE: return Attribute::ElementType; + case bitc::ATTR_KIND_FNRETTHUNK_EXTERN: + return Attribute::FnRetThunkExtern; case bitc::ATTR_KIND_INACCESSIBLEMEM_ONLY: return Attribute::InaccessibleMemOnly; case bitc::ATTR_KIND_INACCESSIBLEMEM_OR_ARGMEMONLY: @@ -3672,7 +3678,7 @@ GlobalValue::SanitizerMetadata deserializeSanitizerMetadata(unsigned V) { if (V & (1 << 1)) Meta.NoHWAddress = true; if (V & (1 << 2)) - Meta.NoMemtag = true; + Meta.Memtag = true; if (V & (1 << 3)) Meta.IsDynInit = true; return Meta; @@ -7441,10 +7447,9 @@ class BitcodeErrorCategoryType : public std::error_category { } // end anonymous namespace -static ManagedStatic<BitcodeErrorCategoryType> ErrorCategory; - const std::error_category &llvm::BitcodeErrorCategory() { - return *ErrorCategory; + static BitcodeErrorCategoryType ErrorCategory; + return ErrorCategory; } static Expected<StringRef> readBlobInRecord(BitstreamCursor &Stream, diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index 941ed808bab1..590562ce2796 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -577,6 +577,8 @@ static unsigned getEncodedRMWOperation(AtomicRMWInst::BinOp Op) { case AtomicRMWInst::UMin: return bitc::RMW_UMIN; case AtomicRMWInst::FAdd: return bitc::RMW_FADD; case AtomicRMWInst::FSub: return bitc::RMW_FSUB; + case AtomicRMWInst::FMax: return bitc::RMW_FMAX; + case AtomicRMWInst::FMin: return bitc::RMW_FMIN; } } @@ -632,6 +634,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) { return bitc::ATTR_KIND_COLD; case Attribute::DisableSanitizerInstrumentation: return bitc::ATTR_KIND_DISABLE_SANITIZER_INSTRUMENTATION; + case Attribute::FnRetThunkExtern: + return bitc::ATTR_KIND_FNRETTHUNK_EXTERN; case Attribute::Hot: return bitc::ATTR_KIND_HOT; case Attribute::ElementType: @@ -1230,7 +1234,7 @@ static_assert(sizeof(GlobalValue::SanitizerMetadata) <= sizeof(unsigned), static unsigned serializeSanitizerMetadata(const GlobalValue::SanitizerMetadata &Meta) { return Meta.NoAddress | (Meta.NoHWAddress << 1) | - (Meta.NoMemtag << 2) | (Meta.IsDynInit << 3); + (Meta.Memtag << 2) | (Meta.IsDynInit << 3); } /// Emit top-level description of module, including target triple, inline asm, @@ 
-2674,9 +2678,6 @@ void ModuleBitcodeWriter::writeConstants(unsigned FirstVal, unsigned LastVal, Record.push_back(VE.getValueID(C->getOperand(1))); Record.push_back(CE->getPredicate()); break; - case Instruction::InsertValue: - report_fatal_error("insertvalue constexprs not supported"); - break; } } else if (const BlockAddress *BA = dyn_cast<BlockAddress>(C)) { Code = bitc::CST_CODE_BLOCKADDRESS; diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 4a31bf85446b..94612a51d2e1 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1816,6 +1816,11 @@ void AsmPrinter::emitGlobalAlias(Module &M, const GlobalAlias &GA) { if (TM.getTargetTriple().isOSBinFormatXCOFF()) { assert(MAI->hasVisibilityOnlyWithLinkage() && "Visibility should be handled with emitLinkage() on AIX."); + + // Linkage for alias of global variable has been emitted. + if (isa<GlobalVariable>(GA.getAliaseeObject())) + return; + emitLinkage(&GA, Name); // If it's a function, also emit linkage for aliases of function entry // point. @@ -2860,7 +2865,8 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) { static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *C, AsmPrinter &AP, const Constant *BaseCV = nullptr, - uint64_t Offset = 0); + uint64_t Offset = 0, + AsmPrinter::AliasMapTy *AliasList = nullptr); static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP); static void emitGlobalConstantFP(APFloat APF, Type *ET, AsmPrinter &AP); @@ -2914,9 +2920,21 @@ static int isRepeatedByteSequence(const Value *V, const DataLayout &DL) { return -1; } -static void emitGlobalConstantDataSequential(const DataLayout &DL, - const ConstantDataSequential *CDS, - AsmPrinter &AP) { +static void emitGlobalAliasInline(AsmPrinter &AP, uint64_t Offset, + AsmPrinter::AliasMapTy *AliasList) { + if (AliasList) { + auto AliasIt = AliasList->find(Offset); + if (AliasIt != AliasList->end()) { + for (const GlobalAlias *GA : AliasIt->second) + AP.OutStreamer->emitLabel(AP.getSymbol(GA)); + AliasList->erase(Offset); + } + } +} + +static void emitGlobalConstantDataSequential( + const DataLayout &DL, const ConstantDataSequential *CDS, AsmPrinter &AP, + AsmPrinter::AliasMapTy *AliasList) { // See if we can aggregate this into a .fill, if so, emit it as such. int Value = isRepeatedByteSequence(CDS, DL); if (Value != -1) { @@ -2933,17 +2951,20 @@ static void emitGlobalConstantDataSequential(const DataLayout &DL, // Otherwise, emit the values in successive locations. 
unsigned ElementByteSize = CDS->getElementByteSize(); if (isa<IntegerType>(CDS->getElementType())) { - for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { + for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) { + emitGlobalAliasInline(AP, ElementByteSize * I, AliasList); if (AP.isVerbose()) AP.OutStreamer->getCommentOS() - << format("0x%" PRIx64 "\n", CDS->getElementAsInteger(i)); - AP.OutStreamer->emitIntValue(CDS->getElementAsInteger(i), + << format("0x%" PRIx64 "\n", CDS->getElementAsInteger(I)); + AP.OutStreamer->emitIntValue(CDS->getElementAsInteger(I), ElementByteSize); } } else { Type *ET = CDS->getElementType(); - for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) + for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) { + emitGlobalAliasInline(AP, ElementByteSize * I, AliasList); emitGlobalConstantFP(CDS->getElementAsAPFloat(I), ET, AP); + } } unsigned Size = DL.getTypeAllocSize(CDS->getType()); @@ -2956,7 +2977,8 @@ static void emitGlobalConstantDataSequential(const DataLayout &DL, static void emitGlobalConstantArray(const DataLayout &DL, const ConstantArray *CA, AsmPrinter &AP, - const Constant *BaseCV, uint64_t Offset) { + const Constant *BaseCV, uint64_t Offset, + AsmPrinter::AliasMapTy *AliasList) { // See if we can aggregate some values. Make sure it can be // represented as a series of bytes of the constant value. int Value = isRepeatedByteSequence(CA, DL); @@ -2964,44 +2986,75 @@ static void emitGlobalConstantArray(const DataLayout &DL, if (Value != -1) { uint64_t Bytes = DL.getTypeAllocSize(CA->getType()); AP.OutStreamer->emitFill(Bytes, Value); - } - else { - for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) { - emitGlobalConstantImpl(DL, CA->getOperand(i), AP, BaseCV, Offset); - Offset += DL.getTypeAllocSize(CA->getOperand(i)->getType()); + } else { + for (unsigned I = 0, E = CA->getNumOperands(); I != E; ++I) { + emitGlobalConstantImpl(DL, CA->getOperand(I), AP, BaseCV, Offset, + AliasList); + Offset += DL.getTypeAllocSize(CA->getOperand(I)->getType()); } } } +static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP); + static void emitGlobalConstantVector(const DataLayout &DL, - const ConstantVector *CV, AsmPrinter &AP) { - for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i) - emitGlobalConstantImpl(DL, CV->getOperand(i), AP); + const ConstantVector *CV, AsmPrinter &AP, + AsmPrinter::AliasMapTy *AliasList) { + Type *ElementType = CV->getType()->getElementType(); + uint64_t ElementSizeInBits = DL.getTypeSizeInBits(ElementType); + uint64_t ElementAllocSizeInBits = DL.getTypeAllocSizeInBits(ElementType); + uint64_t EmittedSize; + if (ElementSizeInBits != ElementAllocSizeInBits) { + // If the allocation size of an element is different from the size in bits, + // printing each element separately will insert incorrect padding. + // + // The general algorithm here is complicated; instead of writing it out + // here, just use the existing code in ConstantFolding. 
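For intuition about the size check above: it fires when an element's bit size and alloc size disagree, i1 being the simplest case (1 bit of data, 1 byte of storage). A standalone model of the mismatch, plain C++ rather than LLVM code:

    #include <cassert>
    // For <8 x i1>, per-lane emission would write one alloc unit (1 byte)
    // per lane, 8 bytes in total; the correct in-memory form is the single
    // byte produced by folding the vector into one integer constant.
    int main() {
      const unsigned Lanes = 8, ElemBits = 1, ElemAllocBits = 8;
      unsigned PerLaneBytes = Lanes * (ElemAllocBits / 8); // 8, wrong
      unsigned FoldedBytes = (Lanes * ElemBits + 7) / 8;   // 1, right
      assert(PerLaneBytes != FoldedBytes);
    }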
+ Type *IntT = + IntegerType::get(CV->getContext(), DL.getTypeSizeInBits(CV->getType())); + ConstantInt *CI = dyn_cast_or_null<ConstantInt>(ConstantFoldConstant( + ConstantExpr::getBitCast(const_cast<ConstantVector *>(CV), IntT), DL)); + if (!CI) { + report_fatal_error( + "Cannot lower vector global with unusual element type"); + } + emitGlobalAliasInline(AP, 0, AliasList); + emitGlobalConstantLargeInt(CI, AP); + EmittedSize = DL.getTypeStoreSize(CV->getType()); + } else { + for (unsigned I = 0, E = CV->getType()->getNumElements(); I != E; ++I) { + emitGlobalAliasInline(AP, DL.getTypeAllocSize(CV->getType()) * I, AliasList); + emitGlobalConstantImpl(DL, CV->getOperand(I), AP); + } + EmittedSize = + DL.getTypeAllocSize(ElementType) * CV->getType()->getNumElements(); + } unsigned Size = DL.getTypeAllocSize(CV->getType()); - unsigned EmittedSize = DL.getTypeAllocSize(CV->getType()->getElementType()) * - CV->getType()->getNumElements(); if (unsigned Padding = Size - EmittedSize) AP.OutStreamer->emitZeros(Padding); } static void emitGlobalConstantStruct(const DataLayout &DL, const ConstantStruct *CS, AsmPrinter &AP, - const Constant *BaseCV, uint64_t Offset) { + const Constant *BaseCV, uint64_t Offset, + AsmPrinter::AliasMapTy *AliasList) { // Print the fields in successive locations. Pad to align if needed! unsigned Size = DL.getTypeAllocSize(CS->getType()); const StructLayout *Layout = DL.getStructLayout(CS->getType()); uint64_t SizeSoFar = 0; - for (unsigned i = 0, e = CS->getNumOperands(); i != e; ++i) { - const Constant *Field = CS->getOperand(i); + for (unsigned I = 0, E = CS->getNumOperands(); I != E; ++I) { + const Constant *Field = CS->getOperand(I); // Print the actual field value. - emitGlobalConstantImpl(DL, Field, AP, BaseCV, Offset + SizeSoFar); + emitGlobalConstantImpl(DL, Field, AP, BaseCV, Offset + SizeSoFar, + AliasList); // Check if padding is needed and insert one or more 0s. uint64_t FieldSize = DL.getTypeAllocSize(Field->getType()); - uint64_t PadSize = ((i == e-1 ? Size : Layout->getElementOffset(i+1)) - - Layout->getElementOffset(i)) - FieldSize; + uint64_t PadSize = ((I == E - 1 ? 
Size : Layout->getElementOffset(I + 1)) - + Layout->getElementOffset(I)) - + FieldSize; SizeSoFar += FieldSize + PadSize; // Insert padding - this may include padding to increase the size of the @@ -3211,7 +3264,9 @@ static void handleIndirectSymViaGOTPCRel(AsmPrinter &AP, const MCExpr **ME, static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *CV, AsmPrinter &AP, const Constant *BaseCV, - uint64_t Offset) { + uint64_t Offset, + AsmPrinter::AliasMapTy *AliasList) { + emitGlobalAliasInline(AP, Offset, AliasList); uint64_t Size = DL.getTypeAllocSize(CV->getType()); // Globals with sub-elements such as combinations of arrays and structs @@ -3251,13 +3306,13 @@ static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *CV, } if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(CV)) - return emitGlobalConstantDataSequential(DL, CDS, AP); + return emitGlobalConstantDataSequential(DL, CDS, AP, AliasList); if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV)) - return emitGlobalConstantArray(DL, CVA, AP, BaseCV, Offset); + return emitGlobalConstantArray(DL, CVA, AP, BaseCV, Offset, AliasList); if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV)) - return emitGlobalConstantStruct(DL, CVS, AP, BaseCV, Offset); + return emitGlobalConstantStruct(DL, CVS, AP, BaseCV, Offset, AliasList); if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) { // Look through bitcasts, which might not be able to be MCExpr'ized (e.g. of @@ -3276,7 +3331,7 @@ static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *CV, } if (const ConstantVector *V = dyn_cast<ConstantVector>(CV)) - return emitGlobalConstantVector(DL, V, AP); + return emitGlobalConstantVector(DL, V, AP, AliasList); // Otherwise, it must be a ConstantExpr. Lower it to an MCExpr, then emit it // thread the streamer with EmitValue. @@ -3292,15 +3347,21 @@ static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *CV, } /// EmitGlobalConstant - Print a general LLVM constant to the .s file. -void AsmPrinter::emitGlobalConstant(const DataLayout &DL, const Constant *CV) { +void AsmPrinter::emitGlobalConstant(const DataLayout &DL, const Constant *CV, + AliasMapTy *AliasList) { uint64_t Size = DL.getTypeAllocSize(CV->getType()); if (Size) - emitGlobalConstantImpl(DL, CV, *this); + emitGlobalConstantImpl(DL, CV, *this, nullptr, 0, AliasList); else if (MAI->hasSubsectionsViaSymbols()) { // If the global has zero size, emit a single byte so that two labels don't // look like they are at the same location. 
OutStreamer->emitIntValue(0, 1); } + if (!AliasList) + return; + for (const auto &AliasPair : *AliasList) + report_fatal_error("Aliases with offset " + Twine(AliasPair.first) + + " were not emitted."); } void AsmPrinter::emitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) { diff --git a/llvm/lib/CodeGen/AsmPrinter/DIEHashAttributes.def b/llvm/lib/CodeGen/AsmPrinter/DIEHashAttributes.def index 28a02390fccb..c872d0dd2dfa 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DIEHashAttributes.def +++ b/llvm/lib/CodeGen/AsmPrinter/DIEHashAttributes.def @@ -51,5 +51,5 @@ HANDLE_DIE_HASH_ATTR(DW_AT_virtuality) HANDLE_DIE_HASH_ATTR(DW_AT_visibility) HANDLE_DIE_HASH_ATTR(DW_AT_vtable_elem_location) HANDLE_DIE_HASH_ATTR(DW_AT_type) - +HANDLE_DIE_HASH_ATTR(DW_AT_linkage_name) #undef HANDLE_DIE_HASH_ATTR diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp index 5ce6fbb5f647..ad9dc517539a 100644 --- a/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -1646,6 +1646,8 @@ static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) { case AtomicRMWInst::Min: case AtomicRMWInst::UMax: case AtomicRMWInst::UMin: + case AtomicRMWInst::FMax: + case AtomicRMWInst::FMin: case AtomicRMWInst::FAdd: case AtomicRMWInst::FSub: // No atomic libcalls are available for max/min/umax/umin. diff --git a/llvm/lib/CodeGen/ExpandVectorPredication.cpp b/llvm/lib/CodeGen/ExpandVectorPredication.cpp index 7883a48d121c..59932a542bbc 100644 --- a/llvm/lib/CodeGen/ExpandVectorPredication.cpp +++ b/llvm/lib/CodeGen/ExpandVectorPredication.cpp @@ -120,8 +120,7 @@ static bool maySpeculateLanes(VPIntrinsic &VPI) { // Fallback to whether the intrinsic is speculatable. Optional<unsigned> OpcOpt = VPI.getFunctionalOpcode(); unsigned FunctionalOpc = OpcOpt.value_or((unsigned)Instruction::Call); - return isSafeToSpeculativelyExecuteWithOpcode(FunctionalOpc, - cast<Operator>(&VPI)); + return isSafeToSpeculativelyExecuteWithOpcode(FunctionalOpc, &VPI); } //// } Helpers diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp index 081c8b125f17..b06043fb4c31 100644 --- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp @@ -500,6 +500,12 @@ static void buildCopyToRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs, LLT DstTy = MRI.getType(DstRegs[0]); LLT LCMTy = getCoverTy(SrcTy, PartTy); + if (PartTy.isVector() && LCMTy == PartTy) { + assert(DstRegs.size() == 1); + B.buildPadVectorWithUndefElements(DstRegs[0], SrcReg); + return; + } + const unsigned DstSize = DstTy.getSizeInBits(); const unsigned SrcSize = SrcTy.getSizeInBits(); unsigned CoveringSize = LCMTy.getSizeInBits(); diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 2c94f87804ac..ad0c0c8315dc 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -697,14 +697,16 @@ bool CombinerHelper::matchCombineLoadWithAndMask(MachineInstr &MI, return false; Register SrcReg = MI.getOperand(1).getReg(); - GAnyLoad *LoadMI = getOpcodeDef<GAnyLoad>(SrcReg, MRI); - if (!LoadMI || !MRI.hasOneNonDBGUse(LoadMI->getDstReg()) || - !LoadMI->isSimple()) + // Don't use getOpcodeDef() here since intermediate instructions may have + // multiple users. 
+ GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(MRI.getVRegDef(SrcReg)); + if (!LoadMI || !MRI.hasOneNonDBGUse(LoadMI->getDstReg())) return false; Register LoadReg = LoadMI->getDstReg(); - LLT LoadTy = MRI.getType(LoadReg); + LLT RegTy = MRI.getType(LoadReg); Register PtrReg = LoadMI->getPointerReg(); + unsigned RegSize = RegTy.getSizeInBits(); uint64_t LoadSizeBits = LoadMI->getMemSizeInBits(); unsigned MaskSizeBits = MaskVal.countTrailingOnes(); @@ -715,7 +717,7 @@ bool CombinerHelper::matchCombineLoadWithAndMask(MachineInstr &MI, // If the mask covers the whole destination register, there's nothing to // extend - if (MaskSizeBits >= LoadTy.getSizeInBits()) + if (MaskSizeBits >= RegSize) return false; // Most targets cannot deal with loads of size < 8 and need to re-legalize to @@ -725,17 +727,26 @@ bool CombinerHelper::matchCombineLoadWithAndMask(MachineInstr &MI, const MachineMemOperand &MMO = LoadMI->getMMO(); LegalityQuery::MemDesc MemDesc(MMO); - MemDesc.MemoryTy = LLT::scalar(MaskSizeBits); + + // Don't modify the memory access size if this is atomic/volatile, but we can + // still adjust the opcode to indicate the high bit behavior. + if (LoadMI->isSimple()) + MemDesc.MemoryTy = LLT::scalar(MaskSizeBits); + else if (LoadSizeBits > MaskSizeBits || LoadSizeBits == RegSize) + return false; + + // TODO: Could check if it's legal with the reduced or original memory size. if (!isLegalOrBeforeLegalizer( - {TargetOpcode::G_ZEXTLOAD, {LoadTy, MRI.getType(PtrReg)}, {MemDesc}})) + {TargetOpcode::G_ZEXTLOAD, {RegTy, MRI.getType(PtrReg)}, {MemDesc}})) return false; MatchInfo = [=](MachineIRBuilder &B) { B.setInstrAndDebugLoc(*LoadMI); auto &MF = B.getMF(); auto PtrInfo = MMO.getPointerInfo(); - auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, MaskSizeBits / 8); + auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, MemDesc.MemoryTy); B.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, Dst, PtrReg, *NewMMO); + LoadMI->eraseFromParent(); }; return true; } @@ -805,21 +816,24 @@ bool CombinerHelper::matchSextInRegOfLoad( MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) { assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG); + Register DstReg = MI.getOperand(0).getReg(); + LLT RegTy = MRI.getType(DstReg); + // Only supports scalars for now. - if (MRI.getType(MI.getOperand(0).getReg()).isVector()) + if (RegTy.isVector()) return false; Register SrcReg = MI.getOperand(1).getReg(); auto *LoadDef = getOpcodeDef<GLoad>(SrcReg, MRI); - if (!LoadDef || !MRI.hasOneNonDBGUse(LoadDef->getOperand(0).getReg()) || - !LoadDef->isSimple()) + if (!LoadDef || !MRI.hasOneNonDBGUse(DstReg)) return false; + uint64_t MemBits = LoadDef->getMemSizeInBits(); + // If the sign extend extends from a narrower width than the load's width, // then we can narrow the load width when we combine to a G_SEXTLOAD. // Avoid widening the load at all. - unsigned NewSizeBits = std::min((uint64_t)MI.getOperand(2).getImm(), - LoadDef->getMemSizeInBits()); + unsigned NewSizeBits = std::min((uint64_t)MI.getOperand(2).getImm(), MemBits); // Don't generate G_SEXTLOADs with a < 1 byte width. if (NewSizeBits < 8) @@ -831,7 +845,15 @@ bool CombinerHelper::matchSextInRegOfLoad( const MachineMemOperand &MMO = LoadDef->getMMO(); LegalityQuery::MemDesc MMDesc(MMO); - MMDesc.MemoryTy = LLT::scalar(NewSizeBits); + + // Don't modify the memory access size if this is atomic/volatile, but we can + // still adjust the opcode to indicate the high bit behavior. 
+ if (LoadDef->isSimple()) + MMDesc.MemoryTy = LLT::scalar(NewSizeBits); + else if (MemBits > NewSizeBits || MemBits == RegTy.getSizeInBits()) + return false; + + // TODO: Could check if it's legal with the reduced or original memory size. if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SEXTLOAD, {MRI.getType(LoadDef->getDstReg()), MRI.getType(LoadDef->getPointerReg())}, diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index a2af66d28f4a..947facc87b71 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -2076,9 +2076,14 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, getStackGuard(getOrCreateVReg(CI), MIRBuilder); return true; case Intrinsic::stackprotector: { + const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering(); LLT PtrTy = getLLTForType(*CI.getArgOperand(0)->getType(), *DL); - Register GuardVal = MRI->createGenericVirtualRegister(PtrTy); - getStackGuard(GuardVal, MIRBuilder); + Register GuardVal; + if (TLI.useLoadStackGuardNode()) { + GuardVal = MRI->createGenericVirtualRegister(PtrTy); + getStackGuard(GuardVal, MIRBuilder); + } else + GuardVal = getOrCreateVReg(*CI.getArgOperand(0)); // The guard's value. AllocaInst *Slot = cast<AllocaInst>(CI.getArgOperand(1)); int FI = getOrCreateFrameIndex(*Slot); @@ -2883,6 +2888,12 @@ bool IRTranslator::translateAtomicRMW(const User &U, case AtomicRMWInst::FSub: Opcode = TargetOpcode::G_ATOMICRMW_FSUB; break; + case AtomicRMWInst::FMax: + Opcode = TargetOpcode::G_ATOMICRMW_FMAX; + break; + case AtomicRMWInst::FMin: + Opcode = TargetOpcode::G_ATOMICRMW_FMIN; + break; } MIRBuilder.buildAtomicRMW( diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index 19ebf46191a9..0d9580e25606 100644 --- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -473,6 +473,23 @@ MachineInstrBuilder MachineIRBuilder::buildBoolExt(const DstOp &Res, return buildInstr(ExtOp, Res, Op); } +MachineInstrBuilder MachineIRBuilder::buildBoolExtInReg(const DstOp &Res, + const SrcOp &Op, + bool IsVector, + bool IsFP) { + const auto *TLI = getMF().getSubtarget().getTargetLowering(); + switch (TLI->getBooleanContents(IsVector, IsFP)) { + case TargetLoweringBase::ZeroOrNegativeOneBooleanContent: + return buildSExtInReg(Res, Op, 1); + case TargetLoweringBase::ZeroOrOneBooleanContent: + return buildZExtInReg(Res, Op, 1); + case TargetLoweringBase::UndefinedBooleanContent: + return buildCopy(Res, Op); + } + + llvm_unreachable("unexpected BooleanContent"); +} + MachineInstrBuilder MachineIRBuilder::buildExtOrTrunc(unsigned ExtOpc, const DstOp &Res, const SrcOp &Op) { @@ -938,6 +955,20 @@ MachineIRBuilder::buildAtomicRMWFSub(const DstOp &OldValRes, const SrcOp &Addr, } MachineInstrBuilder +MachineIRBuilder::buildAtomicRMWFMax(const DstOp &OldValRes, const SrcOp &Addr, + const SrcOp &Val, MachineMemOperand &MMO) { + return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_FMAX, OldValRes, Addr, Val, + MMO); +} + +MachineInstrBuilder +MachineIRBuilder::buildAtomicRMWFMin(const DstOp &OldValRes, const SrcOp &Addr, + const SrcOp &Val, MachineMemOperand &MMO) { + return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_FMIN, OldValRes, Addr, Val, + MMO); +} + +MachineInstrBuilder MachineIRBuilder::buildFence(unsigned Ordering, unsigned Scope) { return buildInstr(TargetOpcode::G_FENCE) .addImm(Ordering) diff --git 
a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp index b3f38a3b53f3..55f3ad796291 100644 --- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp +++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp @@ -310,10 +310,11 @@ bool InterleavedAccess::lowerInterleavedLoad( Extracts.push_back(Extract); continue; } - auto *BI = dyn_cast<BinaryOperator>(User); - if (BI && BI->hasOneUse()) { - if (auto *SVI = dyn_cast<ShuffleVectorInst>(*BI->user_begin())) { - BinOpShuffles.insert(SVI); + if (auto *BI = dyn_cast<BinaryOperator>(User)) { + if (all_of(BI->users(), + [](auto *U) { return isa<ShuffleVectorInst>(U); })) { + for (auto *SVI : BI->users()) + BinOpShuffles.insert(cast<ShuffleVectorInst>(SVI)); continue; } } diff --git a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp index 30ca8bd871e8..43c12c67939e 100644 --- a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp +++ b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp @@ -536,6 +536,17 @@ public: // What was the old variable value? ValueIDNum OldValue = VarLocs[MLoc.asU64()]; + clobberMloc(MLoc, OldValue, Pos, MakeUndef); + } + /// Overload that takes an explicit value \p OldValue for when the value in + /// \p MLoc has changed and the TransferTracker's locations have not been + /// updated yet. + void clobberMloc(LocIdx MLoc, ValueIDNum OldValue, + MachineBasicBlock::iterator Pos, bool MakeUndef = true) { + auto ActiveMLocIt = ActiveMLocs.find(MLoc); + if (ActiveMLocIt == ActiveMLocs.end()) + return; + VarLocs[MLoc.asU64()] = ValueIDNum::EmptyValue; // Examine the remaining variable locations: if we can find the same value @@ -1730,9 +1741,35 @@ bool InstrRefBasedLDV::transferRegisterCopy(MachineInstr &MI) { if (EmulateOldLDV && !SrcRegOp->isKill()) return false; + // Before we update MTracker, remember which values were present in each of + // the locations about to be overwritten, so that we can recover any + // potentially clobbered variables. + DenseMap<LocIdx, ValueIDNum> ClobberedLocs; + if (TTracker) { + for (MCRegAliasIterator RAI(DestReg, TRI, true); RAI.isValid(); ++RAI) { + LocIdx ClobberedLoc = MTracker->getRegMLoc(*RAI); + auto MLocIt = TTracker->ActiveMLocs.find(ClobberedLoc); + // If ActiveMLocs isn't tracking this location or there are no variables + // using it, don't bother remembering. + if (MLocIt == TTracker->ActiveMLocs.end() || MLocIt->second.empty()) + continue; + ValueIDNum Value = MTracker->readReg(*RAI); + ClobberedLocs[ClobberedLoc] = Value; + } + } + // Copy MTracker info, including subregs if available. InstrRefBasedLDV::performCopy(SrcReg, DestReg); + // The copy might have clobbered variables based on the destination register. + // Tell TTracker about it, passing the old ValueIDNum to search for + // alternative locations (or else terminating those variables). + if (TTracker) { + for (auto LocVal : ClobberedLocs) { + TTracker->clobberMloc(LocVal.first, LocVal.second, MI.getIterator(), false); + } + } + // Only produce a transfer of DBG_VALUE within a block where old LDV // would have. We might make use of the additional value tracking in some // other way, later. @@ -1744,15 +1781,6 @@ bool InstrRefBasedLDV::transferRegisterCopy(MachineInstr &MI) { if (EmulateOldLDV && SrcReg != DestReg) MTracker->defReg(SrcReg, CurBB, CurInst); - // Finally, the copy might have clobbered variables based on the destination - // register. Tell TTracker about it, in case a backup location exists. 
- if (TTracker) { - for (MCRegAliasIterator RAI(DestReg, TRI, true); RAI.isValid(); ++RAI) { - LocIdx ClobberedLoc = MTracker->getRegMLoc(*RAI); - TTracker->clobberMloc(ClobberedLoc, MI.getIterator(), false); - } - } - return true; } diff --git a/llvm/lib/CodeGen/LiveIntervals.cpp b/llvm/lib/CodeGen/LiveIntervals.cpp index 7d825a8bf853..1242ce20b732 100644 --- a/llvm/lib/CodeGen/LiveIntervals.cpp +++ b/llvm/lib/CodeGen/LiveIntervals.cpp @@ -1049,12 +1049,17 @@ public: // we may end up with a main range not covering all subranges. // This is extremely rare case, so let's check and reconstruct the // main range. - for (LiveInterval::SubRange &S : LI.subranges()) { - if (LI.covers(S)) - continue; - LI.clear(); - LIS.constructMainRangeFromSubranges(LI); - break; + if (LI.hasSubRanges()) { + unsigned SubReg = MO.getSubReg(); + LaneBitmask LaneMask = SubReg ? TRI.getSubRegIndexLaneMask(SubReg) + : MRI.getMaxLaneMaskForVReg(Reg); + for (LiveInterval::SubRange &S : LI.subranges()) { + if ((S.LaneMask & LaneMask).none() || LI.covers(S)) + continue; + LI.clear(); + LIS.constructMainRangeFromSubranges(LI); + break; + } } continue; diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp index 40ae7053ea09..0c94e1f7e474 100644 --- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp @@ -742,7 +742,7 @@ bool MIParser::parseBasicBlockDefinition( MBB->setIsInlineAsmBrIndirectTarget(IsInlineAsmBrIndirectTarget); MBB->setIsEHFuncletEntry(IsEHFuncletEntry); if (SectionID) { - MBB->setSectionID(SectionID.getValue()); + MBB->setSectionID(SectionID.value()); MF.setBBSectionsType(BasicBlockSection::List); } return false; diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp index c186d0ba9969..02c44fa85cd9 100644 --- a/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -451,7 +451,7 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST, if (IrrLoopHeaderWeight && IsStandalone) { if (Indexes) OS << '\t'; OS.indent(2) << "; Irreducible loop header weight: " - << IrrLoopHeaderWeight.getValue() << '\n'; + << IrrLoopHeaderWeight.value() << '\n'; } } diff --git a/llvm/lib/CodeGen/MachineFunctionSplitter.cpp b/llvm/lib/CodeGen/MachineFunctionSplitter.cpp index 81c97ba6a086..867a7ed584b2 100644 --- a/llvm/lib/CodeGen/MachineFunctionSplitter.cpp +++ b/llvm/lib/CodeGen/MachineFunctionSplitter.cpp @@ -106,8 +106,8 @@ bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) { // We don't want to proceed further for cold functions // or functions of unknown hotness. Lukewarm functions have no prefix. 
Optional<StringRef> SectionPrefix = MF.getFunction().getSectionPrefix(); - if (SectionPrefix && (SectionPrefix.getValue().equals("unlikely") || - SectionPrefix.getValue().equals("unknown"))) { + if (SectionPrefix && (SectionPrefix.value().equals("unlikely") || + SectionPrefix.value().equals("unknown"))) { return false; } diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp index 4e00a211713e..5f80445a5a34 100644 --- a/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/llvm/lib/CodeGen/MachineScheduler.cpp @@ -93,8 +93,11 @@ cl::opt<bool> VerifyScheduling( cl::opt<bool> ViewMISchedDAGs( "view-misched-dags", cl::Hidden, cl::desc("Pop up a window to show MISched dags after they are processed")); +cl::opt<bool> PrintDAGs("misched-print-dags", cl::Hidden, + cl::desc("Print schedule DAGs")); #else const bool ViewMISchedDAGs = false; +const bool PrintDAGs = false; #endif // NDEBUG } // end namespace llvm @@ -112,10 +115,6 @@ static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden, cl::desc("Only schedule this function")); static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden, cl::desc("Only schedule this MBB#")); -static cl::opt<bool> PrintDAGs("misched-print-dags", cl::Hidden, - cl::desc("Print schedule DAGs")); -#else -static const bool PrintDAGs = false; #endif // NDEBUG /// Avoid quadratic complexity in unusually large basic blocks by limiting the diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp index db04f2bcc095..7a008bae726e 100644 --- a/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/llvm/lib/CodeGen/MachineVerifier.cpp @@ -293,6 +293,7 @@ namespace { } void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addUsedIfAvailable<LiveStacks>(); AU.setPreservesAll(); MachineFunctionPass::getAnalysisUsage(AU); } diff --git a/llvm/lib/CodeGen/ModuloSchedule.cpp b/llvm/lib/CodeGen/ModuloSchedule.cpp index 3245d9649be1..581168b31384 100644 --- a/llvm/lib/CodeGen/ModuloSchedule.cpp +++ b/llvm/lib/CodeGen/ModuloSchedule.cpp @@ -1448,7 +1448,7 @@ Register KernelRewriter::phi(Register LoopReg, Optional<Register> InitReg, const TargetRegisterClass *RC) { // If the init register is not undef, try and find an existing phi. if (InitReg) { - auto I = Phis.find({LoopReg, InitReg.getValue()}); + auto I = Phis.find({LoopReg, InitReg.value()}); if (I != Phis.end()) return I->second; } else { @@ -1469,10 +1469,10 @@ Register KernelRewriter::phi(Register LoopReg, Optional<Register> InitReg, return R; // Found a phi taking undef as input, so rewrite it to take InitReg. MachineInstr *MI = MRI.getVRegDef(R); - MI->getOperand(1).setReg(InitReg.getValue()); - Phis.insert({{LoopReg, InitReg.getValue()}, R}); + MI->getOperand(1).setReg(InitReg.value()); + Phis.insert({{LoopReg, InitReg.value()}, R}); const TargetRegisterClass *ConstrainRegClass = - MRI.constrainRegClass(R, MRI.getRegClass(InitReg.getValue())); + MRI.constrainRegClass(R, MRI.getRegClass(InitReg.value())); assert(ConstrainRegClass && "Expected a valid constrained register class!"); (void)ConstrainRegClass; UndefPhis.erase(I); diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp index 1a0f296d5fdc..89a43c4f57f6 100644 --- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp +++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp @@ -554,7 +554,7 @@ static void updateLiveness(MachineFunction &MF) { } } -/// Insert restore code for the callee-saved registers used in the function. 
+/// Insert spill code for the callee-saved registers used in the function. static void insertCSRSaves(MachineBasicBlock &SaveBlock, ArrayRef<CalleeSavedInfo> CSI) { MachineFunction &MF = *SaveBlock.getParent(); diff --git a/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h b/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h index d57b0ca6d53d..d6a3997e4b70 100644 --- a/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h +++ b/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h @@ -148,9 +148,6 @@ protected: /// Run or not the local reassignment heuristic. This information is /// obtained from the TargetSubtargetInfo. const bool EnableLocalReassign; - -private: - unsigned NextCascade = 1; }; /// ImmutableAnalysis abstraction for fetching the Eviction Advisor. We model it diff --git a/llvm/lib/CodeGen/SelectOptimize.cpp b/llvm/lib/CodeGen/SelectOptimize.cpp index c199b6a6cca8..d627519a34aa 100644 --- a/llvm/lib/CodeGen/SelectOptimize.cpp +++ b/llvm/lib/CodeGen/SelectOptimize.cpp @@ -870,8 +870,8 @@ bool SelectOptimize::computeLoopCosts( ORE->emit(ORmissL); return false; } - IPredCost += Scaled64::get(ILatency.getValue()); - INonPredCost += Scaled64::get(ILatency.getValue()); + IPredCost += Scaled64::get(ILatency.value()); + INonPredCost += Scaled64::get(ILatency.value()); // For a select that can be converted to branch, // compute its cost as a branch (non-predicated cost). diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index aa688d9dda3c..2654c00929d8 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -2392,12 +2392,14 @@ static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) { // add (srl (not X), 31), C --> add (sra X, 31), (C + 1) // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1) SDLoc DL(N); - auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL; - SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt); - if (SDValue NewC = - DAG.FoldConstantArithmetic(IsAdd ? ISD::ADD : ISD::SUB, DL, VT, - {ConstantOp, DAG.getConstant(1, DL, VT)})) + if (SDValue NewC = DAG.FoldConstantArithmetic( + IsAdd ? ISD::ADD : ISD::SUB, DL, VT, + {ConstantOp, DAG.getConstant(1, DL, VT)})) { + SDValue NewShift = DAG.getNode(IsAdd ? ISD::SRA : ISD::SRL, DL, VT, + Not.getOperand(0), ShAmt); return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC); + } + return SDValue(); } @@ -3760,6 +3762,17 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { } } + // If there's no chance of borrowing from adjacent bits, then sub is xor: + // sub C0, X --> xor X, C0 + if (ConstantSDNode *C0 = isConstOrConstSplat(N0)) { + if (!C0->isOpaque()) { + const APInt &C0Val = C0->getAPIntValue(); + const APInt &MaybeOnes = ~DAG.computeKnownBits(N1).Zero; + if ((C0Val - MaybeOnes) == (C0Val ^ MaybeOnes)) + return DAG.getNode(ISD::XOR, DL, VT, N1, N0); + } + } + return SDValue(); } @@ -4550,13 +4563,12 @@ SDValue DAGCombiner::visitREM(SDNode *N) { SDLoc DL(N); // fold (rem c1, c2) -> c1%c2 - ConstantSDNode *N1C = isConstOrConstSplat(N1); if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1})) return C; // fold (urem X, -1) -> select(FX == -1, 0, FX) // Freeze the numerator to avoid a miscompile with an undefined value. 
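A standalone check of the arithmetic identity behind the urem fold above (plain C++, not LLVM code); the freeze exists so that both uses of the numerator observe the same value even when it is undef or poison:

    #include <cassert>
    #include <cstdint>
    // For unsigned X: X urem -1 == (X == -1 ? 0 : X), since every value
    // other than -1 is already smaller than the divisor.
    int main() {
      for (uint32_t X : {0u, 1u, 0x7FFFFFFFu, 0xFFFFFFFEu, 0xFFFFFFFFu})
        assert(X % 0xFFFFFFFFu == (X == 0xFFFFFFFFu ? 0u : X));
    }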
- if (!isSigned && N1C && N1C->isAllOnes()) { + if (!isSigned && llvm::isAllOnesOrAllOnesSplat(N1, /*AllowUndefs*/ false)) { SDValue F0 = DAG.getFreeze(N0); SDValue EqualsNeg1 = DAG.getSetCC(DL, CCVT, F0, N1, ISD::SETEQ); return DAG.getSelect(DL, VT, EqualsNeg1, DAG.getConstant(0, DL, VT), F0); @@ -4581,9 +4593,12 @@ SDValue DAGCombiner::visitREM(SDNode *N) { AddToWorklist(Add.getNode()); return DAG.getNode(ISD::AND, DL, VT, N0, Add); } - if (N1.getOpcode() == ISD::SHL && + // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1)) + // fold (urem x, (lshr pow2, y)) -> (and x, (add (lshr pow2, y), -1)) + // TODO: We should sink the following into isKnownToBePowerOfTwo + // using a OrZero parameter analogous to our handling in ValueTracking. + if ((N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) && DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) { - // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1)) SDValue NegOne = DAG.getAllOnesConstant(DL, VT); SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne); AddToWorklist(Add.getNode()); @@ -9288,31 +9303,44 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper. // sra (add (shl X, N1C), AddC), N1C --> // sext (add (trunc X to (width - N1C)), AddC') - if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() && N1C && - N0.getOperand(0).getOpcode() == ISD::SHL && - N0.getOperand(0).getOperand(1) == N1 && N0.getOperand(0).hasOneUse()) { - if (ConstantSDNode *AddC = isConstOrConstSplat(N0.getOperand(1))) { - SDValue Shl = N0.getOperand(0); - // Determine what the truncate's type would be and ask the target if that - // is a free operation. - LLVMContext &Ctx = *DAG.getContext(); - unsigned ShiftAmt = N1C->getZExtValue(); - EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt); - if (VT.isVector()) - TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount()); - - // TODO: The simple type check probably belongs in the default hook - // implementation and/or target-specific overrides (because - // non-simple types likely require masking when legalized), but that - // restriction may conflict with other transforms. - if (TruncVT.isSimple() && isTypeLegal(TruncVT) && - TLI.isTruncateFree(VT, TruncVT)) { - SDLoc DL(N); - SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT); - SDValue ShiftC = DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt). - trunc(TruncVT.getScalarSizeInBits()), DL, TruncVT); - SDValue Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC); - return DAG.getSExtOrTrunc(Add, DL, VT); + // sra (sub AddC, (shl X, N1C)), N1C --> + // sext (sub AddC1',(trunc X to (width - N1C))) + if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB) && N1C && + N0.hasOneUse()) { + bool IsAdd = N0.getOpcode() == ISD::ADD; + SDValue Shl = N0.getOperand(IsAdd ? 0 : 1); + if (Shl.getOpcode() == ISD::SHL && Shl.getOperand(1) == N1 && + Shl.hasOneUse()) { + // TODO: AddC does not need to be a splat. + if (ConstantSDNode *AddC = + isConstOrConstSplat(N0.getOperand(IsAdd ? 1 : 0))) { + // Determine what the truncate's type would be and ask the target if + // that is a free operation. 
+ LLVMContext &Ctx = *DAG.getContext(); + unsigned ShiftAmt = N1C->getZExtValue(); + EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt); + if (VT.isVector()) + TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount()); + + // TODO: The simple type check probably belongs in the default hook + // implementation and/or target-specific overrides (because + // non-simple types likely require masking when legalized), but + // that restriction may conflict with other transforms. + if (TruncVT.isSimple() && isTypeLegal(TruncVT) && + TLI.isTruncateFree(VT, TruncVT)) { + SDLoc DL(N); + SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT); + SDValue ShiftC = + DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).trunc( + TruncVT.getScalarSizeInBits()), + DL, TruncVT); + SDValue Add; + if (IsAdd) + Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC); + else + Add = DAG.getNode(ISD::SUB, DL, TruncVT, ShiftC, Trunc); + return DAG.getSExtOrTrunc(Add, DL, VT); + } } } } @@ -11025,6 +11053,9 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { if (SDValue V = foldVSelectToSignBitSplatMask(N, DAG)) return V; + if (SimplifyDemandedVectorElts(SDValue(N, 0))) + return SDValue(N, 0); + return SDValue(); } @@ -13243,18 +13274,6 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { } } - // See if we can simplify the input to this truncate through knowledge that - // only the low bits are being used. - // For example "trunc (or (shl x, 8), y)" // -> trunc y - // Currently we only perform this optimization on scalars because vectors - // may have different active low bits. - if (!VT.isVector()) { - APInt Mask = - APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits()); - if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask)) - return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter); - } - // fold (truncate (load x)) -> (smaller load x) // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits)) if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) { @@ -13341,6 +13360,18 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { if (SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); + // See if we can simplify the input to this truncate through knowledge that + // only the low bits are being used. + // For example "trunc (or (shl x, 8), y)" // -> trunc y + // Currently we only perform this optimization on scalars because vectors + // may have different active low bits. 
+ if (!VT.isVector()) { + APInt Mask = + APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits()); + if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask)) + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter); + } + // fold (truncate (extract_subvector(ext x))) -> // (extract_subvector x) // TODO: This can be generalized to cover cases where the truncate and extract @@ -24514,8 +24545,9 @@ bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const { auto &Size0 = MUC0.NumBytes; auto &Size1 = MUC1.NumBytes; if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 && - Size0 && Size1 && *Size0 == *Size1 && OrigAlignment0 > *Size0 && - SrcValOffset0 % *Size0 == 0 && SrcValOffset1 % *Size1 == 0) { + Size0.has_value() && Size1.has_value() && *Size0 == *Size1 && + OrigAlignment0 > *Size0 && SrcValOffset0 % *Size0 == 0 && + SrcValOffset1 % *Size1 == 0) { int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value(); int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value(); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index f464208cd9dc..6c136bdfc652 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -2915,6 +2915,9 @@ bool DAGTypeLegalizer::SoftPromoteHalfOperand(SDNode *N, unsigned OpNo) { case ISD::SELECT_CC: Res = SoftPromoteHalfOp_SELECT_CC(N, OpNo); break; case ISD::SETCC: Res = SoftPromoteHalfOp_SETCC(N); break; case ISD::STORE: Res = SoftPromoteHalfOp_STORE(N, OpNo); break; + case ISD::STACKMAP: + Res = SoftPromoteHalfOp_STACKMAP(N, OpNo); + break; } if (!Res.getNode()) @@ -3042,3 +3045,17 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfOp_STORE(SDNode *N, unsigned OpNo) { return DAG.getStore(ST->getChain(), dl, Promoted, ST->getBasePtr(), ST->getMemOperand()); } + +SDValue DAGTypeLegalizer::SoftPromoteHalfOp_STACKMAP(SDNode *N, unsigned OpNo) { + assert(OpNo > 1); // Because the first two arguments are guaranteed legal. + SmallVector<SDValue> NewOps(N->ops().begin(), N->ops().end()); + SDValue Op = N->getOperand(OpNo); + NewOps[OpNo] = GetSoftPromotedHalf(Op); + SDValue NewNode = + DAG.getNode(N->getOpcode(), SDLoc(N), N->getVTList(), NewOps); + + for (unsigned ResNum = 0; ResNum < N->getNumValues(); ResNum++) + ReplaceValueWith(SDValue(N, ResNum), NewNode.getValue(ResNum)); + + return SDValue(); // Signal that we replaced the node ourselves. +} diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 69fd83bcd7b3..343722a97c3c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -19,6 +19,7 @@ #include "LegalizeTypes.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/CodeGen/StackMaps.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" @@ -1723,6 +1724,9 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { break; case ISD::SET_ROUNDING: Res = PromoteIntOp_SET_ROUNDING(N); break; + case ISD::STACKMAP: + Res = PromoteIntOp_STACKMAP(N, OpNo); + break; } // If the result is null, the sub-method took care of registering results etc. 
@@ -2255,16 +2259,40 @@ SDValue DAGTypeLegalizer::PromoteIntOp_VECREDUCE(SDNode *N) { SDLoc dl(N); SDValue Op = PromoteIntOpVectorReduction(N, N->getOperand(0)); - EVT EltVT = Op.getValueType().getVectorElementType(); - EVT VT = N->getValueType(0); + EVT OrigEltVT = N->getOperand(0).getValueType().getVectorElementType(); + EVT InVT = Op.getValueType(); + EVT EltVT = InVT.getVectorElementType(); + EVT ResVT = N->getValueType(0); + unsigned Opcode = N->getOpcode(); - if (VT.bitsGE(EltVT)) - return DAG.getNode(N->getOpcode(), SDLoc(N), VT, Op); + // An i1 vecreduce_xor is equivalent to vecreduce_add, use that instead if + // vecreduce_xor is not legal + if (Opcode == ISD::VECREDUCE_XOR && OrigEltVT == MVT::i1 && + !TLI.isOperationLegalOrCustom(ISD::VECREDUCE_XOR, InVT) && + TLI.isOperationLegalOrCustom(ISD::VECREDUCE_ADD, InVT)) + Opcode = ISD::VECREDUCE_ADD; + + // An i1 vecreduce_or is equivalent to vecreduce_umax, use that instead if + // vecreduce_or is not legal + else if (Opcode == ISD::VECREDUCE_OR && OrigEltVT == MVT::i1 && + !TLI.isOperationLegalOrCustom(ISD::VECREDUCE_OR, InVT) && + TLI.isOperationLegalOrCustom(ISD::VECREDUCE_UMAX, InVT)) + Opcode = ISD::VECREDUCE_UMAX; + + // An i1 vecreduce_and is equivalent to vecreduce_umin, use that instead if + // vecreduce_and is not legal + else if (Opcode == ISD::VECREDUCE_AND && OrigEltVT == MVT::i1 && + !TLI.isOperationLegalOrCustom(ISD::VECREDUCE_AND, InVT) && + TLI.isOperationLegalOrCustom(ISD::VECREDUCE_UMIN, InVT)) + Opcode = ISD::VECREDUCE_UMIN; + + if (ResVT.bitsGE(EltVT)) + return DAG.getNode(Opcode, SDLoc(N), ResVT, Op); // Result size must be >= element size. If this is not the case after // promotion, also promote the result type and then truncate. - SDValue Reduce = DAG.getNode(N->getOpcode(), dl, EltVT, Op); - return DAG.getNode(ISD::TRUNCATE, dl, VT, Reduce); + SDValue Reduce = DAG.getNode(Opcode, dl, EltVT, Op); + return DAG.getNode(ISD::TRUNCATE, dl, ResVT, Reduce); } SDValue DAGTypeLegalizer::PromoteIntOp_VP_REDUCE(SDNode *N, unsigned OpNo) { @@ -2304,6 +2332,15 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SET_ROUNDING(SDNode *N) { return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op), 0); } +SDValue DAGTypeLegalizer::PromoteIntOp_STACKMAP(SDNode *N, unsigned OpNo) { + assert(OpNo > 1); // Because the first two arguments are guaranteed legal. + SmallVector<SDValue> NewOps(N->ops().begin(), N->ops().end()); + SDValue Operand = N->getOperand(OpNo); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Operand.getValueType()); + NewOps[OpNo] = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), NVT, Operand); + return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); +} + //===----------------------------------------------------------------------===// // Integer Result Expansion //===----------------------------------------------------------------------===// @@ -4653,6 +4690,9 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::FRAMEADDR: Res = ExpandIntOp_RETURNADDR(N); break; case ISD::ATOMIC_STORE: Res = ExpandIntOp_ATOMIC_STORE(N); break; + case ISD::STACKMAP: + Res = ExpandIntOp_STACKMAP(N, OpNo); + break; } // If the result is null, the sub-method took care of registering results etc. 
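The i1 VECREDUCE rewrites above lean on three boolean identities: xor-reduction of i1 lanes is addition modulo 2 (parity), or-reduction is an unsigned max, and and-reduction is an unsigned min. A self-contained check of those identities over 0/1 elements (ordinary C++, not the legalizer API; the vector stands in for any promoted i1 vector, and the final truncation back to i1 is modeled by masking with 1):

    #include <algorithm>
    #include <cassert>
    #include <functional>
    #include <numeric>
    #include <vector>

    int main() {
      std::vector<unsigned> V = {1, 0, 1, 1}; // promoted i1 lanes
      unsigned Xor = std::accumulate(V.begin(), V.end(), 0u, std::bit_xor<>());
      unsigned Sum = std::accumulate(V.begin(), V.end(), 0u);
      assert(Xor == (Sum & 1)); // vecreduce_xor == vecreduce_add, mod 2
      unsigned Or = std::accumulate(V.begin(), V.end(), 0u, std::bit_or<>());
      assert(Or == *std::max_element(V.begin(), V.end())); // == vecreduce_umax
      unsigned And = std::accumulate(V.begin(), V.end(), 1u, std::bit_and<>());
      assert(And == *std::min_element(V.begin(), V.end())); // == vecreduce_umin
      return 0;
    }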
@@ -5481,3 +5521,44 @@ SDValue DAGTypeLegalizer::PromoteIntOp_CONCAT_VECTORS(SDNode *N) { return DAG.getBuildVector(N->getValueType(0), dl, NewOps); } + +SDValue DAGTypeLegalizer::ExpandIntOp_STACKMAP(SDNode *N, unsigned OpNo) { + assert(OpNo > 1); + + SDValue Op = N->getOperand(OpNo); + SDLoc DL = SDLoc(N); + SmallVector<SDValue> NewOps; + + // Copy operands before the one being expanded. + for (unsigned I = 0; I < OpNo; I++) + NewOps.push_back(N->getOperand(I)); + + if (Op->getOpcode() == ISD::Constant) { + ConstantSDNode *CN = cast<ConstantSDNode>(Op); + EVT Ty = Op.getValueType(); + if (CN->getConstantIntValue()->getValue().getActiveBits() < 64) { + NewOps.push_back( + DAG.getTargetConstant(StackMaps::ConstantOp, DL, MVT::i64)); + NewOps.push_back(DAG.getTargetConstant(CN->getZExtValue(), DL, Ty)); + } else { + // FIXME: https://github.com/llvm/llvm-project/issues/55609 + return SDValue(); + } + } else { + // FIXME: Non-constant operands are not yet handled: + // - https://github.com/llvm/llvm-project/issues/26431 + // - https://github.com/llvm/llvm-project/issues/55957 + return SDValue(); + } + + // Copy remaining operands. + for (unsigned I = OpNo + 1; I < N->getNumOperands(); I++) + NewOps.push_back(N->getOperand(I)); + + SDValue NewNode = DAG.getNode(N->getOpcode(), DL, N->getVTList(), NewOps); + + for (unsigned ResNum = 0; ResNum < N->getNumValues(); ResNum++) + ReplaceValueWith(SDValue(N, ResNum), NewNode.getValue(ResNum)); + + return SDValue(); // Signal that we have replaced the node already. +} diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index de320290bda9..2807b7f5ae68 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -402,6 +402,7 @@ private: SDValue PromoteIntOp_VECREDUCE(SDNode *N); SDValue PromoteIntOp_VP_REDUCE(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_SET_ROUNDING(SDNode *N); + SDValue PromoteIntOp_STACKMAP(SDNode *N, unsigned OpNo); void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code); @@ -493,6 +494,7 @@ private: SDValue ExpandIntOp_RETURNADDR(SDNode *N); SDValue ExpandIntOp_ATOMIC_STORE(SDNode *N); SDValue ExpandIntOp_SPLAT_VECTOR(SDNode *N); + SDValue ExpandIntOp_STACKMAP(SDNode *N, unsigned OpNo); void IntegerExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &dl); @@ -741,6 +743,7 @@ private: SDValue SoftPromoteHalfOp_SETCC(SDNode *N); SDValue SoftPromoteHalfOp_SELECT_CC(SDNode *N, unsigned OpNo); SDValue SoftPromoteHalfOp_STORE(SDNode *N, unsigned OpNo); + SDValue SoftPromoteHalfOp_STACKMAP(SDNode *N, unsigned OpNo); //===--------------------------------------------------------------------===// // Scalarization Support: LegalizeVectorTypes.cpp diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index fa555be00ded..143abc08eeea 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -5627,7 +5627,6 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { EVT VT = N->getValueType(0); EVT EltVT = VT.getVectorElementType(); SDLoc dl(N); - unsigned NumElts = VT.getVectorNumElements(); SDValue InOp = N->getOperand(N->isStrictFPOpcode() ? 
1 : 0); assert(getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector && @@ -5639,7 +5638,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { // See if a widened result type would be legal, if so widen the node. // FIXME: This isn't safe for StrictFP. Other optimization here is needed. EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, - InVT.getVectorNumElements()); + InVT.getVectorElementCount()); if (TLI.isTypeLegal(WideVT) && !N->isStrictFPOpcode()) { SDValue Res; if (N->isStrictFPOpcode()) { @@ -5665,6 +5664,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { EVT InEltVT = InVT.getVectorElementType(); // Unroll the convert into some scalar code and create a nasty build vector. + unsigned NumElts = VT.getVectorNumElements(); SmallVector<SDValue, 16> Ops(NumElts); if (N->isStrictFPOpcode()) { SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end()); @@ -6055,7 +6055,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) { // The result type is legal, if its vXi1, keep vXi1 for the new SETCC. if (VT.getScalarType() == MVT::i1) SVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, - SVT.getVectorNumElements()); + SVT.getVectorElementCount()); SDValue WideSETCC = DAG.getNode(ISD::SETCC, SDLoc(N), SVT, InOp0, InOp1, N->getOperand(2)); @@ -6063,7 +6063,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) { // Extract the needed results from the result vector. EVT ResVT = EVT::getVectorVT(*DAG.getContext(), SVT.getVectorElementType(), - VT.getVectorNumElements()); + VT.getVectorElementCount()); SDValue CC = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResVT, WideSETCC, DAG.getVectorIdxConstant(0, dl)); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index b3b8756ae9ba..c8d0f5faf647 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -60,7 +60,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" #include "llvm/Support/MachineValueType.h" -#include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Mutex.h" #include "llvm/Support/raw_ostream.h" @@ -3271,6 +3270,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known.Zero.setBitsFrom(1); break; case ISD::SETCC: + case ISD::SETCCCARRY: case ISD::STRICT_FSETCC: case ISD::STRICT_FSETCCS: { unsigned OpNo = Op->isStrictFPOpcode() ? 1 : 0; @@ -3506,6 +3506,8 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, break; case ISD::USUBO: case ISD::SSUBO: + case ISD::SUBCARRY: + case ISD::SSUBO_CARRY: if (Op.getResNo() == 1) { // If we know the result of a setcc has the top bits zero, use this info. if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) == @@ -3520,6 +3522,10 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, assert(Op.getResNo() == 0 && "We only compute knownbits for the difference here."); + // TODO: Compute influence of the carry operand. 
+ if (Opcode == ISD::SUBCARRY || Opcode == ISD::SSUBO_CARRY) + break; + Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); Known = KnownBits::computeForAddSub(/* Add */ false, /* NSW */ false, @@ -3529,6 +3535,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, case ISD::UADDO: case ISD::SADDO: case ISD::ADDCARRY: + case ISD::SADDO_CARRY: if (Op.getResNo() == 1) { // If we know the result of a setcc has the top bits zero, use this info. if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) == @@ -3548,7 +3555,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, if (Opcode == ISD::ADDE) // Can't track carry from glue, set carry to unknown. Carry.resetAll(); - else if (Opcode == ISD::ADDCARRY) + else if (Opcode == ISD::ADDCARRY || Opcode == ISD::SADDO_CARRY) // TODO: Compute known bits for the carry operand. Not sure if it is worth // the trouble (how often will we find a known carry bit). And I haven't // tested this very much yet, but something like this might work: @@ -3862,6 +3869,12 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const { if (C->getAPIntValue().zextOrTrunc(BitWidth).isPowerOf2()) return true; + // vscale(power-of-two) is a power-of-two for some targets + if (Val.getOpcode() == ISD::VSCALE && + getTargetLoweringInfo().isVScaleKnownToBeAPowerOfTwo() && + isKnownToBeAPowerOfTwo(Val.getOperand(0))) + return true; + // More could be done here, though the above checks are enough // to handle some common cases. @@ -4108,8 +4121,12 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, return std::min(Tmp, Tmp2); case ISD::SADDO: case ISD::UADDO: + case ISD::SADDO_CARRY: + case ISD::ADDCARRY: case ISD::SSUBO: case ISD::USUBO: + case ISD::SSUBO_CARRY: + case ISD::SUBCARRY: case ISD::SMULO: case ISD::UMULO: if (Op.getResNo() != 1) @@ -4123,6 +4140,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, return VTBits; break; case ISD::SETCC: + case ISD::SETCCCARRY: case ISD::STRICT_FSETCC: case ISD::STRICT_FSETCCS: { unsigned OpNo = Op->isStrictFPOpcode() ? 1 : 0; @@ -7505,6 +7523,8 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, Opcode == ISD::ATOMIC_LOAD_UMAX || Opcode == ISD::ATOMIC_LOAD_FADD || Opcode == ISD::ATOMIC_LOAD_FSUB || + Opcode == ISD::ATOMIC_LOAD_FMAX || + Opcode == ISD::ATOMIC_LOAD_FMIN || Opcode == ISD::ATOMIC_SWAP || Opcode == ISD::ATOMIC_STORE) && "Invalid Atomic Op"); @@ -10739,19 +10759,19 @@ namespace { } // end anonymous namespace -static ManagedStatic<std::set<EVT, EVT::compareRawBits>> EVTs; -static ManagedStatic<EVTArray> SimpleVTArray; -static ManagedStatic<sys::SmartMutex<true>> VTMutex; - /// getValueTypeList - Return a pointer to the specified value type. 
/// const EVT *SDNode::getValueTypeList(EVT VT) { + static std::set<EVT, EVT::compareRawBits> EVTs; + static EVTArray SimpleVTArray; + static sys::SmartMutex<true> VTMutex; + if (VT.isExtended()) { - sys::SmartScopedLock<true> Lock(*VTMutex); - return &(*EVTs->insert(VT).first); + sys::SmartScopedLock<true> Lock(VTMutex); + return &(*EVTs.insert(VT).first); } assert(VT.getSimpleVT() < MVT::VALUETYPE_SIZE && "Value type out of range!"); - return &SimpleVTArray->VTs[VT.getSimpleVT().SimpleTy]; + return &SimpleVTArray.VTs[VT.getSimpleVT().SimpleTy]; } /// hasNUsesOfValue - Return true if there are exactly NUSES uses of the diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 37d05cdba76d..fe3c38ec590d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -703,7 +703,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, unsigned NumRegs; if (IsABIRegCopy) { NumRegs = TLI.getVectorTypeBreakdownForCallingConv( - *DAG.getContext(), CallConv.getValue(), ValueVT, IntermediateVT, + *DAG.getContext(), CallConv.value(), ValueVT, IntermediateVT, NumIntermediates, RegisterVT); } else { NumRegs = @@ -800,11 +800,11 @@ RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI, for (EVT ValueVT : ValueVTs) { unsigned NumRegs = isABIMangled() - ? TLI.getNumRegistersForCallingConv(Context, CC.getValue(), ValueVT) + ? TLI.getNumRegistersForCallingConv(Context, CC.value(), ValueVT) : TLI.getNumRegisters(Context, ValueVT); MVT RegisterVT = isABIMangled() - ? TLI.getRegisterTypeForCallingConv(Context, CC.getValue(), ValueVT) + ? TLI.getRegisterTypeForCallingConv(Context, CC.value(), ValueVT) : TLI.getRegisterType(Context, ValueVT); for (unsigned i = 0; i != NumRegs; ++i) Regs.push_back(Reg + i); @@ -831,10 +831,10 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, // Copy the legal parts from the registers. EVT ValueVT = ValueVTs[Value]; unsigned NumRegs = RegCount[Value]; - MVT RegisterVT = isABIMangled() ? TLI.getRegisterTypeForCallingConv( - *DAG.getContext(), - CallConv.getValue(), RegVTs[Value]) - : RegVTs[Value]; + MVT RegisterVT = + isABIMangled() ? TLI.getRegisterTypeForCallingConv( + *DAG.getContext(), CallConv.value(), RegVTs[Value]) + : RegVTs[Value]; Parts.resize(NumRegs); for (unsigned i = 0; i != NumRegs; ++i) { @@ -914,10 +914,10 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) { unsigned NumParts = RegCount[Value]; - MVT RegisterVT = isABIMangled() ? TLI.getRegisterTypeForCallingConv( - *DAG.getContext(), - CallConv.getValue(), RegVTs[Value]) - : RegVTs[Value]; + MVT RegisterVT = + isABIMangled() ? 
TLI.getRegisterTypeForCallingConv( + *DAG.getContext(), CallConv.value(), RegVTs[Value]) + : RegVTs[Value]; if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val, RegisterVT)) ExtendKind = ISD::ZERO_EXTEND; @@ -1309,7 +1309,7 @@ void SelectionDAGBuilder::salvageUnresolvedDbgValue(DanglingDebugInfo &DDI) { if (handleDebugValue(V, Var, Expr, DL, InstDL, SDOrder, /*IsVariadic=*/false)) { LLVM_DEBUG(dbgs() << "Salvaged debug location info for:\n " - << DDI.getDI() << "\nBy stripping back to:\n " << V); + << *DDI.getDI() << "\nBy stripping back to:\n " << *V); return; } } @@ -1321,7 +1321,7 @@ void SelectionDAGBuilder::salvageUnresolvedDbgValue(DanglingDebugInfo &DDI) { auto SDV = DAG.getConstantDbgValue(Var, Expr, Undef, DL, SDNodeOrder); DAG.AddDbgValue(SDV, false); - LLVM_DEBUG(dbgs() << "Dropping debug value info for:\n " << DDI.getDI() + LLVM_DEBUG(dbgs() << "Dropping debug value info for:\n " << *DDI.getDI() << "\n"); LLVM_DEBUG(dbgs() << " Last seen at:\n " << *DDI.getDI()->getOperand(0) << "\n"); @@ -3747,13 +3747,8 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { setValue(&I, DAG.getBuildVector(VT, DL, Ops)); } -void SelectionDAGBuilder::visitInsertValue(const User &I) { - ArrayRef<unsigned> Indices; - if (const InsertValueInst *IV = dyn_cast<InsertValueInst>(&I)) - Indices = IV->getIndices(); - else - Indices = cast<ConstantExpr>(&I)->getIndices(); - +void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { + ArrayRef<unsigned> Indices = I.getIndices(); const Value *Op0 = I.getOperand(0); const Value *Op1 = I.getOperand(1); Type *AggTy = I.getType(); @@ -4616,6 +4611,8 @@ void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) { case AtomicRMWInst::UMin: NT = ISD::ATOMIC_LOAD_UMIN; break; case AtomicRMWInst::FAdd: NT = ISD::ATOMIC_LOAD_FADD; break; case AtomicRMWInst::FSub: NT = ISD::ATOMIC_LOAD_FSUB; break; + case AtomicRMWInst::FMax: NT = ISD::ATOMIC_LOAD_FMAX; break; + case AtomicRMWInst::FMin: NT = ISD::ATOMIC_LOAD_FMIN; break; } AtomicOrdering Ordering = I.getOrdering(); SyncScope::ID SSID = I.getSyncScopeID(); @@ -8410,52 +8407,6 @@ public: return false; } - - /// getCallOperandValEVT - Return the EVT of the Value* that this operand - /// corresponds to. If there is no Value* for this operand, it returns - /// MVT::Other. - EVT getCallOperandValEVT(LLVMContext &Context, const TargetLowering &TLI, - const DataLayout &DL, - llvm::Type *ParamElemType) const { - if (!CallOperandVal) return MVT::Other; - - if (isa<BasicBlock>(CallOperandVal)) - return TLI.getProgramPointerTy(DL); - - llvm::Type *OpTy = CallOperandVal->getType(); - - // FIXME: code duplicated from TargetLowering::ParseConstraints(). - // If this is an indirect operand, the operand is a pointer to the - // accessed type. - if (isIndirect) { - OpTy = ParamElemType; - assert(OpTy && "Indirect operand must have elementtype attribute"); - } - - // Look for vector wrapped in a struct. e.g. { <16 x i8> }. - if (StructType *STy = dyn_cast<StructType>(OpTy)) - if (STy->getNumElements() == 1) - OpTy = STy->getElementType(0); - - // If OpTy is not a single value, it may be a struct/union that we - // can tile with integers. 
- if (!OpTy->isSingleValueType() && OpTy->isSized()) { - unsigned BitSize = DL.getTypeSizeInBits(OpTy); - switch (BitSize) { - default: break; - case 1: - case 8: - case 16: - case 32: - case 64: - case 128: - OpTy = IntegerType::get(Context, BitSize); - break; - } - } - - return TLI.getAsmOperandValueType(DL, OpTy, true); - } }; @@ -8722,37 +8673,12 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, bool HasSideEffect = IA->hasSideEffects(); ExtraFlags ExtraInfo(Call); - unsigned ArgNo = 0; // ArgNo - The argument of the CallInst. - unsigned ResNo = 0; // ResNo - The result number of the next output. for (auto &T : TargetConstraints) { ConstraintOperands.push_back(SDISelAsmOperandInfo(T)); SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back(); - // Compute the value type for each operand. - if (OpInfo.hasArg()) { - OpInfo.CallOperandVal = Call.getArgOperand(ArgNo); + if (OpInfo.CallOperandVal) OpInfo.CallOperand = getValue(OpInfo.CallOperandVal); - Type *ParamElemTy = Call.getParamElementType(ArgNo); - EVT VT = OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI, - DAG.getDataLayout(), ParamElemTy); - OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other; - ArgNo++; - } else if (OpInfo.Type == InlineAsm::isOutput && !OpInfo.isIndirect) { - // The return value of the call is this value. As such, there is no - // corresponding argument. - assert(!Call.getType()->isVoidTy() && "Bad inline asm!"); - if (StructType *STy = dyn_cast<StructType>(Call.getType())) { - OpInfo.ConstraintVT = TLI.getSimpleValueType( - DAG.getDataLayout(), STy->getElementType(ResNo)); - } else { - assert(ResNo == 0 && "Asm only has one result!"); - OpInfo.ConstraintVT = TLI.getAsmOperandValueType( - DAG.getDataLayout(), Call.getType()).getSimpleVT(); - } - ++ResNo; - } else { - OpInfo.ConstraintVT = MVT::Other; - } if (!HasSideEffect) HasSideEffect = OpInfo.hasMemory(TLI); @@ -8865,7 +8791,7 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, if (RegError) { const MachineFunction &MF = DAG.getMachineFunction(); const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); - const char *RegName = TRI.getName(RegError.getValue()); + const char *RegName = TRI.getName(RegError.value()); emitInlineAsmError(Call, "register '" + Twine(RegName) + "' allocated for constraint '" + Twine(OpInfo.ConstraintCode) + @@ -9385,9 +9311,9 @@ static void addStackMapLiveVars(const CallBase &Call, unsigned StartIdx, } } -/// Lower llvm.experimental.stackmap directly to its target opcode. +/// Lower llvm.experimental.stackmap. void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { - // void @llvm.experimental.stackmap(i32 <id>, i32 <numShadowBytes>, + // void @llvm.experimental.stackmap(i64 <id>, i32 <numShadowBytes>, // [live variables...]) assert(CI.getType()->isVoidTy() && "Stackmap cannot return a value."); @@ -9412,29 +9338,45 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { Chain = DAG.getCALLSEQ_START(getRoot(), 0, 0, DL); InFlag = Chain.getValue(1); - // Add the <id> and <numBytes> constants. - SDValue IDVal = getValue(CI.getOperand(PatchPointOpers::IDPos)); - Ops.push_back(DAG.getTargetConstant( - cast<ConstantSDNode>(IDVal)->getZExtValue(), DL, MVT::i64)); - SDValue NBytesVal = getValue(CI.getOperand(PatchPointOpers::NBytesPos)); - Ops.push_back(DAG.getTargetConstant( - cast<ConstantSDNode>(NBytesVal)->getZExtValue(), DL, - MVT::i32)); - - // Push live variables for the stack map. 
- addStackMapLiveVars(CI, 2, DL, Ops, *this); - - // We are not pushing any register mask info here on the operands list, - // because the stackmap doesn't clobber anything. - - // Push the chain and the glue flag. + // Add the STACKMAP operands, starting with DAG house-keeping. Ops.push_back(Chain); Ops.push_back(InFlag); + // Add the <id>, <numShadowBytes> operands. + // + // These do not require legalisation, and can be emitted directly to target + // constant nodes. + SDValue ID = getValue(CI.getArgOperand(0)); + assert(ID.getValueType() == MVT::i64); + SDValue IDConst = DAG.getTargetConstant( + cast<ConstantSDNode>(ID)->getZExtValue(), DL, ID.getValueType()); + Ops.push_back(IDConst); + + SDValue Shad = getValue(CI.getArgOperand(1)); + assert(Shad.getValueType() == MVT::i32); + SDValue ShadConst = DAG.getTargetConstant( + cast<ConstantSDNode>(Shad)->getZExtValue(), DL, Shad.getValueType()); + Ops.push_back(ShadConst); + + // Add the live variables. + for (unsigned I = 2; I < CI.arg_size(); I++) { + SDValue Op = getValue(CI.getArgOperand(I)); + + // Things on the stack are pointer-typed, meaning that they are already + // legal and can be emitted directly to target nodes. + if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op)) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + Ops.push_back(DAG.getTargetFrameIndex( + FI->getIndex(), TLI.getFrameIndexTy(DAG.getDataLayout()))); + } else { + // Otherwise emit a target independent node to be legalised. + Ops.push_back(getValue(CI.getArgOperand(I))); + } + } + // Create the STACKMAP node. SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); - SDNode *SM = DAG.getMachineNode(TargetOpcode::STACKMAP, DL, NodeTys, Ops); - Chain = SDValue(SM, 0); + Chain = DAG.getNode(ISD::STACKMAP, DL, NodeTys, Ops); InFlag = Chain.getValue(1); Chain = DAG.getCALLSEQ_END(Chain, NullPtr, NullPtr, InFlag, DL); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 72cca3d9b001..4a3ab00614b3 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -529,7 +529,7 @@ private: void visitShuffleVector(const User &I); void visitExtractValue(const ExtractValueInst &I); - void visitInsertValue(const User &I); + void visitInsertValue(const InsertValueInst &I); void visitLandingPad(const LandingPadInst &LP); void visitGetElementPtr(const User &I); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index bbfc6e5ef64f..9df0b64c26c3 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -486,6 +486,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::VECREDUCE_UMIN: return "vecreduce_umin"; case ISD::VECREDUCE_FMAX: return "vecreduce_fmax"; case ISD::VECREDUCE_FMIN: return "vecreduce_fmin"; + case ISD::STACKMAP: + return "stackmap"; // Vector Predication #define BEGIN_REGISTER_VP_SDNODE(SDID, LEGALARG, NAME, ...) 
\ diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 2b63359c2b1b..7f453f081982 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -27,7 +27,6 @@ #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/LazyBlockFrequencyInfo.h" #include "llvm/Analysis/LegacyDivergenceAnalysis.h" -#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" @@ -51,6 +50,7 @@ #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/StackMaps.h" #include "llvm/CodeGen/StackProtector.h" #include "llvm/CodeGen/SwiftErrorValueTracking.h" #include "llvm/CodeGen/TargetInstrInfo.h" @@ -64,7 +64,6 @@ #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/DiagnosticInfo.h" -#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/InstIterator.h" @@ -345,47 +344,6 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { MachineFunctionPass::getAnalysisUsage(AU); } -/// SplitCriticalSideEffectEdges - Look for critical edges with a PHI value that -/// may trap on it. In this case we have to split the edge so that the path -/// through the predecessor block that doesn't go to the phi block doesn't -/// execute the possibly trapping instruction. If available, we pass domtree -/// and loop info to be updated when we split critical edges. This is because -/// SelectionDAGISel preserves these analyses. -/// This is required for correctness, so it must be done at -O0. -/// -static void SplitCriticalSideEffectEdges(Function &Fn, DominatorTree *DT, - LoopInfo *LI) { - // Loop for blocks with phi nodes. - for (BasicBlock &BB : Fn) { - PHINode *PN = dyn_cast<PHINode>(BB.begin()); - if (!PN) continue; - - ReprocessBlock: - // For each block with a PHI node, check to see if any of the input values - // are potentially trapping constant expressions. Constant expressions are - // the only potentially trapping value that can occur as the argument to a - // PHI. - for (BasicBlock::iterator I = BB.begin(); (PN = dyn_cast<PHINode>(I)); ++I) - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - Constant *C = dyn_cast<Constant>(PN->getIncomingValue(i)); - if (!C || !C->canTrap()) continue; - - // The only case we have to worry about is when the edge is critical. - // Since this block has a PHI Node, we assume it has multiple input - // edges: check to see if the pred has multiple successors. - BasicBlock *Pred = PN->getIncomingBlock(i); - if (Pred->getTerminator()->getNumSuccessors() == 1) - continue; - - // Okay, we have to split this edge. - SplitCriticalEdge( - Pred->getTerminator(), GetSuccessorNumber(Pred, &BB), - CriticalEdgeSplittingOptions(DT, LI).setMergeIdenticalEdges()); - goto ReprocessBlock; - } - } -} - static void computeUsesMSVCFloatingPoint(const Triple &TT, const Function &F, MachineModuleInfo &MMI) { // Only needed for MSVC @@ -445,10 +403,6 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { LibInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(Fn); GFI = Fn.hasGC() ? 
&getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : nullptr; ORE = std::make_unique<OptimizationRemarkEmitter>(&Fn); - auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); - DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr; - auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>(); - LoopInfo *LI = LIWP ? &LIWP->getLoopInfo() : nullptr; auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); BlockFrequencyInfo *BFI = nullptr; if (PSI && PSI->hasProfileSummary() && OptLevel != CodeGenOpt::None) @@ -456,8 +410,6 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { LLVM_DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n"); - SplitCriticalSideEffectEdges(const_cast<Function &>(Fn), DT, LI); - CurDAG->init(*MF, *ORE, this, LibInfo, getAnalysisIfAvailable<LegacyDivergenceAnalysis>(), PSI, BFI); FuncInfo->set(Fn, *MF, CurDAG); @@ -2241,6 +2193,52 @@ void SelectionDAGISel::Select_ARITH_FENCE(SDNode *N) { N->getOperand(0)); } +void SelectionDAGISel::Select_STACKMAP(SDNode *N) { + std::vector<SDValue> Ops; + auto *It = N->op_begin(); + SDLoc DL(N); + + // Stash the chain and glue operands so we can move them to the end. + SDValue Chain = *It++; + SDValue InFlag = *It++; + + // <id> operand. + SDValue ID = *It++; + assert(ID.getValueType() == MVT::i64); + Ops.push_back(ID); + + // <numShadowBytes> operand. + SDValue Shad = *It++; + assert(Shad.getValueType() == MVT::i32); + Ops.push_back(Shad); + + // Live variable operands. + for (; It != N->op_end(); It++) { + SDNode *OpNode = It->getNode(); + SDValue O; + + // FrameIndex nodes should have been directly emitted to TargetFrameIndex + // nodes at DAG-construction time. + assert(OpNode->getOpcode() != ISD::FrameIndex); + + if (OpNode->getOpcode() == ISD::Constant) { + Ops.push_back( + CurDAG->getTargetConstant(StackMaps::ConstantOp, DL, MVT::i64)); + O = CurDAG->getTargetConstant( + cast<ConstantSDNode>(OpNode)->getZExtValue(), DL, It->getValueType()); + } else { + O = *It; + } + Ops.push_back(O); + } + + Ops.push_back(Chain); + Ops.push_back(InFlag); + + SDVTList NodeTys = CurDAG->getVTList(MVT::Other, MVT::Glue); + CurDAG->SelectNodeTo(N, TargetOpcode::STACKMAP, NodeTys, Ops); +} + /// GetVBR - decode a vbr encoding whose top bit is set. 
LLVM_ATTRIBUTE_ALWAYS_INLINE static uint64_t GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) { @@ -2795,6 +2793,9 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, case ISD::ARITH_FENCE: Select_ARITH_FENCE(NodeToMatch); return; + case ISD::STACKMAP: + Select_STACKMAP(NodeToMatch); + return; } assert(!NodeToMatch->isMachineOpcode() && "Node already selected!"); diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index 19a52fde44c1..3061158eea30 100644 --- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -531,14 +531,14 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, for (const Value *V : SI.Bases) { auto Opt = S.isGCManagedPointer(V->getType()->getScalarType()); if (Opt) { - assert(Opt.getValue() && + assert(Opt.value() && "non gc managed base pointer found in statepoint"); } } for (const Value *V : SI.Ptrs) { auto Opt = S.isGCManagedPointer(V->getType()->getScalarType()); if (Opt) { - assert(Opt.getValue() && + assert(Opt.value() && "non gc managed derived pointer found in statepoint"); } } diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index a6b471ea22b7..66389a57f780 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -1362,6 +1362,29 @@ bool TargetLowering::SimplifyDemandedBits( } } + // AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I) + // iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits). + if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR && + (Op0.getOperand(0).isUndef() || + ISD::isBuildVectorOfConstantSDNodes(Op0.getOperand(0).getNode())) && + Op0->hasOneUse()) { + unsigned NumSubElts = + Op0.getOperand(1).getValueType().getVectorNumElements(); + unsigned SubIdx = Op0.getConstantOperandVal(2); + APInt DemandedSub = + APInt::getBitsSet(NumElts, SubIdx, SubIdx + NumSubElts); + KnownBits KnownSubMask = + TLO.DAG.computeKnownBits(Op1, DemandedSub & DemandedElts, Depth + 1); + if (DemandedBits.isSubsetOf(KnownSubMask.One)) { + SDValue NewAnd = + TLO.DAG.getNode(ISD::AND, dl, VT, Op0.getOperand(0), Op1); + SDValue NewInsert = + TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, NewAnd, + Op0.getOperand(1), Op0.getOperand(2)); + return TLO.CombineTo(Op, NewInsert); + } + } + if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO, Depth + 1)) return true; @@ -1371,20 +1394,6 @@ bool TargetLowering::SimplifyDemandedBits( return true; assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); - // Attempt to avoid multi-use ops if we don't need anything from them. - if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) { - SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( - Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1); - SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits( - Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1); - if (DemandedOp0 || DemandedOp1) { - Op0 = DemandedOp0 ? DemandedOp0 : Op0; - Op1 = DemandedOp1 ? DemandedOp1 : Op1; - SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1); - return TLO.CombineTo(Op, NewOp); - } - } - // If all of the demanded bits are known one on one side, return the other. // These bits cannot contribute to the result of the 'and'. 
if (DemandedBits.isSubsetOf(Known2.Zero | Known.One)) @@ -1402,6 +1411,20 @@ bool TargetLowering::SimplifyDemandedBits( if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) return true; + // Attempt to avoid multi-use ops if we don't need anything from them. + if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) { + SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( + Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1); + SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits( + Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1); + if (DemandedOp0 || DemandedOp1) { + Op0 = DemandedOp0 ? DemandedOp0 : Op0; + Op1 = DemandedOp1 ? DemandedOp1 : Op1; + SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1); + return TLO.CombineTo(Op, NewOp); + } + } + Known &= Known2; break; } @@ -1418,6 +1441,19 @@ bool TargetLowering::SimplifyDemandedBits( return true; assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); + // If all of the demanded bits are known zero on one side, return the other. + // These bits cannot contribute to the result of the 'or'. + if (DemandedBits.isSubsetOf(Known2.One | Known.Zero)) + return TLO.CombineTo(Op, Op0); + if (DemandedBits.isSubsetOf(Known.One | Known2.Zero)) + return TLO.CombineTo(Op, Op1); + // If the RHS is a constant, see if we can simplify it. + if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO)) + return true; + // If the operation can be done in a smaller type, do so. + if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) + return true; + // Attempt to avoid multi-use ops if we don't need anything from them. if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) { SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( @@ -1432,19 +1468,6 @@ bool TargetLowering::SimplifyDemandedBits( } } - // If all of the demanded bits are known zero on one side, return the other. - // These bits cannot contribute to the result of the 'or'. - if (DemandedBits.isSubsetOf(Known2.One | Known.Zero)) - return TLO.CombineTo(Op, Op0); - if (DemandedBits.isSubsetOf(Known.One | Known2.Zero)) - return TLO.CombineTo(Op, Op1); - // If the RHS is a constant, see if we can simplify it. - if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO)) - return true; - // If the operation can be done in a smaller type, do so. - if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) - return true; - Known |= Known2; break; } @@ -1461,20 +1484,6 @@ bool TargetLowering::SimplifyDemandedBits( return true; assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); - // Attempt to avoid multi-use ops if we don't need anything from them. - if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) { - SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( - Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1); - SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits( - Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1); - if (DemandedOp0 || DemandedOp1) { - Op0 = DemandedOp0 ? DemandedOp0 : Op0; - Op1 = DemandedOp1 ? DemandedOp1 : Op1; - SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1); - return TLO.CombineTo(Op, NewOp); - } - } - // If all of the demanded bits are known zero on one side, return the other. // These bits cannot contribute to the result of the 'xor'. 
if (DemandedBits.isSubsetOf(Known.Zero)) @@ -1519,6 +1528,20 @@ bool TargetLowering::SimplifyDemandedBits( if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO)) return true; + // Attempt to avoid multi-use ops if we don't need anything from them. + if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) { + SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( + Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1); + SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits( + Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1); + if (DemandedOp0 || DemandedOp1) { + Op0 = DemandedOp0 ? DemandedOp0 : Op0; + Op1 = DemandedOp1 ? DemandedOp1 : Op1; + SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1); + return TLO.CombineTo(Op, NewOp); + } + } + Known ^= Known2; break; } @@ -1972,9 +1995,9 @@ bool TargetLowering::SimplifyDemandedBits( KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1); Known = KnownBits::umin(Known0, Known1); if (Optional<bool> IsULE = KnownBits::ule(Known0, Known1)) - return TLO.CombineTo(Op, IsULE.getValue() ? Op0 : Op1); + return TLO.CombineTo(Op, IsULE.value() ? Op0 : Op1); if (Optional<bool> IsULT = KnownBits::ult(Known0, Known1)) - return TLO.CombineTo(Op, IsULT.getValue() ? Op0 : Op1); + return TLO.CombineTo(Op, IsULT.value() ? Op0 : Op1); break; } case ISD::UMAX: { @@ -1985,9 +2008,9 @@ bool TargetLowering::SimplifyDemandedBits( KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1); Known = KnownBits::umax(Known0, Known1); if (Optional<bool> IsUGE = KnownBits::uge(Known0, Known1)) - return TLO.CombineTo(Op, IsUGE.getValue() ? Op0 : Op1); + return TLO.CombineTo(Op, IsUGE.value() ? Op0 : Op1); if (Optional<bool> IsUGT = KnownBits::ugt(Known0, Known1)) - return TLO.CombineTo(Op, IsUGT.getValue() ? Op0 : Op1); + return TLO.CombineTo(Op, IsUGT.value() ? Op0 : Op1); break; } case ISD::BITREVERSE: { @@ -2486,9 +2509,7 @@ bool TargetLowering::SimplifyDemandedBits( // won't wrap after simplification. Flags.setNoSignedWrap(false); Flags.setNoUnsignedWrap(false); - SDValue NewOp = - TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags); - return TLO.CombineTo(Op, NewOp); + Op->setFlags(Flags); } return true; } @@ -3031,15 +3052,15 @@ bool TargetLowering::SimplifyDemandedVectorElts( break; } case ISD::VSELECT: { + SDValue Sel = Op.getOperand(0); + SDValue LHS = Op.getOperand(1); + SDValue RHS = Op.getOperand(2); + // Try to transform the select condition based on the current demanded // elements. - // TODO: If a condition element is undef, we can choose from one arm of the - // select (and if one arm is undef, then we can propagate that to the - // result). - // TODO - add support for constant vselect masks (see IR version of this). - APInt UnusedUndef, UnusedZero; - if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UnusedUndef, - UnusedZero, TLO, Depth + 1)) + APInt UndefSel, UndefZero; + if (SimplifyDemandedVectorElts(Sel, DemandedElts, UndefSel, UndefZero, TLO, + Depth + 1)) return true; // See if we can simplify either vselect operand. 
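The umin/umax folds touched above (where getValue() becomes value()) resolve a min or max statically whenever the known-zero and known-one bits alone already order the operands. A standalone sketch of that ordering argument, with the KnownBits bookkeeping replaced by explicit bit masks (plain C++; illustrative widths and masks, not the LLVM API):

    #include <algorithm>
    #include <cassert>

    int main() {
      // Suppose known bits prove X's high nibble is zero (so X <= 0x0F) and
      // Y's bit 4 is one (so Y >= 0x10). Then umin(X, Y) is always X.
      for (unsigned X = 0; X <= 0xFF; ++X)
        for (unsigned Y = 0; Y <= 0xFF; ++Y) {
          if ((X & 0xF0) != 0 || (Y & 0x10) == 0)
            continue; // discard values contradicting the assumed known bits
          assert(std::min(X, Y) == X); // the fold: umin(X, Y) -> X
        }
      return 0;
    }

KnownBits::ule performs this comparison symbolically and returns no answer when the known bits leave the order ambiguous, in which case the fold is simply skipped.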
@@ -3047,15 +3068,24 @@ bool TargetLowering::SimplifyDemandedVectorElts( APInt DemandedRHS(DemandedElts); APInt UndefLHS, ZeroLHS; APInt UndefRHS, ZeroRHS; - if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedLHS, UndefLHS, - ZeroLHS, TLO, Depth + 1)) + if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO, + Depth + 1)) return true; - if (SimplifyDemandedVectorElts(Op.getOperand(2), DemandedRHS, UndefRHS, - ZeroRHS, TLO, Depth + 1)) + if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO, + Depth + 1)) return true; KnownUndef = UndefLHS & UndefRHS; KnownZero = ZeroLHS & ZeroRHS; + + // If we know that the selected element is always zero, we don't need the + // select value element. + APInt DemandedSel = DemandedElts & ~KnownZero; + if (DemandedSel != DemandedElts) + if (SimplifyDemandedVectorElts(Sel, DemandedSel, UndefSel, UndefZero, TLO, + Depth + 1)) + return true; + break; } case ISD::VECTOR_SHUFFLE: { @@ -5239,17 +5269,13 @@ TargetLowering::ParseConstraints(const DataLayout &DL, case 32: case 64: case 128: - OpInfo.ConstraintVT = - MVT::getVT(IntegerType::get(OpTy->getContext(), BitSize), true); + OpTy = IntegerType::get(OpTy->getContext(), BitSize); break; } - } else if (PointerType *PT = dyn_cast<PointerType>(OpTy)) { - unsigned PtrSize = DL.getPointerSizeInBits(PT->getAddressSpace()); - OpInfo.ConstraintVT = MVT::getIntegerVT(PtrSize); - } else { - OpInfo.ConstraintVT = MVT::getVT(OpTy, true); } + EVT VT = getAsmOperandValueType(DL, OpTy, true); + OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other; ArgNo++; } } @@ -7833,7 +7859,7 @@ SDValue TargetLowering::expandCTLZ(SDNode *Node, SelectionDAG &DAG) const { // return popcount(~x); // // Ref: "Hacker's Delight" by Henry Warren - for (unsigned i = 0; (1U << i) <= (NumBitsPerElt / 2); ++i) { + for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) { SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT); Op = DAG.getNode(ISD::OR, dl, VT, Op, DAG.getNode(ISD::SRL, dl, VT, Op, Tmp)); diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index f3d68bd9c92d..2badbe34ae6a 100644 --- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -449,9 +449,6 @@ static SectionKind getELFKindForNamedSection(StringRef Name, SectionKind K) { Name == ".llvmbc" || Name == ".llvmcmd") return SectionKind::getMetadata(); - if (Name == ".llvm.offloading") - return SectionKind::getExclude(); - if (Name.empty() || Name[0] != '.') return K; // Default implementation based on some magic section names. @@ -501,6 +498,9 @@ static unsigned getELFSectionType(StringRef Name, SectionKind K) { if (hasPrefix(Name, ".preinit_array")) return ELF::SHT_PREINIT_ARRAY; + if (hasPrefix(Name, ".llvm.offloading")) + return ELF::SHT_LLVM_OFFLOADING; + if (K.isBSS() || K.isThreadBSS()) return ELF::SHT_NOBITS; diff --git a/llvm/lib/DWARFLinker/DWARFLinker.cpp b/llvm/lib/DWARFLinker/DWARFLinker.cpp index 50c52190c1f6..298359dea9af 100644 --- a/llvm/lib/DWARFLinker/DWARFLinker.cpp +++ b/llvm/lib/DWARFLinker/DWARFLinker.cpp @@ -1005,6 +1005,7 @@ void DWARFLinker::DIECloner::cloneExpression( // instead indicate the generic type. The same holds for // DW_OP_reinterpret, which is currently not supported. 
if (RefOffset > 0 || Op.getCode() != dwarf::DW_OP_convert) { + RefOffset += Unit.getOrigUnit().getOffset(); auto RefDie = Unit.getOrigUnit().getDIEForOffset(RefOffset); CompileUnit::DIEInfo &Info = Unit.getInfo(RefDie); if (DIE *Clone = Info.Clone) diff --git a/llvm/lib/DebugInfo/CodeView/CodeViewError.cpp b/llvm/lib/DebugInfo/CodeView/CodeViewError.cpp index d12f6c796e50..74803a3e495a 100644 --- a/llvm/lib/DebugInfo/CodeView/CodeViewError.cpp +++ b/llvm/lib/DebugInfo/CodeView/CodeViewError.cpp @@ -8,7 +8,6 @@ #include "llvm/DebugInfo/CodeView/CodeViewError.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/ManagedStatic.h" #include <string> using namespace llvm; @@ -42,9 +41,9 @@ public: }; } // namespace -static llvm::ManagedStatic<CodeViewErrorCategory> CodeViewErrCategory; const std::error_category &llvm::codeview::CVErrorCategory() { - return *CodeViewErrCategory; + static CodeViewErrorCategory CodeViewErrCategory; + return CodeViewErrCategory; } char CodeViewError::ID; diff --git a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp index c785026f8461..2e567d8bc7ee 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp @@ -1205,13 +1205,13 @@ void DWARFContext::addLocalsForDie(DWARFCompileUnit *CU, DWARFDie Subprogram, if (auto DeclFileAttr = Die.find(DW_AT_decl_file)) { if (const auto *LT = CU->getContext().getLineTableForUnit(CU)) LT->getFileNameByIndex( - DeclFileAttr->getAsUnsignedConstant().getValue(), + DeclFileAttr->getAsUnsignedConstant().value(), CU->getCompilationDir(), DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, Local.DeclFile); } if (auto DeclLineAttr = Die.find(DW_AT_decl_line)) - Local.DeclLine = DeclLineAttr->getAsUnsignedConstant().getValue(); + Local.DeclLine = DeclLineAttr->getAsUnsignedConstant().value(); Result.push_back(Local); return; diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp index 2e0780e249aa..33856c12b3c9 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp @@ -327,20 +327,20 @@ parseV5DirFileTables(const DWARFDataExtractor &DebugLineData, FileEntry.Source = Value; break; case DW_LNCT_directory_index: - FileEntry.DirIdx = Value.getAsUnsignedConstant().getValue(); + FileEntry.DirIdx = Value.getAsUnsignedConstant().value(); break; case DW_LNCT_timestamp: - FileEntry.ModTime = Value.getAsUnsignedConstant().getValue(); + FileEntry.ModTime = Value.getAsUnsignedConstant().value(); break; case DW_LNCT_size: - FileEntry.Length = Value.getAsUnsignedConstant().getValue(); + FileEntry.Length = Value.getAsUnsignedConstant().value(); break; case DW_LNCT_MD5: - if (!Value.getAsBlock() || Value.getAsBlock().getValue().size() != 16) + if (!Value.getAsBlock() || Value.getAsBlock().value().size() != 16) return createStringError( errc::invalid_argument, "failed to parse file entry because the MD5 hash is invalid"); - std::uninitialized_copy_n(Value.getAsBlock().getValue().begin(), 16, + std::uninitialized_copy_n(Value.getAsBlock().value().begin(), 16, FileEntry.Checksum.begin()); break; default: diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp index 96c546250974..15a2d23c4fd2 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp @@ -136,23 +136,30 @@ static void dumpAttribute(raw_ostream &OS, const DWARFDie &Die, auto Color = HighlightColor::Enumerator; if (Attr == 
DW_AT_decl_file || Attr == DW_AT_call_file) { Color = HighlightColor::String; - if (const auto *LT = U->getContext().getLineTableForUnit(U)) - if (LT->getFileNameByIndex( - *FormValue.getAsUnsignedConstant(), U->getCompilationDir(), - DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, File)) { - File = '"' + File + '"'; - Name = File; + if (const auto *LT = U->getContext().getLineTableForUnit(U)) { + if (Optional<uint64_t> Val = FormValue.getAsUnsignedConstant()) { + if (LT->getFileNameByIndex( + *Val, U->getCompilationDir(), + DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, + File)) { + File = '"' + File + '"'; + Name = File; + } } + } } else if (Optional<uint64_t> Val = FormValue.getAsUnsignedConstant()) Name = AttributeValueString(Attr, *Val); if (!Name.empty()) WithColor(OS, Color) << Name; - else if (Attr == DW_AT_decl_line || Attr == DW_AT_call_line) - OS << *FormValue.getAsUnsignedConstant(); - else if (Attr == DW_AT_low_pc && - (FormValue.getAsAddress() == - dwarf::computeTombstoneAddress(U->getAddressByteSize()))) { + else if (Attr == DW_AT_decl_line || Attr == DW_AT_call_line) { + if (Optional<uint64_t> Val = FormValue.getAsUnsignedConstant()) + OS << *Val; + else + FormValue.dump(OS, DumpOpts); + } else if (Attr == DW_AT_low_pc && + (FormValue.getAsAddress() == + dwarf::computeTombstoneAddress(U->getAddressByteSize()))) { if (DumpOpts.Verbose) { FormValue.dump(OS, DumpOpts); OS << " ("; diff --git a/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp b/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp index c704f8f583af..2be2a12aa025 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp @@ -704,6 +704,14 @@ unsigned DWARFVerifier::verifyDebugInfoAttribute(const DWARFDie &Die, } break; } + case DW_AT_call_line: + case DW_AT_decl_line: { + if (!AttrValue.Value.getAsUnsignedConstant()) { + ReportError("DIE has " + AttributeString(Attr) + + " with invalid encoding"); + } + break; + } default: break; } diff --git a/llvm/lib/DebugInfo/MSF/MSFError.cpp b/llvm/lib/DebugInfo/MSF/MSFError.cpp index 9df2158423a4..fd93c3e726cc 100644 --- a/llvm/lib/DebugInfo/MSF/MSFError.cpp +++ b/llvm/lib/DebugInfo/MSF/MSFError.cpp @@ -8,7 +8,6 @@ #include "llvm/DebugInfo/MSF/MSFError.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/ManagedStatic.h" #include <string> using namespace llvm; @@ -50,7 +49,9 @@ public: }; } // namespace -static llvm::ManagedStatic<MSFErrorCategory> MSFCategory; -const std::error_category &llvm::msf::MSFErrCategory() { return *MSFCategory; } +const std::error_category &llvm::msf::MSFErrCategory() { + static MSFErrorCategory MSFCategory; + return MSFCategory; +} char MSFError::ID; diff --git a/llvm/lib/DebugInfo/PDB/DIA/DIAError.cpp b/llvm/lib/DebugInfo/PDB/DIA/DIAError.cpp index 819651f77787..0bd93a0e9506 100644 --- a/llvm/lib/DebugInfo/PDB/DIA/DIAError.cpp +++ b/llvm/lib/DebugInfo/PDB/DIA/DIAError.cpp @@ -1,6 +1,5 @@ #include "llvm/DebugInfo/PDB/DIA/DIAError.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/ManagedStatic.h" using namespace llvm; using namespace llvm::pdb; @@ -31,7 +30,9 @@ public: } }; -static llvm::ManagedStatic<DIAErrorCategory> DIACategory; -const std::error_category &llvm::pdb::DIAErrCategory() { return *DIACategory; } +const std::error_category &llvm::pdb::DIAErrCategory() { + static DIAErrorCategory DIACategory; + return DIACategory; +} char DIAError::ID; diff --git a/llvm/lib/DebugInfo/PDB/GenericError.cpp b/llvm/lib/DebugInfo/PDB/GenericError.cpp index 
0e4cba3174b2..d6da2dd62140 100644 --- a/llvm/lib/DebugInfo/PDB/GenericError.cpp +++ b/llvm/lib/DebugInfo/PDB/GenericError.cpp @@ -8,7 +8,6 @@ #include "llvm/DebugInfo/PDB/GenericError.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/ManagedStatic.h" using namespace llvm; using namespace llvm::pdb; @@ -42,7 +41,9 @@ public: }; } // namespace -static llvm::ManagedStatic<PDBErrorCategory> PDBCategory; -const std::error_category &llvm::pdb::PDBErrCategory() { return *PDBCategory; } +const std::error_category &llvm::pdb::PDBErrCategory() { + static PDBErrorCategory PDBCategory; + return PDBCategory; +} char PDBError::ID; diff --git a/llvm/lib/DebugInfo/PDB/Native/RawError.cpp b/llvm/lib/DebugInfo/PDB/Native/RawError.cpp index ed6cf0839675..31320288a603 100644 --- a/llvm/lib/DebugInfo/PDB/Native/RawError.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/RawError.cpp @@ -1,6 +1,5 @@ #include "llvm/DebugInfo/PDB/Native/RawError.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/ManagedStatic.h" using namespace llvm; using namespace llvm::pdb; @@ -47,7 +46,9 @@ public: }; } // namespace -static llvm::ManagedStatic<RawErrorCategory> RawCategory; -const std::error_category &llvm::pdb::RawErrCategory() { return *RawCategory; } +const std::error_category &llvm::pdb::RawErrCategory() { + static RawErrorCategory RawCategory; + return RawCategory; +} char RawError::ID; diff --git a/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp b/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp index d2ff8aa7c995..c239d4c260ec 100644 --- a/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp +++ b/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp @@ -327,6 +327,8 @@ Optional<ArrayRef<uint8_t>> getBuildID(const ELFFile<ELFT> &Obj) { return {}; } +} // end anonymous namespace + Optional<ArrayRef<uint8_t>> getBuildID(const ELFObjectFileBase *Obj) { Optional<ArrayRef<uint8_t>> BuildID; if (auto *O = dyn_cast<ELFObjectFile<ELF32LE>>(Obj)) @@ -342,8 +344,6 @@ Optional<ArrayRef<uint8_t>> getBuildID(const ELFObjectFileBase *Obj) { return BuildID; } -} // end anonymous namespace - ObjectFile *LLVMSymbolizer::lookUpDsymFile(const std::string &ExePath, const MachOObjectFile *MachExeObj, const std::string &ArchName) { diff --git a/llvm/lib/Debuginfod/Debuginfod.cpp b/llvm/lib/Debuginfod/Debuginfod.cpp index 7b1c36fdbe09..ef4e11ca38e6 100644 --- a/llvm/lib/Debuginfod/Debuginfod.cpp +++ b/llvm/lib/Debuginfod/Debuginfod.cpp @@ -8,25 +8,39 @@ /// /// \file /// -/// This file defines the fetchInfo function, which retrieves -/// any of the three supported artifact types: (executable, debuginfo, source -/// file) associated with a build-id from debuginfod servers. If a source file -/// is to be fetched, its absolute path must be specified in the Description -/// argument to fetchInfo. +/// This file contains several definitions for the debuginfod client and server. +/// For the client, this file defines the fetchInfo function. For the server, +/// this file defines the DebuginfodLogEntry and DebuginfodServer structs, as +/// well as the DebuginfodLog, DebuginfodCollection classes. The fetchInfo +/// function retrieves any of the three supported artifact types: (executable, +/// debuginfo, source file) associated with a build-id from debuginfod servers. +/// If a source file is to be fetched, its absolute path must be specified in +/// the Description argument to fetchInfo. 
The DebuginfodLogEntry, +/// DebuginfodLog, and DebuginfodCollection are used by the DebuginfodServer to +/// scan the local filesystem for binaries and serve the debuginfod protocol. /// //===----------------------------------------------------------------------===// #include "llvm/Debuginfod/Debuginfod.h" #include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/Magic.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/DebugInfo/Symbolize/Symbolize.h" #include "llvm/Debuginfod/HTTPClient.h" +#include "llvm/Object/Binary.h" +#include "llvm/Object/ELFObjectFile.h" +#include "llvm/Object/ObjectFile.h" #include "llvm/Support/CachePruning.h" #include "llvm/Support/Caching.h" #include "llvm/Support/Errc.h" #include "llvm/Support/Error.h" #include "llvm/Support/FileUtilities.h" #include "llvm/Support/Path.h" +#include "llvm/Support/ThreadPool.h" #include "llvm/Support/xxhash.h" +#include <atomic> + namespace llvm { static std::string uniqueKey(llvm::StringRef S) { return utostr(xxHash64(S)); } @@ -46,6 +60,8 @@ Expected<SmallVector<StringRef>> getDefaultDebuginfodUrls() { return DebuginfodUrls; } +/// Finds a default local file caching directory for the debuginfod client, +/// first checking DEBUGINFOD_CACHE_PATH. Expected<std::string> getDefaultDebuginfodCacheDirectory() { if (const char *CacheDirectoryEnv = std::getenv("DEBUGINFOD_CACHE_PATH")) return CacheDirectoryEnv; @@ -208,4 +224,293 @@ Expected<std::string> getCachedOrDownloadArtifact( return createStringError(errc::argument_out_of_domain, "build id not found"); } + +DebuginfodLogEntry::DebuginfodLogEntry(const Twine &Message) + : Message(Message.str()) {} + +void DebuginfodLog::push(const Twine &Message) { + push(DebuginfodLogEntry(Message)); +} + +void DebuginfodLog::push(DebuginfodLogEntry Entry) { + { + std::lock_guard<std::mutex> Guard(QueueMutex); + LogEntryQueue.push(Entry); + } + QueueCondition.notify_one(); +} + +DebuginfodLogEntry DebuginfodLog::pop() { + { + std::unique_lock<std::mutex> Guard(QueueMutex); + // Wait for messages to be pushed into the queue. 
+ QueueCondition.wait(Guard, [&] { return !LogEntryQueue.empty(); }); + } + std::lock_guard<std::mutex> Guard(QueueMutex); + if (!LogEntryQueue.size()) + llvm_unreachable("Expected message in the queue."); + + DebuginfodLogEntry Entry = LogEntryQueue.front(); + LogEntryQueue.pop(); + return Entry; +} + +DebuginfodCollection::DebuginfodCollection(ArrayRef<StringRef> PathsRef, + DebuginfodLog &Log, ThreadPool &Pool, + double MinInterval) + : Log(Log), Pool(Pool), MinInterval(MinInterval) { + for (StringRef Path : PathsRef) + Paths.push_back(Path.str()); +} + +Error DebuginfodCollection::update() { + std::lock_guard<sys::Mutex> Guard(UpdateMutex); + if (UpdateTimer.isRunning()) + UpdateTimer.stopTimer(); + UpdateTimer.clear(); + for (const std::string &Path : Paths) { + Log.push("Updating binaries at path " + Path); + if (Error Err = findBinaries(Path)) + return Err; + } + Log.push("Updated collection"); + UpdateTimer.startTimer(); + return Error::success(); +} + +Expected<bool> DebuginfodCollection::updateIfStale() { + if (!UpdateTimer.isRunning()) + return false; + UpdateTimer.stopTimer(); + double Time = UpdateTimer.getTotalTime().getWallTime(); + UpdateTimer.startTimer(); + if (Time < MinInterval) + return false; + if (Error Err = update()) + return std::move(Err); + return true; +} + +Error DebuginfodCollection::updateForever(std::chrono::milliseconds Interval) { + while (true) { + if (Error Err = update()) + return Err; + std::this_thread::sleep_for(Interval); + } + llvm_unreachable("updateForever loop should never end"); +} + +static bool isDebugBinary(object::ObjectFile *Object) { + // TODO: handle PDB debuginfo + std::unique_ptr<DWARFContext> Context = DWARFContext::create( + *Object, DWARFContext::ProcessDebugRelocations::Process); + const DWARFObject &DObj = Context->getDWARFObj(); + unsigned NumSections = 0; + DObj.forEachInfoSections([&](const DWARFSection &S) { NumSections++; }); + return NumSections; +} + +static bool hasELFMagic(StringRef FilePath) { + file_magic Type; + std::error_code EC = identify_magic(FilePath, Type); + if (EC) + return false; + switch (Type) { + case file_magic::elf: + case file_magic::elf_relocatable: + case file_magic::elf_executable: + case file_magic::elf_shared_object: + case file_magic::elf_core: + return true; + default: + return false; + } +} + +Error DebuginfodCollection::findBinaries(StringRef Path) { + std::error_code EC; + sys::fs::recursive_directory_iterator I(Twine(Path), EC), E; + std::mutex IteratorMutex; + ThreadPoolTaskGroup IteratorGroup(Pool); + for (unsigned WorkerIndex = 0; WorkerIndex < Pool.getThreadCount(); + WorkerIndex++) { + IteratorGroup.async([&, this]() -> void { + std::string FilePath; + while (true) { + { + // Check if iteration is over or there is an error during iteration + std::lock_guard<std::mutex> Guard(IteratorMutex); + if (I == E || EC) + return; + // Grab a file path from the directory iterator and advance the + // iterator. + FilePath = I->path(); + I.increment(EC); + } + + // Inspect the file at this path to determine if it is debuginfo. 
+        if (!hasELFMagic(FilePath))
+          continue;
+
+        Expected<object::OwningBinary<object::Binary>> BinOrErr =
+            object::createBinary(FilePath);
+
+        if (!BinOrErr) {
+          consumeError(BinOrErr.takeError());
+          continue;
+        }
+        object::Binary *Bin = std::move(BinOrErr.get().getBinary());
+        if (!Bin->isObject())
+          continue;
+
+        // TODO: Support non-ELF binaries
+        object::ELFObjectFileBase *Object =
+            dyn_cast<object::ELFObjectFileBase>(Bin);
+        if (!Object)
+          continue;
+
+        Optional<BuildIDRef> ID = symbolize::getBuildID(Object);
+        if (!ID)
+          continue;
+
+        std::string IDString = buildIDToString(ID.value());
+        if (isDebugBinary(Object)) {
+          std::lock_guard<sys::RWMutex> DebugBinariesGuard(DebugBinariesMutex);
+          DebugBinaries[IDString] = FilePath;
+        } else {
+          std::lock_guard<sys::RWMutex> BinariesGuard(BinariesMutex);
+          Binaries[IDString] = FilePath;
+        }
+      }
+    });
+  }
+  IteratorGroup.wait();
+  std::unique_lock<std::mutex> Guard(IteratorMutex);
+  if (EC)
+    return errorCodeToError(EC);
+  return Error::success();
+}
+
+Expected<Optional<std::string>>
+DebuginfodCollection::getBinaryPath(BuildIDRef ID) {
+  Log.push("getting binary path of ID " + buildIDToString(ID));
+  std::shared_lock<sys::RWMutex> Guard(BinariesMutex);
+  auto Loc = Binaries.find(buildIDToString(ID));
+  if (Loc != Binaries.end()) {
+    std::string Path = Loc->getValue();
+    return Path;
+  }
+  return None;
+}
+
+Expected<Optional<std::string>>
+DebuginfodCollection::getDebugBinaryPath(BuildIDRef ID) {
+  Log.push("getting debug binary path of ID " + buildIDToString(ID));
+  std::shared_lock<sys::RWMutex> Guard(DebugBinariesMutex);
+  auto Loc = DebugBinaries.find(buildIDToString(ID));
+  if (Loc != DebugBinaries.end()) {
+    std::string Path = Loc->getValue();
+    return Path;
+  }
+  return None;
+}
+
+Expected<std::string> DebuginfodCollection::findBinaryPath(BuildIDRef ID) {
+  {
+    // Check collection; perform on-demand update if stale.
+    Expected<Optional<std::string>> PathOrErr = getBinaryPath(ID);
+    if (!PathOrErr)
+      return PathOrErr.takeError();
+    Optional<std::string> Path = *PathOrErr;
+    if (!Path) {
+      Expected<bool> UpdatedOrErr = updateIfStale();
+      if (!UpdatedOrErr)
+        return UpdatedOrErr.takeError();
+      if (*UpdatedOrErr) {
+        // Try once more.
+        PathOrErr = getBinaryPath(ID);
+        if (!PathOrErr)
+          return PathOrErr.takeError();
+        Path = *PathOrErr;
+      }
+    }
+    if (Path)
+      return Path.value();
+  }
+
+  // Try federation; on success, return the downloaded executable directly.
+  Expected<std::string> PathOrErr = getCachedOrDownloadExecutable(ID);
+  if (PathOrErr)
+    return *PathOrErr;
+  consumeError(PathOrErr.takeError());
+
+  // Fall back to debug binary.
+  return findDebugBinaryPath(ID);
+}
+
+Expected<std::string> DebuginfodCollection::findDebugBinaryPath(BuildIDRef ID) {
+  // Check collection; perform on-demand update if stale.
+  Expected<Optional<std::string>> PathOrErr = getDebugBinaryPath(ID);
+  if (!PathOrErr)
+    return PathOrErr.takeError();
+  Optional<std::string> Path = *PathOrErr;
+  if (!Path) {
+    Expected<bool> UpdatedOrErr = updateIfStale();
+    if (!UpdatedOrErr)
+      return UpdatedOrErr.takeError();
+    if (*UpdatedOrErr) {
+      // Try once more, against the debug-binary index this time.
+      PathOrErr = getDebugBinaryPath(ID);
+      if (!PathOrErr)
+        return PathOrErr.takeError();
+      Path = *PathOrErr;
+    }
+  }
+  if (Path)
+    return Path.value();
+
+  // Try federation.
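+  // No local match even after a rescan: fall through to the upstream
+  // servers in DEBUGINFOD_URLS via the caching client.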
+ return getCachedOrDownloadDebuginfo(ID); +} + +DebuginfodServer::DebuginfodServer(DebuginfodLog &Log, + DebuginfodCollection &Collection) + : Log(Log), Collection(Collection) { + cantFail( + Server.get(R"(/buildid/(.*)/debuginfo)", [&](HTTPServerRequest Request) { + Log.push("GET " + Request.UrlPath); + std::string IDString; + if (!tryGetFromHex(Request.UrlPathMatches[0], IDString)) { + Request.setResponse( + {404, "text/plain", "Build ID is not a hex string\n"}); + return; + } + BuildID ID(IDString.begin(), IDString.end()); + Expected<std::string> PathOrErr = Collection.findDebugBinaryPath(ID); + if (Error Err = PathOrErr.takeError()) { + consumeError(std::move(Err)); + Request.setResponse({404, "text/plain", "Build ID not found\n"}); + return; + } + streamFile(Request, *PathOrErr); + })); + cantFail( + Server.get(R"(/buildid/(.*)/executable)", [&](HTTPServerRequest Request) { + Log.push("GET " + Request.UrlPath); + std::string IDString; + if (!tryGetFromHex(Request.UrlPathMatches[0], IDString)) { + Request.setResponse( + {404, "text/plain", "Build ID is not a hex string\n"}); + return; + } + BuildID ID(IDString.begin(), IDString.end()); + Expected<std::string> PathOrErr = Collection.findBinaryPath(ID); + if (Error Err = PathOrErr.takeError()) { + consumeError(std::move(Err)); + Request.setResponse({404, "text/plain", "Build ID not found\n"}); + return; + } + streamFile(Request, *PathOrErr); + })); +} + } // namespace llvm diff --git a/llvm/lib/Debuginfod/HTTPServer.cpp b/llvm/lib/Debuginfod/HTTPServer.cpp new file mode 100644 index 000000000000..2ea923d5a734 --- /dev/null +++ b/llvm/lib/Debuginfod/HTTPServer.cpp @@ -0,0 +1,189 @@ +//===-- llvm/Debuginfod/HTTPServer.cpp - HTTP server library -----*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// +/// This file defines the methods of the HTTPServer class and the streamFile +/// function. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/Debuginfod/HTTPServer.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Regex.h" + +#ifdef LLVM_ENABLE_HTTPLIB +#include "httplib.h" +#endif + +using namespace llvm; + +bool llvm::streamFile(HTTPServerRequest &Request, StringRef FilePath) { + Expected<sys::fs::file_t> FDOrErr = sys::fs::openNativeFileForRead(FilePath); + if (Error Err = FDOrErr.takeError()) { + consumeError(std::move(Err)); + Request.setResponse({404u, "text/plain", "Could not open file to read.\n"}); + return false; + } + ErrorOr<std::unique_ptr<MemoryBuffer>> MBOrErr = + MemoryBuffer::getOpenFile(*FDOrErr, FilePath, + /*FileSize=*/-1, + /*RequiresNullTerminator=*/false); + sys::fs::closeFile(*FDOrErr); + if (Error Err = errorCodeToError(MBOrErr.getError())) { + consumeError(std::move(Err)); + Request.setResponse({404u, "text/plain", "Could not memory-map file.\n"}); + return false; + } + // Lambdas are copied on conversion to to std::function, preventing use of + // smart pointers. 
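+  // The raw pointer is reclaimed by the completion handler passed below,
+  // which runs once the response has been fully streamed or aborted.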
+ MemoryBuffer *MB = MBOrErr->release(); + Request.setResponse({200u, "application/octet-stream", MB->getBufferSize(), + [=](size_t Offset, size_t Length) -> StringRef { + return MB->getBuffer().substr(Offset, Length); + }, + [=](bool Success) { delete MB; }}); + return true; +} + +#ifdef LLVM_ENABLE_HTTPLIB + +bool HTTPServer::isAvailable() { return true; } + +HTTPServer::HTTPServer() { Server = std::make_unique<httplib::Server>(); } + +HTTPServer::~HTTPServer() { stop(); } + +static void expandUrlPathMatches(const std::smatch &Matches, + HTTPServerRequest &Request) { + bool UrlPathSet = false; + for (const auto &it : Matches) { + if (UrlPathSet) + Request.UrlPathMatches.push_back(it); + else { + Request.UrlPath = it; + UrlPathSet = true; + } + } +} + +HTTPServerRequest::HTTPServerRequest(const httplib::Request &HTTPLibRequest, + httplib::Response &HTTPLibResponse) + : HTTPLibResponse(HTTPLibResponse) { + expandUrlPathMatches(HTTPLibRequest.matches, *this); +} + +void HTTPServerRequest::setResponse(HTTPResponse Response) { + HTTPLibResponse.set_content(Response.Body.begin(), Response.Body.size(), + Response.ContentType); + HTTPLibResponse.status = Response.Code; +} + +void HTTPServerRequest::setResponse(StreamingHTTPResponse Response) { + HTTPLibResponse.set_content_provider( + Response.ContentLength, Response.ContentType, + [=](size_t Offset, size_t Length, httplib::DataSink &Sink) { + if (Offset < Response.ContentLength) { + StringRef Chunk = Response.Provider(Offset, Length); + Sink.write(Chunk.begin(), Chunk.size()); + } + return true; + }, + [=](bool Success) { Response.CompletionHandler(Success); }); + + HTTPLibResponse.status = Response.Code; +} + +Error HTTPServer::get(StringRef UrlPathPattern, HTTPRequestHandler Handler) { + std::string ErrorMessage; + if (!Regex(UrlPathPattern).isValid(ErrorMessage)) + return createStringError(errc::argument_out_of_domain, ErrorMessage); + Server->Get(std::string(UrlPathPattern), + [Handler](const httplib::Request &HTTPLibRequest, + httplib::Response &HTTPLibResponse) { + HTTPServerRequest Request(HTTPLibRequest, HTTPLibResponse); + Handler(Request); + }); + return Error::success(); +} + +Error HTTPServer::bind(unsigned ListenPort, const char *HostInterface) { + if (!Server->bind_to_port(HostInterface, ListenPort)) + return createStringError(errc::io_error, + "Could not assign requested address."); + Port = ListenPort; + return Error::success(); +} + +Expected<unsigned> HTTPServer::bind(const char *HostInterface) { + int ListenPort = Server->bind_to_any_port(HostInterface); + if (ListenPort < 0) + return createStringError(errc::io_error, + "Could not assign any port on requested address."); + return Port = ListenPort; +} + +Error HTTPServer::listen() { + if (!Port) + return createStringError(errc::io_error, + "Cannot listen without first binding to a port."); + if (!Server->listen_after_bind()) + return createStringError( + errc::io_error, + "An unknown error occurred when cpp-httplib attempted to listen."); + return Error::success(); +} + +void HTTPServer::stop() { + Server->stop(); + Port = 0; +} + +#else + +// TODO: Implement barebones standalone HTTP server implementation. 
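Until a standalone fallback exists, the cpp-httplib-backed implementation above is the only working backend. The get/bind/listen flow is typically driven as in this minimal sketch (the route, port, and host interface are illustrative, and it assumes a build with LLVM_ENABLE_HTTPLIB):

    #include "llvm/Debuginfod/HTTPServer.h"
    #include "llvm/Support/Errc.h"
    #include "llvm/Support/Error.h"

    using namespace llvm;

    static Error serveHello() {
      if (!HTTPServer::isAvailable())
        return createStringError(errc::io_error, "no HTTP server backend");
      HTTPServer Server;
      // Capture groups in the pattern become Request.UrlPathMatches.
      if (Error Err =
              Server.get(R"(/hello/(.*))", [](HTTPServerRequest Request) {
                Request.setResponse({200u, "text/plain", "hello\n"});
              }))
        return Err;
      if (Error Err = Server.bind(8080, "0.0.0.0"))
        return Err;
      return Server.listen(); // Blocks until stop() is called.
    }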
+bool HTTPServer::isAvailable() { return false; } + +HTTPServer::HTTPServer() = default; + +HTTPServer::~HTTPServer() = default; + +void HTTPServerRequest::setResponse(HTTPResponse Response) { + llvm_unreachable("No HTTP server implementation available"); +} + +void HTTPServerRequest::setResponse(StreamingHTTPResponse Response) { + llvm_unreachable("No HTTP server implementation available"); +} + +Error HTTPServer::get(StringRef UrlPathPattern, HTTPRequestHandler Handler) { + llvm_unreachable("No HTTP server implementation available"); +} + +Error HTTPServer::bind(unsigned ListenPort, const char *HostInterface) { + llvm_unreachable("No HTTP server implementation available"); +} + +Expected<unsigned> HTTPServer::bind(const char *HostInterface) { + llvm_unreachable("No HTTP server implementation available"); +} + +Error HTTPServer::listen() { + llvm_unreachable("No HTTP server implementation available"); +} + +void HTTPServer::stop() { + llvm_unreachable("No HTTP server implementation available"); +} + +#endif // LLVM_ENABLE_HTTPLIB diff --git a/llvm/lib/ExecutionEngine/GDBRegistrationListener.cpp b/llvm/lib/ExecutionEngine/GDBRegistrationListener.cpp index 29a623ebe449..f1eeee3b3599 100644 --- a/llvm/lib/ExecutionEngine/GDBRegistrationListener.cpp +++ b/llvm/lib/ExecutionEngine/GDBRegistrationListener.cpp @@ -12,7 +12,6 @@ #include "llvm/Object/ObjectFile.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Mutex.h" #include <mutex> @@ -91,11 +90,18 @@ typedef llvm::DenseMap<JITEventListener::ObjectKey, RegisteredObjectInfo> /// object files that are in executable memory managed by the client of this /// class. class GDBJITRegistrationListener : public JITEventListener { + /// Lock used to serialize all jit registration events, since they + /// modify global variables. + /// + /// Only a single instance of GDBJITRegistrationListener is ever created, + /// and so the lock can be a member variable of that instance. This ensures + /// destructors are run in the correct order. + sys::Mutex JITDebugLock; + /// A map of in-memory object files that have been registered with the /// JIT interface. RegisteredObjectBufferMap ObjectBufferMap; -public: /// Instantiates the JIT service. GDBJITRegistrationListener() = default; @@ -103,6 +109,12 @@ public: /// internal resources. ~GDBJITRegistrationListener() override; +public: + static GDBJITRegistrationListener &instance() { + static GDBJITRegistrationListener Instance; + return Instance; + } + /// Creates an entry in the JIT registry for the buffer @p Object, /// which must contain an object file in executable memory with any /// debug information for the debugger. @@ -121,10 +133,6 @@ private: void deregisterObjectInternal(RegisteredObjectBufferMap::iterator I); }; -/// Lock used to serialize all jit registration events, since they -/// modify global variables. -ManagedStatic<sys::Mutex> JITDebugLock; - /// Do the registration. void NotifyDebugger(jit_code_entry* JITCodeEntry) { __jit_debug_descriptor.action_flag = JIT_REGISTER_FN; @@ -143,7 +151,7 @@ void NotifyDebugger(jit_code_entry* JITCodeEntry) { GDBJITRegistrationListener::~GDBJITRegistrationListener() { // Free all registered object files. 
- std::lock_guard<llvm::sys::Mutex> locked(*JITDebugLock); + std::lock_guard<llvm::sys::Mutex> locked(JITDebugLock); for (RegisteredObjectBufferMap::iterator I = ObjectBufferMap.begin(), E = ObjectBufferMap.end(); I != E; ++I) { @@ -167,7 +175,7 @@ void GDBJITRegistrationListener::notifyObjectLoaded( const char *Buffer = DebugObj.getBinary()->getMemoryBufferRef().getBufferStart(); size_t Size = DebugObj.getBinary()->getMemoryBufferRef().getBufferSize(); - std::lock_guard<llvm::sys::Mutex> locked(*JITDebugLock); + std::lock_guard<llvm::sys::Mutex> locked(JITDebugLock); assert(ObjectBufferMap.find(K) == ObjectBufferMap.end() && "Second attempt to perform debug registration."); jit_code_entry* JITCodeEntry = new jit_code_entry(); @@ -186,7 +194,7 @@ void GDBJITRegistrationListener::notifyObjectLoaded( } void GDBJITRegistrationListener::notifyFreeingObject(ObjectKey K) { - std::lock_guard<llvm::sys::Mutex> locked(*JITDebugLock); + std::lock_guard<llvm::sys::Mutex> locked(JITDebugLock); RegisteredObjectBufferMap::iterator I = ObjectBufferMap.find(K); if (I != ObjectBufferMap.end()) { @@ -228,14 +236,12 @@ void GDBJITRegistrationListener::deregisterObjectInternal( JITCodeEntry = nullptr; } -llvm::ManagedStatic<GDBJITRegistrationListener> GDBRegListener; - } // end namespace namespace llvm { JITEventListener* JITEventListener::createGDBRegistrationListener() { - return &*GDBRegListener; + return &GDBJITRegistrationListener::instance(); } } // namespace llvm diff --git a/llvm/lib/ExecutionEngine/JITLink/COFF.cpp b/llvm/lib/ExecutionEngine/JITLink/COFF.cpp new file mode 100644 index 000000000000..fddc9b813fb2 --- /dev/null +++ b/llvm/lib/ExecutionEngine/JITLink/COFF.cpp @@ -0,0 +1,137 @@ +//===-------------- COFF.cpp - JIT linker function for COFF -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// COFF jit-link function. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ExecutionEngine/JITLink/COFF.h" + +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/ExecutionEngine/JITLink/COFF_x86_64.h" +#include "llvm/Object/COFF.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/MemoryBuffer.h" +#include <cstring> + +using namespace llvm; + +#define DEBUG_TYPE "jitlink" + +namespace llvm { +namespace jitlink { + +static StringRef getMachineName(uint16_t Machine) { + switch (Machine) { + case COFF::IMAGE_FILE_MACHINE_I386: + return "i386"; + case COFF::IMAGE_FILE_MACHINE_AMD64: + return "x86_64"; + case COFF::IMAGE_FILE_MACHINE_ARMNT: + return "ARM"; + case COFF::IMAGE_FILE_MACHINE_ARM64: + return "ARM64"; + default: + return "unknown"; + } +} + +Expected<std::unique_ptr<LinkGraph>> +createLinkGraphFromCOFFObject(MemoryBufferRef ObjectBuffer) { + StringRef Data = ObjectBuffer.getBuffer(); + + // Check magic + auto Magic = identify_magic(ObjectBuffer.getBuffer()); + if (Magic != file_magic::coff_object) + return make_error<JITLinkError>("Invalid COFF buffer"); + + if (Data.size() < sizeof(object::coff_file_header)) + return make_error<JITLinkError>("Truncated COFF buffer"); + + uint64_t CurPtr = 0; + bool IsPE = false; + + // Check if this is a PE/COFF file. 
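+  // PE images lead with a DOS header whose Magic field is "MZ" and whose
+  // AddressOfNewExeHeader points at the "PE\0\0" signature; a plain COFF
+  // object starts directly with the COFF file header at offset zero.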
+ if (Data.size() >= sizeof(object::dos_header) + sizeof(COFF::PEMagic)) { + const auto *DH = + reinterpret_cast<const object::dos_header *>(Data.data() + CurPtr); + if (DH->Magic[0] == 'M' && DH->Magic[1] == 'Z') { + // Check the PE magic bytes. ("PE\0\0") + CurPtr = DH->AddressOfNewExeHeader; + if (memcmp(Data.data() + CurPtr, COFF::PEMagic, sizeof(COFF::PEMagic)) != + 0) { + return make_error<JITLinkError>("Incorrect PE magic"); + } + CurPtr += sizeof(COFF::PEMagic); + IsPE = true; + } + } + if (Data.size() < CurPtr + sizeof(object::coff_file_header)) + return make_error<JITLinkError>("Truncated COFF buffer"); + + const object::coff_file_header *COFFHeader = + reinterpret_cast<const object::coff_file_header *>(Data.data() + CurPtr); + const object::coff_bigobj_file_header *COFFBigObjHeader = nullptr; + + // Deal with bigobj file + if (!IsPE && COFFHeader->Machine == COFF::IMAGE_FILE_MACHINE_UNKNOWN && + COFFHeader->NumberOfSections == uint16_t(0xffff) && + Data.size() >= sizeof(object::coff_bigobj_file_header)) { + if (Data.size() < sizeof(object::coff_file_header)) { + return make_error<JITLinkError>("Truncated COFF buffer"); + } + COFFBigObjHeader = + reinterpret_cast<const object::coff_bigobj_file_header *>(Data.data() + + CurPtr); + + // Verify that we are dealing with bigobj. + if (COFFBigObjHeader->Version >= COFF::BigObjHeader::MinBigObjectVersion && + std::memcmp(COFFBigObjHeader->UUID, COFF::BigObjMagic, + sizeof(COFF::BigObjMagic)) == 0) { + COFFHeader = nullptr; + CurPtr += sizeof(object::coff_bigobj_file_header); + } else + COFFBigObjHeader = nullptr; + } + + uint16_t Machine = + COFFHeader ? COFFHeader->Machine : COFFBigObjHeader->Machine; + LLVM_DEBUG({ + dbgs() << "jitLink_COFF: PE = " << (IsPE ? "yes" : "no") + << ", bigobj = " << (COFFBigObjHeader ? "yes" : "no") + << ", identifier = \"" << ObjectBuffer.getBufferIdentifier() << "\" " + << "machine = " << getMachineName(Machine) << "\n"; + }); + + switch (Machine) { + case COFF::IMAGE_FILE_MACHINE_AMD64: + return createLinkGraphFromCOFFObject_x86_64(ObjectBuffer); + default: + return make_error<JITLinkError>( + "Unsupported target machine architecture in COFF object " + + ObjectBuffer.getBufferIdentifier() + ": " + getMachineName(Machine)); + } +} + +void link_COFF(std::unique_ptr<LinkGraph> G, + std::unique_ptr<JITLinkContext> Ctx) { + switch (G->getTargetTriple().getArch()) { + case Triple::x86_64: + link_COFF_x86_64(std::move(G), std::move(Ctx)); + return; + default: + Ctx->notifyFailed(make_error<JITLinkError>( + "Unsupported target machine architecture in COFF link graph " + + G->getName())); + return; + } +} + +} // end namespace jitlink +} // end namespace llvm diff --git a/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.cpp b/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.cpp new file mode 100644 index 000000000000..43b9c2ba400b --- /dev/null +++ b/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.cpp @@ -0,0 +1,527 @@ +//=--------- COFFLinkGraphBuilder.cpp - COFF LinkGraph builder ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Generic COFF LinkGraph buliding code. 
+// +//===----------------------------------------------------------------------===// +#include "COFFLinkGraphBuilder.h" + +#define DEBUG_TYPE "jitlink" + +static const char *CommonSectionName = "__common"; + +namespace llvm { +namespace jitlink { + +COFFLinkGraphBuilder::COFFLinkGraphBuilder( + const object::COFFObjectFile &Obj, Triple TT, + LinkGraph::GetEdgeKindNameFunction GetEdgeKindName) + : Obj(Obj), + G(std::make_unique<LinkGraph>( + Obj.getFileName().str(), Triple(std::move(TT)), getPointerSize(Obj), + getEndianness(Obj), std::move(GetEdgeKindName))) { + LLVM_DEBUG({ + dbgs() << "Created COFFLinkGraphBuilder for \"" << Obj.getFileName() + << "\"\n"; + }); +} + +COFFLinkGraphBuilder::~COFFLinkGraphBuilder() = default; + +unsigned +COFFLinkGraphBuilder::getPointerSize(const object::COFFObjectFile &Obj) { + return Obj.getBytesInAddress(); +} + +support::endianness +COFFLinkGraphBuilder::getEndianness(const object::COFFObjectFile &Obj) { + return Obj.isLittleEndian() ? support::little : support::big; +} + +uint64_t COFFLinkGraphBuilder::getSectionSize(const object::COFFObjectFile &Obj, + const object::coff_section *Sec) { + // Consider the difference between executable form and object form. + // More information is inside COFFObjectFile::getSectionSize + if (Obj.getDOSHeader()) + return std::min(Sec->VirtualSize, Sec->SizeOfRawData); + return Sec->SizeOfRawData; +} + +uint64_t +COFFLinkGraphBuilder::getSectionAddress(const object::COFFObjectFile &Obj, + const object::coff_section *Section) { + return Section->VirtualAddress + Obj.getImageBase(); +} + +bool COFFLinkGraphBuilder::isComdatSection( + const object::coff_section *Section) { + return Section->Characteristics & COFF::IMAGE_SCN_LNK_COMDAT; +} + +Section &COFFLinkGraphBuilder::getCommonSection() { + if (!CommonSection) + CommonSection = + &G->createSection(CommonSectionName, MemProt::Read | MemProt::Write); + return *CommonSection; +} + +Expected<std::unique_ptr<LinkGraph>> COFFLinkGraphBuilder::buildGraph() { + if (!Obj.isRelocatableObject()) + return make_error<JITLinkError>("Object is not a relocatable COFF file"); + + if (auto Err = graphifySections()) + return std::move(Err); + + if (auto Err = graphifySymbols()) + return std::move(Err); + + if (auto Err = addRelocations()) + return std::move(Err); + + return std::move(G); +} + +StringRef +COFFLinkGraphBuilder::getCOFFSectionName(COFFSectionIndex SectionIndex, + const object::coff_section *Sec, + object::COFFSymbolRef Sym) { + switch (SectionIndex) { + case COFF::IMAGE_SYM_UNDEFINED: { + if (Sym.getValue()) + return "(common)"; + else + return "(external)"; + } + case COFF::IMAGE_SYM_ABSOLUTE: + return "(absolute)"; + case COFF::IMAGE_SYM_DEBUG: { + // Used with .file symbol + return "(debug)"; + } + default: { + // Non reserved regular section numbers + if (Expected<StringRef> SecNameOrErr = Obj.getSectionName(Sec)) + return *SecNameOrErr; + } + } + return ""; +} + +Error COFFLinkGraphBuilder::graphifySections() { + LLVM_DEBUG(dbgs() << " Creating graph sections...\n"); + + GraphBlocks.resize(Obj.getNumberOfSections() + 1); + // For each section... 
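+  // COFF section numbers are 1-based (0 is reserved), which is why
+  // GraphBlocks was sized to getNumberOfSections() + 1 and the loop below
+  // starts at index 1.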
+ for (COFFSectionIndex SecIndex = 1; + SecIndex <= static_cast<COFFSectionIndex>(Obj.getNumberOfSections()); + SecIndex++) { + Expected<const object::coff_section *> Sec = Obj.getSection(SecIndex); + if (!Sec) + return Sec.takeError(); + + StringRef SectionName; + if (Expected<StringRef> SecNameOrErr = Obj.getSectionName(*Sec)) + SectionName = *SecNameOrErr; + + bool IsDiscardable = + (*Sec)->Characteristics & + (COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_LNK_INFO); + if (IsDiscardable) { + LLVM_DEBUG(dbgs() << " " << SecIndex << ": \"" << SectionName + << "\" is discardable: " + "No graph section will be created.\n"); + continue; + } + + // FIXME: Skip debug info sections + + LLVM_DEBUG({ + dbgs() << " " + << "Creating section for \"" << SectionName << "\"\n"; + }); + + // Get the section's memory protection flags. + MemProt Prot = MemProt::None; + if ((*Sec)->Characteristics & COFF::IMAGE_SCN_MEM_EXECUTE) + Prot |= MemProt::Exec; + if ((*Sec)->Characteristics & COFF::IMAGE_SCN_MEM_READ) + Prot |= MemProt::Read; + if ((*Sec)->Characteristics & COFF::IMAGE_SCN_MEM_WRITE) + Prot |= MemProt::Write; + + // Look for existing sections first. + auto *GraphSec = G->findSectionByName(SectionName); + if (!GraphSec) + GraphSec = &G->createSection(SectionName, Prot); + if (GraphSec->getMemProt() != Prot) + return make_error<JITLinkError>("MemProt should match"); + + Block *B = nullptr; + if ((*Sec)->Characteristics & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA) + B = &G->createZeroFillBlock( + *GraphSec, getSectionSize(Obj, *Sec), + orc::ExecutorAddr(getSectionAddress(Obj, *Sec)), + (*Sec)->getAlignment(), 0); + else { + ArrayRef<uint8_t> Data; + if (auto Err = Obj.getSectionContents(*Sec, Data)) + return Err; + + B = &G->createContentBlock( + *GraphSec, + ArrayRef<char>(reinterpret_cast<const char *>(Data.data()), + Data.size()), + orc::ExecutorAddr(getSectionAddress(Obj, *Sec)), + (*Sec)->getAlignment(), 0); + } + + setGraphBlock(SecIndex, B); + } + + return Error::success(); +} + +Error COFFLinkGraphBuilder::graphifySymbols() { + LLVM_DEBUG(dbgs() << " Creating graph symbols...\n"); + + SymbolSets.resize(Obj.getNumberOfSections() + 1); + GraphSymbols.resize(Obj.getNumberOfSymbols()); + + for (COFFSymbolIndex SymIndex = 0; + SymIndex < static_cast<COFFSymbolIndex>(Obj.getNumberOfSymbols()); + SymIndex++) { + Expected<object::COFFSymbolRef> Sym = Obj.getSymbol(SymIndex); + if (!Sym) + return Sym.takeError(); + + StringRef SymbolName; + if (Expected<StringRef> SymNameOrErr = Obj.getSymbolName(*Sym)) + SymbolName = *SymNameOrErr; + + COFFSectionIndex SectionIndex = Sym->getSectionNumber(); + const object::coff_section *Sec = nullptr; + + if (!COFF::isReservedSectionNumber(SectionIndex)) { + auto SecOrErr = Obj.getSection(SectionIndex); + if (!SecOrErr) + return make_error<JITLinkError>( + "Invalid COFF section number:" + formatv("{0:d}: ", SectionIndex) + + " (" + toString(SecOrErr.takeError()) + ")"); + Sec = *SecOrErr; + } + + // Create jitlink symbol + jitlink::Symbol *GSym = nullptr; + if (Sym->isFileRecord()) + LLVM_DEBUG({ + dbgs() << " " << SymIndex << ": Skipping FileRecord symbol \"" + << SymbolName << "\" in " + << getCOFFSectionName(SectionIndex, Sec, *Sym) + << " (index: " << SectionIndex << ") \n"; + }); + else if (Sym->isUndefined()) { + LLVM_DEBUG({ + dbgs() << " " << SymIndex + << ": Creating external graph symbol for COFF symbol \"" + << SymbolName << "\" in " + << getCOFFSectionName(SectionIndex, Sec, *Sym) + << " (index: " << SectionIndex << ") \n"; + }); + GSym = + 
&G->addExternalSymbol(SymbolName, Sym->getValue(), Linkage::Strong); + } else if (Sym->isWeakExternal()) { + COFFSymbolIndex TagIndex = + Sym->getAux<object::coff_aux_weak_external>()->TagIndex; + assert(Sym->getAux<object::coff_aux_weak_external>()->Characteristics != + COFF::IMAGE_WEAK_EXTERN_SEARCH_NOLIBRARY && + "IMAGE_WEAK_EXTERN_SEARCH_NOLIBRARY is not supported."); + assert(Sym->getAux<object::coff_aux_weak_external>()->Characteristics != + COFF::IMAGE_WEAK_EXTERN_SEARCH_LIBRARY && + "IMAGE_WEAK_EXTERN_SEARCH_LIBRARY is not supported."); + WeakAliasRequests.push_back({SymIndex, TagIndex, SymbolName}); + } else { + Expected<jitlink::Symbol *> NewGSym = + createDefinedSymbol(SymIndex, SymbolName, *Sym, Sec); + if (!NewGSym) + return NewGSym.takeError(); + GSym = *NewGSym; + if (GSym) { + LLVM_DEBUG({ + dbgs() << " " << SymIndex + << ": Creating defined graph symbol for COFF symbol \"" + << SymbolName << "\" in " + << getCOFFSectionName(SectionIndex, Sec, *Sym) + << " (index: " << SectionIndex << ") \n"; + dbgs() << " " << *GSym << "\n"; + }); + } + } + + // Register the symbol + if (GSym) + setGraphSymbol(SectionIndex, SymIndex, *GSym); + SymIndex += Sym->getNumberOfAuxSymbols(); + } + + if (auto Err = flushWeakAliasRequests()) + return Err; + + if (auto Err = calculateImplicitSizeOfSymbols()) + return Err; + + return Error::success(); +} + +Error COFFLinkGraphBuilder::flushWeakAliasRequests() { + // Export the weak external symbols and alias it + for (auto &WeakAlias : WeakAliasRequests) { + if (auto *Target = getGraphSymbol(WeakAlias.Target)) { + Expected<object::COFFSymbolRef> AliasSymbol = + Obj.getSymbol(WeakAlias.Alias); + if (!AliasSymbol) + return AliasSymbol.takeError(); + + // FIXME: Support this when there's a way to handle this. + if (!Target->isDefined()) + return make_error<JITLinkError>("Weak external symbol with external " + "symbol as alternative not supported."); + + jitlink::Symbol *NewSymbol = &G->addDefinedSymbol( + Target->getBlock(), Target->getOffset(), WeakAlias.SymbolName, + Target->getSize(), Linkage::Weak, Scope::Default, + Target->isCallable(), false); + setGraphSymbol(AliasSymbol->getSectionNumber(), WeakAlias.Alias, + *NewSymbol); + LLVM_DEBUG({ + dbgs() << " " << WeakAlias.Alias + << ": Creating weak external symbol for COFF symbol \"" + << WeakAlias.SymbolName << "\" in section " + << AliasSymbol->getSectionNumber() << "\n"; + dbgs() << " " << *NewSymbol << "\n"; + }); + } else + return make_error<JITLinkError>("Weak symbol alias requested but actual " + "symbol not found for symbol " + + formatv("{0:d}", WeakAlias.Alias)); + } + return Error::success(); +} + +// In COFF, most of the defined symbols don't contain the size information. +// Hence, we calculate the "implicit" size of symbol by taking the delta of +// offsets of consecutive symbols within a block. We maintain a balanced tree +// set of symbols sorted by offset per each block in order to achieve +// logarithmic time complexity of sorted symbol insertion. Symbol is inserted to +// the set once it's processed in graphifySymbols. In this function, we iterate +// each collected symbol in sorted order and calculate the implicit size. 
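+// For example, four zero-sized symbols at offsets {0, 16, 16, 40} in a
+// 64-byte block get implicit sizes {16, 24, 24, 24}: walking offsets in
+// descending order, each symbol extends to the previous (higher) offset,
+// aliased symbols at the same offset share a size, and the last symbol
+// extends to the end of the block.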
+Error COFFLinkGraphBuilder::calculateImplicitSizeOfSymbols() { + for (COFFSectionIndex SecIndex = 1; + SecIndex <= static_cast<COFFSectionIndex>(Obj.getNumberOfSections()); + SecIndex++) { + auto &SymbolSet = SymbolSets[SecIndex]; + jitlink::Block *B = getGraphBlock(SecIndex); + orc::ExecutorAddrDiff LastOffset = B->getSize(); + orc::ExecutorAddrDiff LastDifferentOffset = B->getSize(); + orc::ExecutorAddrDiff LastSize = 0; + for (auto It = SymbolSet.rbegin(); It != SymbolSet.rend(); It++) { + orc::ExecutorAddrDiff Offset = It->first; + jitlink::Symbol *Symbol = It->second; + orc::ExecutorAddrDiff CandSize; + // Last offset can be same when aliasing happened + if (Symbol->getOffset() == LastOffset) + CandSize = LastSize; + else + CandSize = LastOffset - Offset; + + LLVM_DEBUG({ + if (Offset + Symbol->getSize() > LastDifferentOffset) + dbgs() << " Overlapping symbol range generated for the following " + "symbol:" + << "\n" + << " " << *Symbol << "\n"; + }); + (void)LastDifferentOffset; + if (LastOffset != Offset) + LastDifferentOffset = Offset; + LastSize = CandSize; + LastOffset = Offset; + if (Symbol->getSize()) { + // Non empty symbol can happen in COMDAT symbol. + // We don't consider the possibility of overlapping symbol range that + // could be introduced by disparity between inferred symbol size and + // defined symbol size because symbol size information is currently only + // used by jitlink-check where we have control to not make overlapping + // ranges. + continue; + } + + LLVM_DEBUG({ + if (!CandSize) + dbgs() << " Empty implicit symbol size generated for the following " + "symbol:" + << "\n" + << " " << *Symbol << "\n"; + }); + + Symbol->setSize(CandSize); + } + } + return Error::success(); +} + +Expected<Symbol *> COFFLinkGraphBuilder::createDefinedSymbol( + COFFSymbolIndex SymIndex, StringRef SymbolName, + object::COFFSymbolRef Symbol, const object::coff_section *Section) { + if (Symbol.isCommon()) { + // FIXME: correct alignment + return &G->addCommonSymbol(SymbolName, Scope::Default, getCommonSection(), + orc::ExecutorAddr(), Symbol.getValue(), + Symbol.getValue(), false); + } + if (Symbol.isAbsolute()) + return &G->addAbsoluteSymbol(SymbolName, + orc::ExecutorAddr(Symbol.getValue()), 0, + Linkage::Strong, Scope::Local, false); + + if (llvm::COFF::isReservedSectionNumber(Symbol.getSectionNumber())) + return make_error<JITLinkError>( + "Reserved section number used in regular symbol " + + formatv("{0:d}", SymIndex)); + + Block *B = getGraphBlock(Symbol.getSectionNumber()); + if (Symbol.isExternal()) { + // This is not a comdat sequence, export the symbol as it is + if (!isComdatSection(Section)) + return &G->addDefinedSymbol( + *B, Symbol.getValue(), SymbolName, 0, Linkage::Strong, Scope::Default, + Symbol.getComplexType() == COFF::IMAGE_SYM_DTYPE_FUNCTION, false); + else { + if (!PendingComdatExport) + return make_error<JITLinkError>("No pending COMDAT export for symbol " + + formatv("{0:d}", SymIndex)); + if (PendingComdatExport->SectionIndex != Symbol.getSectionNumber()) + return make_error<JITLinkError>( + "COMDAT export section number mismatch for symbol " + + formatv("{0:d}", SymIndex)); + return exportCOMDATSymbol(SymIndex, SymbolName, Symbol); + } + } + + if (Symbol.getStorageClass() == COFF::IMAGE_SYM_CLASS_STATIC) { + const object::coff_aux_section_definition *Definition = + Symbol.getSectionDefinition(); + if (!Definition || !isComdatSection(Section)) { + // Handle typical static symbol + return &G->addDefinedSymbol( + *B, Symbol.getValue(), SymbolName, 0, 
Linkage::Strong, Scope::Local,
+          Symbol.getComplexType() == COFF::IMAGE_SYM_DTYPE_FUNCTION, false);
+    }
+    if (Definition->Selection == COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE) {
+      // FIXME: don't dead-strip this when the parent section is alive
+      return &G->addDefinedSymbol(
+          *B, Symbol.getValue(), SymbolName, 0, Linkage::Strong, Scope::Local,
+          Symbol.getComplexType() == COFF::IMAGE_SYM_DTYPE_FUNCTION, false);
+    }
+    if (PendingComdatExport)
+      return make_error<JITLinkError>(
+          "COMDAT export request already exists before symbol " +
+          formatv("{0:d}", SymIndex));
+    return createCOMDATExportRequest(SymIndex, Symbol, Definition);
+  }
+  return make_error<JITLinkError>("Unsupported storage class " +
+                                  formatv("{0:d}", Symbol.getStorageClass()) +
+                                  " in symbol " + formatv("{0:d}", SymIndex));
+}
+
+// COMDAT handling:
+// When the IMAGE_SCN_LNK_COMDAT flag is set in the flags of a section, the
+// section is called a COMDAT section. It contains two symbols in a sequence
+// that specifies the behavior. The first symbol is the section symbol, which
+// carries the size and name of the section as well as the selection type
+// that specifies how duplicates of the symbol are handled. The second symbol
+// is the COMDAT symbol, which usually defines the external name and data
+// type.
+//
+// Since the two symbols always come in a specific order, we initiate a
+// pending COMDAT export request when we encounter the first symbol and
+// actually export it when we process the second symbol.
+//
+// Process the first symbol of COMDAT sequence.
+Expected<Symbol *> COFFLinkGraphBuilder::createCOMDATExportRequest(
+    COFFSymbolIndex SymIndex, object::COFFSymbolRef Symbol,
+    const object::coff_aux_section_definition *Definition) {
+  Block *B = getGraphBlock(Symbol.getSectionNumber());
+  Linkage L = Linkage::Strong;
+  switch (Definition->Selection) {
+  case COFF::IMAGE_COMDAT_SELECT_NODUPLICATES: {
+    L = Linkage::Strong;
+    break;
+  }
+  case COFF::IMAGE_COMDAT_SELECT_ANY: {
+    L = Linkage::Weak;
+    break;
+  }
+  case COFF::IMAGE_COMDAT_SELECT_EXACT_MATCH:
+  case COFF::IMAGE_COMDAT_SELECT_SAME_SIZE: {
+    // FIXME: Implement size/content validation when LinkGraph is able to
+    // handle this.
+    L = Linkage::Weak;
+    break;
+  }
+  case COFF::IMAGE_COMDAT_SELECT_LARGEST: {
+    // FIXME: Support IMAGE_COMDAT_SELECT_LARGEST when LinkGraph is able to
+    // handle this.
+    return make_error<JITLinkError>(
+        "IMAGE_COMDAT_SELECT_LARGEST is not supported.");
+  }
+  case COFF::IMAGE_COMDAT_SELECT_NEWEST: {
+    // Even link.exe doesn't support this selection properly.
+    return make_error<JITLinkError>(
+        "IMAGE_COMDAT_SELECT_NEWEST is not supported.");
+  }
+  default: {
+    return make_error<JITLinkError>("Invalid comdat selection type: " +
+                                    formatv("{0:d}", Definition->Selection));
+  }
+  }
+  PendingComdatExport = {SymIndex, Symbol.getSectionNumber(), L};
+  return &G->addAnonymousSymbol(*B, Symbol.getValue(), Definition->Length,
+                                false, false);
+}
+
+// Process the second symbol of COMDAT sequence.
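+// An illustrative symbol-table pair (indices and names are hypothetical):
+//   [4] .text$mn  SECT2  STATIC    aux: Length=24, Selection=SELECT_ANY
+//   [6] foo       SECT2  EXTERNAL
+// Symbol [4] goes through createCOMDATExportRequest above; symbol [6] is
+// handled by exportCOMDATSymbol below, which names the anonymous symbol and
+// applies the recorded linkage.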
+Expected<Symbol *>
+COFFLinkGraphBuilder::exportCOMDATSymbol(COFFSymbolIndex SymIndex,
+                                         StringRef SymbolName,
+                                         object::COFFSymbolRef Symbol) {
+  COFFSymbolIndex TargetIndex = PendingComdatExport->SymbolIndex;
+  Linkage L = PendingComdatExport->Linkage;
+  jitlink::Symbol *Target = getGraphSymbol(TargetIndex);
+  assert(Target && "COMDAT leader is invalid.");
+  assert((llvm::count_if(G->defined_symbols(),
+                         [&](const jitlink::Symbol *Sym) {
+                           return Sym->getName() == SymbolName;
+                         }) == 0) &&
+         "Duplicate defined symbol");
+  Target->setName(SymbolName);
+  Target->setLinkage(L);
+  Target->setCallable(Symbol.getComplexType() ==
+                      COFF::IMAGE_SYM_DTYPE_FUNCTION);
+  Target->setScope(Scope::Default);
+  LLVM_DEBUG({
+    dbgs() << "    " << SymIndex
+           << ": Exporting COMDAT graph symbol for COFF symbol \""
+           << SymbolName << "\" in section " << Symbol.getSectionNumber()
+           << "\n";
+    dbgs() << "      " << *Target << "\n";
+  });
+  PendingComdatExport = None;
+  return Target;
+}
+
+} // namespace jitlink
+} // namespace llvm
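With COFF wired into the generic format dispatch (see the JITLink.cpp hunk further down), a client drives a link through the library's two generic entry points. A minimal sketch; the JITLinkContext implementation (memory manager, symbol resolution, and so on) is assumed to exist elsewhere:

    #include "llvm/ExecutionEngine/JITLink/JITLink.h"
    #include "llvm/Support/MemoryBuffer.h"

    using namespace llvm;
    using namespace llvm::jitlink;

    void linkObject(MemoryBufferRef Obj, std::unique_ptr<JITLinkContext> Ctx) {
      // Dispatches on file magic: MachO, ELF, and now COFF.
      Expected<std::unique_ptr<LinkGraph>> G = createLinkGraphFromObject(Obj);
      if (!G) {
        Ctx->notifyFailed(G.takeError());
        return;
      }
      // Dispatches on the graph's object format to link_COFF et al.
      link(std::move(*G), std::move(Ctx));
    }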
\ No newline at end of file diff --git a/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.h b/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.h new file mode 100644 index 000000000000..4dc1b14dc4a2 --- /dev/null +++ b/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.h @@ -0,0 +1,199 @@ +//===----- COFFLinkGraphBuilder.h - COFF LinkGraph builder ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Generic COFF LinkGraph building code. +// +//===----------------------------------------------------------------------===// + +#ifndef LIB_EXECUTIONENGINE_JITLINK_COFFLINKGRAPHBUILDER_H +#define LIB_EXECUTIONENGINE_JITLINK_COFFLINKGRAPHBUILDER_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ExecutionEngine/JITLink/JITLink.h" +#include "llvm/Object/COFF.h" + +#include "EHFrameSupportImpl.h" +#include "JITLinkGeneric.h" + +#define DEBUG_TYPE "jitlink" + +#include <list> + +namespace llvm { +namespace jitlink { + +class COFFLinkGraphBuilder { +public: + virtual ~COFFLinkGraphBuilder(); + Expected<std::unique_ptr<LinkGraph>> buildGraph(); + +protected: + using COFFSectionIndex = int32_t; + using COFFSymbolIndex = int32_t; + + COFFLinkGraphBuilder(const object::COFFObjectFile &Obj, Triple TT, + LinkGraph::GetEdgeKindNameFunction GetEdgeKindName); + + LinkGraph &getGraph() const { return *G; } + + const object::COFFObjectFile &getObject() const { return Obj; } + + virtual Error addRelocations() = 0; + + Error graphifySections(); + Error graphifySymbols(); + + void setGraphSymbol(COFFSectionIndex SecIndex, COFFSymbolIndex SymIndex, + Symbol &Sym) { + assert(!GraphSymbols[SymIndex] && "Duplicate symbol at index"); + GraphSymbols[SymIndex] = &Sym; + if (!COFF::isReservedSectionNumber(SecIndex)) + SymbolSets[SecIndex].insert({Sym.getOffset(), &Sym}); + } + + Symbol *getGraphSymbol(COFFSymbolIndex SymIndex) const { + if (SymIndex < 0 || + SymIndex >= static_cast<COFFSymbolIndex>(GraphSymbols.size())) + return nullptr; + return GraphSymbols[SymIndex]; + } + + void setGraphBlock(COFFSectionIndex SecIndex, Block *B) { + assert(!GraphBlocks[SecIndex] && "Duplicate section at index"); + assert(!COFF::isReservedSectionNumber(SecIndex) && "Invalid section index"); + GraphBlocks[SecIndex] = B; + } + + Block *getGraphBlock(COFFSectionIndex SecIndex) const { + if (SecIndex <= 0 || + SecIndex >= static_cast<COFFSectionIndex>(GraphSymbols.size())) + return nullptr; + return GraphBlocks[SecIndex]; + } + + object::COFFObjectFile::section_iterator_range sections() const { + return Obj.sections(); + } + + /// Traverse all matching relocation records in the given section. The handler + /// function Func should be callable with this signature: + /// Error(const object::RelocationRef&, + /// const object::SectionRef&, Section &) + /// + template <typename RelocHandlerFunction> + Error forEachRelocation(const object::SectionRef &RelSec, + RelocHandlerFunction &&Func, + bool ProcessDebugSections = false); + + /// Traverse all matching relocation records in the given section. Convenience + /// wrapper to allow passing a member function for the handler. 
+ /// + template <typename ClassT, typename RelocHandlerMethod> + Error forEachRelocation(const object::SectionRef &RelSec, ClassT *Instance, + RelocHandlerMethod &&Method, + bool ProcessDebugSections = false) { + return forEachRelocation( + RelSec, + [Instance, Method](const auto &Rel, const auto &Target, auto &GS) { + return (Instance->*Method)(Rel, Target, GS); + }, + ProcessDebugSections); + } + +private: + // Pending comdat symbol export that is initiated by the first symbol of + // COMDAT sequence. + struct ComdatExportRequest { + COFFSymbolIndex SymbolIndex; + COFFSectionIndex SectionIndex; + jitlink::Linkage Linkage; + }; + Optional<ComdatExportRequest> PendingComdatExport; + + // This represents a pending request to create a weak external symbol with a + // name. + struct WeakAliasRequest { + COFFSymbolIndex Alias; + COFFSymbolIndex Target; + StringRef SymbolName; + }; + std::vector<WeakAliasRequest> WeakAliasRequests; + + // Per COFF section jitlink symbol set sorted by offset. + // Used for calculating implicit size of defined symbols. + using SymbolSet = std::set<std::pair<orc::ExecutorAddrDiff, Symbol *>>; + std::vector<SymbolSet> SymbolSets; + + Section &getCommonSection(); + + Expected<Symbol *> createDefinedSymbol(COFFSymbolIndex SymIndex, + StringRef SymbolName, + object::COFFSymbolRef Symbol, + const object::coff_section *Section); + Expected<Symbol *> createCOMDATExportRequest( + COFFSymbolIndex SymIndex, object::COFFSymbolRef Symbol, + const object::coff_aux_section_definition *Definition); + Expected<Symbol *> exportCOMDATSymbol(COFFSymbolIndex SymIndex, + StringRef SymbolName, + object::COFFSymbolRef Symbol); + Error flushWeakAliasRequests(); + Error calculateImplicitSizeOfSymbols(); + + static uint64_t getSectionAddress(const object::COFFObjectFile &Obj, + const object::coff_section *Section); + static uint64_t getSectionSize(const object::COFFObjectFile &Obj, + const object::coff_section *Section); + static bool isComdatSection(const object::coff_section *Section); + static unsigned getPointerSize(const object::COFFObjectFile &Obj); + static support::endianness getEndianness(const object::COFFObjectFile &Obj); + StringRef getCOFFSectionName(COFFSectionIndex SectionIndex, + const object::coff_section *Sec, + object::COFFSymbolRef Sym); + + const object::COFFObjectFile &Obj; + std::unique_ptr<LinkGraph> G; + + Section *CommonSection = nullptr; + std::vector<Block *> GraphBlocks; + std::vector<Symbol *> GraphSymbols; +}; + +template <typename RelocHandlerFunction> +Error COFFLinkGraphBuilder::forEachRelocation(const object::SectionRef &RelSec, + RelocHandlerFunction &&Func, + bool ProcessDebugSections) { + + auto COFFRelSect = Obj.getCOFFSection(RelSec); + + // Target sections have names in valid COFF object files. + Expected<StringRef> Name = Obj.getSectionName(COFFRelSect); + if (!Name) + return Name.takeError(); + LLVM_DEBUG(dbgs() << " " << *Name << ":\n"); + + // Lookup the link-graph node corresponding to the target section name. + auto *BlockToFix = getGraphBlock(RelSec.getIndex() + 1); + if (!BlockToFix) + return make_error<StringError>( + "Referencing a section that wasn't added to the graph: " + *Name, + inconvertibleErrorCode()); + + // Let the callee process relocation entries one by one. 
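+  // Each record is forwarded with its containing section and the graph
+  // block it fixes up; the first handler error aborts the traversal.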
+ for (const auto &R : RelSec.relocations()) + if (Error Err = Func(R, RelSec, *BlockToFix)) + return Err; + + LLVM_DEBUG(dbgs() << "\n"); + return Error::success(); +} + +} // end namespace jitlink +} // end namespace llvm + +#endif // LIB_EXECUTIONENGINE_JITLINK_COFFLINKGRAPHBUILDER_H diff --git a/llvm/lib/ExecutionEngine/JITLink/COFF_x86_64.cpp b/llvm/lib/ExecutionEngine/JITLink/COFF_x86_64.cpp new file mode 100644 index 000000000000..3d36ad1ed767 --- /dev/null +++ b/llvm/lib/ExecutionEngine/JITLink/COFF_x86_64.cpp @@ -0,0 +1,216 @@ +//===----- COFF_x86_64.cpp - JIT linker implementation for COFF/x86_64 ----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// COFF/x86_64 jit-link implementation. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ExecutionEngine/JITLink/COFF_x86_64.h" +#include "COFFLinkGraphBuilder.h" +#include "EHFrameSupportImpl.h" +#include "JITLinkGeneric.h" +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/ExecutionEngine/JITLink/x86_64.h" +#include "llvm/Object/COFF.h" +#include "llvm/Support/Endian.h" + +#define DEBUG_TYPE "jitlink" + +using namespace llvm; +using namespace llvm::jitlink; + +namespace { + +class COFFJITLinker_x86_64 : public JITLinker<COFFJITLinker_x86_64> { + friend class JITLinker<COFFJITLinker_x86_64>; + +public: + COFFJITLinker_x86_64(std::unique_ptr<JITLinkContext> Ctx, + std::unique_ptr<LinkGraph> G, + PassConfiguration PassConfig) + : JITLinker(std::move(Ctx), std::move(G), std::move(PassConfig)) {} + +private: + Error applyFixup(LinkGraph &G, Block &B, const Edge &E) const { + return x86_64::applyFixup(G, B, E, nullptr); + } +}; + +class COFFLinkGraphBuilder_x86_64 : public COFFLinkGraphBuilder { +private: + uint64_t ImageBase = 0; + enum COFFX86RelocationKind { + COFFAddr32NB, + COFFRel32, + }; + + static Expected<COFFX86RelocationKind> + getRelocationKind(const uint32_t Type) { + switch (Type) { + case COFF::RelocationTypeAMD64::IMAGE_REL_AMD64_ADDR32NB: + return COFFAddr32NB; + case COFF::RelocationTypeAMD64::IMAGE_REL_AMD64_REL32: + return COFFRel32; + } + + return make_error<JITLinkError>("Unsupported x86_64 relocation:" + + formatv("{0:d}", Type)); + } + + Error addRelocations() override { + + LLVM_DEBUG(dbgs() << "Processing relocations:\n"); + + for (const auto &RelSect : sections()) + if (Error Err = COFFLinkGraphBuilder::forEachRelocation( + RelSect, this, &COFFLinkGraphBuilder_x86_64::addSingleRelocation)) + return Err; + + return Error::success(); + } + + uint64_t getImageBase() { + if (!ImageBase) { + ImageBase = std::numeric_limits<uint64_t>::max(); + for (const auto &Block : getGraph().blocks()) { + if (Block->getAddress().getValue()) + ImageBase = std::min(ImageBase, Block->getAddress().getValue()); + } + } + return ImageBase; + } + + Error addSingleRelocation(const object::RelocationRef &Rel, + const object::SectionRef &FixupSect, + Block &BlockToFix) { + + const object::coff_relocation *COFFRel = getObject().getCOFFRelocation(Rel); + auto SymbolIt = Rel.getSymbol(); + if (SymbolIt == getObject().symbol_end()) { + return make_error<StringError>( + formatv("Invalid symbol index in relocation entry. 
" + "index: {0}, section: {1}", + COFFRel->SymbolTableIndex, FixupSect.getIndex()), + inconvertibleErrorCode()); + } + + object::COFFSymbolRef COFFSymbol = getObject().getCOFFSymbol(*SymbolIt); + COFFSymbolIndex SymIndex = getObject().getSymbolIndex(COFFSymbol); + + Symbol *GraphSymbol = getGraphSymbol(SymIndex); + if (!GraphSymbol) + return make_error<StringError>( + formatv("Could not find symbol at given index, did you add it to " + "JITSymbolTable? index: {0}, section: {1}", + SymIndex, FixupSect.getIndex()), + inconvertibleErrorCode()); + + Expected<COFFX86RelocationKind> RelocKind = + getRelocationKind(Rel.getType()); + if (!RelocKind) + return RelocKind.takeError(); + + int64_t Addend = 0; + orc::ExecutorAddr FixupAddress = + orc::ExecutorAddr(FixupSect.getAddress()) + Rel.getOffset(); + Edge::OffsetT Offset = FixupAddress - BlockToFix.getAddress(); + + Edge::Kind Kind = Edge::Invalid; + + switch (*RelocKind) { + case COFFAddr32NB: { + Kind = x86_64::Pointer32; + Offset -= getImageBase(); + break; + } + case COFFRel32: { + Kind = x86_64::BranchPCRel32; + break; + } + }; + + Edge GE(Kind, Offset, *GraphSymbol, Addend); + LLVM_DEBUG({ + dbgs() << " "; + printEdge(dbgs(), BlockToFix, GE, x86_64::getEdgeKindName(Kind)); + dbgs() << "\n"; + }); + + BlockToFix.addEdge(std::move(GE)); + return Error::success(); + } + + /// Return the string name of the given COFF x86_64 edge kind. + const char *getCOFFX86RelocationKindName(COFFX86RelocationKind R) { + switch (R) { + case COFFAddr32NB: + return "COFFAddr32NB"; + case COFFRel32: + return "COFFRel32"; + } + } + +public: + COFFLinkGraphBuilder_x86_64(const object::COFFObjectFile &Obj, const Triple T) + : COFFLinkGraphBuilder(Obj, std::move(T), x86_64::getEdgeKindName) {} +}; + +Error buildTables_COFF_x86_64(LinkGraph &G) { + LLVM_DEBUG(dbgs() << "Visiting edges in graph:\n"); + + x86_64::GOTTableManager GOT; + x86_64::PLTTableManager PLT(GOT); + visitExistingEdges(G, GOT, PLT); + return Error::success(); +} +} // namespace + +namespace llvm { +namespace jitlink { + +Expected<std::unique_ptr<LinkGraph>> +createLinkGraphFromCOFFObject_x86_64(MemoryBufferRef ObjectBuffer) { + LLVM_DEBUG({ + dbgs() << "Building jitlink graph for new input " + << ObjectBuffer.getBufferIdentifier() << "...\n"; + }); + + auto COFFObj = object::ObjectFile::createCOFFObjectFile(ObjectBuffer); + if (!COFFObj) + return COFFObj.takeError(); + + return COFFLinkGraphBuilder_x86_64(**COFFObj, (*COFFObj)->makeTriple()) + .buildGraph(); +} + +void link_COFF_x86_64(std::unique_ptr<LinkGraph> G, + std::unique_ptr<JITLinkContext> Ctx) { + PassConfiguration Config; + const Triple &TT = G->getTargetTriple(); + if (Ctx->shouldAddDefaultTargetPasses(TT)) { + // Add a mark-live pass. + if (auto MarkLive = Ctx->getMarkLivePass(TT)) + Config.PrePrunePasses.push_back(std::move(MarkLive)); + else + Config.PrePrunePasses.push_back(markAllSymbolsLive); + + // Add an in-place GOT/Stubs/TLSInfoEntry build pass. + Config.PostPrunePasses.push_back(buildTables_COFF_x86_64); + + // Add GOT/Stubs optimizer pass. 
+ Config.PreFixupPasses.push_back(x86_64::optimizeGOTAndStubAccesses); + } + + if (auto Err = Ctx->modifyPassConfig(*G, Config)) + return Ctx->notifyFailed(std::move(Err)); + + COFFJITLinker_x86_64::link(std::move(Ctx), std::move(G), std::move(Config)); +} + +} // namespace jitlink +} // namespace llvm diff --git a/llvm/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp b/llvm/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp index b1492cd74508..389fd14c0f29 100644 --- a/llvm/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp @@ -33,7 +33,7 @@ Error EHFrameEdgeFixer::operator()(LinkGraph &G) { if (!EHFrame) { LLVM_DEBUG({ dbgs() << "EHFrameEdgeFixer: No " << EHFrameSectionName - << " section. Nothing to do\n"; + << " section in \"" << G.getName() << "\". Nothing to do.\n"; }); return Error::success(); } @@ -44,7 +44,8 @@ Error EHFrameEdgeFixer::operator()(LinkGraph &G) { "EHFrameEdgeFixer only supports 32 and 64 bit targets"); LLVM_DEBUG({ - dbgs() << "EHFrameEdgeFixer: Processing " << EHFrameSectionName << "...\n"; + dbgs() << "EHFrameEdgeFixer: Processing " << EHFrameSectionName << " in \"" + << G.getName() << "\"...\n"; }); ParseContext PC(G); diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_aarch64.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_aarch64.cpp index 98da3f155c3e..7d67e5ef343a 100644 --- a/llvm/lib/ExecutionEngine/JITLink/ELF_aarch64.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/ELF_aarch64.cpp @@ -63,6 +63,10 @@ private: ELFPrel64, ELFAdrGOTPage21, ELFLd64GOTLo12, + ELFTLSDescAdrPage21, + ELFTLSDescAddLo12, + ELFTLSDescLd64Lo12, + ELFTLSDescCall, }; static Expected<ELFAArch64RelocationKind> @@ -104,6 +108,14 @@ private: return ELFAdrGOTPage21; case ELF::R_AARCH64_LD64_GOT_LO12_NC: return ELFLd64GOTLo12; + case ELF::R_AARCH64_TLSDESC_ADR_PAGE21: + return ELFTLSDescAdrPage21; + case ELF::R_AARCH64_TLSDESC_ADD_LO12: + return ELFTLSDescAddLo12; + case ELF::R_AARCH64_TLSDESC_LD64_LO12: + return ELFTLSDescLd64Lo12; + case ELF::R_AARCH64_TLSDESC_CALL: + return ELFTLSDescCall; } return make_error<JITLinkError>( @@ -292,6 +304,21 @@ private: Kind = aarch64::GOTPageOffset12; break; } + case ELFTLSDescAdrPage21: { + Kind = aarch64::TLSDescPage21; + break; + } + case ELFTLSDescAddLo12: { + Kind = aarch64::TLSDescPageOffset12; + break; + } + case ELFTLSDescLd64Lo12: { + Kind = aarch64::TLSDescPageOffset12; + break; + } + case ELFTLSDescCall: { + return Error::success(); + } }; Edge GE(Kind, Offset, *GraphSymbol, Addend); @@ -302,6 +329,7 @@ private: }); BlockToFix.addEdge(std::move(GE)); + return Error::success(); } @@ -342,6 +370,14 @@ private: return "ELFAdrGOTPage21"; case ELFLd64GOTLo12: return "ELFLd64GOTLo12"; + case ELFTLSDescAdrPage21: + return "ELFTLSDescAdrPage21"; + case ELFTLSDescAddLo12: + return "ELFTLSDescAddLo12"; + case ELFTLSDescLd64Lo12: + return "ELFTLSDescLd64Lo12"; + case ELFTLSDescCall: + return "ELFTLSDescCall"; default: return getGenericEdgeKindName(static_cast<Edge::Kind>(R)); } @@ -354,12 +390,133 @@ public: aarch64::getEdgeKindName) {} }; +// TLS Info Builder. +class TLSInfoTableManager_ELF_aarch64 + : public TableManager<TLSInfoTableManager_ELF_aarch64> { +public: + static StringRef getSectionName() { return "$__TLSINFO"; } + + static const uint8_t TLSInfoEntryContent[16]; + + bool visitEdge(LinkGraph &G, Block *B, Edge &E) { return false; } + + Symbol &createEntry(LinkGraph &G, Symbol &Target) { + // the TLS Info entry's key value will be written by the fixTLVSectionByName + // pass, so create mutable content. 
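+    // Entry layout mirrors TLSInfoEntryContent below: 8 bytes for the
+    // pthread key followed by 8 bytes for the TLS data address; the
+    // Pointer64 edge added at offset 8 fills in the latter at fixup time.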
+ auto &TLSInfoEntry = G.createMutableContentBlock( + getTLSInfoSection(G), G.allocateContent(getTLSInfoEntryContent()), + orc::ExecutorAddr(), 8, 0); + TLSInfoEntry.addEdge(aarch64::Pointer64, 8, Target, 0); + return G.addAnonymousSymbol(TLSInfoEntry, 0, 16, false, false); + } + +private: + Section &getTLSInfoSection(LinkGraph &G) { + if (!TLSInfoTable) + TLSInfoTable = &G.createSection(getSectionName(), MemProt::Read); + return *TLSInfoTable; + } + + ArrayRef<char> getTLSInfoEntryContent() const { + return {reinterpret_cast<const char *>(TLSInfoEntryContent), + sizeof(TLSInfoEntryContent)}; + } + + Section *TLSInfoTable = nullptr; +}; + +const uint8_t TLSInfoTableManager_ELF_aarch64::TLSInfoEntryContent[16] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /*pthread key */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 /*data address*/ +}; + +// TLS Descriptor Builder. +class TLSDescTableManager_ELF_aarch64 + : public TableManager<TLSDescTableManager_ELF_aarch64> { +public: + TLSDescTableManager_ELF_aarch64( + TLSInfoTableManager_ELF_aarch64 &TLSInfoTableManager) + : TLSInfoTableManager(TLSInfoTableManager) {} + + static StringRef getSectionName() { return "$__TLSDESC"; } + + static const uint8_t TLSDescEntryContent[16]; + + bool visitEdge(LinkGraph &G, Block *B, Edge &E) { + Edge::Kind KindToSet = Edge::Invalid; + switch (E.getKind()) { + case aarch64::TLSDescPage21: { + KindToSet = aarch64::Page21; + break; + } + case aarch64::TLSDescPageOffset12: { + KindToSet = aarch64::PageOffset12; + break; + } + default: + return false; + } + assert(KindToSet != Edge::Invalid && + "Fell through switch, but no new kind to set"); + DEBUG_WITH_TYPE("jitlink", { + dbgs() << " Fixing " << G.getEdgeKindName(E.getKind()) << " edge at " + << B->getFixupAddress(E) << " (" << B->getAddress() << " + " + << formatv("{0:x}", E.getOffset()) << ")\n"; + }); + E.setKind(KindToSet); + E.setTarget(getEntryForTarget(G, E.getTarget())); + return true; + } + + Symbol &createEntry(LinkGraph &G, Symbol &Target) { + auto &EntryBlock = + G.createContentBlock(getTLSDescSection(G), getTLSDescBlockContent(), + orc::ExecutorAddr(), 8, 0); + EntryBlock.addEdge(aarch64::Pointer64, 0, getTLSDescResolver(G), 0); + EntryBlock.addEdge(aarch64::Pointer64, 8, + TLSInfoTableManager.getEntryForTarget(G, Target), 0); + return G.addAnonymousSymbol(EntryBlock, 0, 8, false, false); + } + +private: + Section &getTLSDescSection(LinkGraph &G) { + if (!GOTSection) + GOTSection = &G.createSection(getSectionName(), MemProt::Read); + return *GOTSection; + } + + Symbol &getTLSDescResolver(LinkGraph &G) { + if (!TLSDescResolver) + TLSDescResolver = + &G.addExternalSymbol("__tlsdesc_resolver", 8, Linkage::Strong); + return *TLSDescResolver; + } + + ArrayRef<char> getTLSDescBlockContent() { + return {reinterpret_cast<const char *>(TLSDescEntryContent), + sizeof(TLSDescEntryContent)}; + } + + Section *GOTSection = nullptr; + Symbol *TLSDescResolver = nullptr; + TLSInfoTableManager_ELF_aarch64 &TLSInfoTableManager; +}; + +const uint8_t TLSDescTableManager_ELF_aarch64::TLSDescEntryContent[16] = { + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, /*resolver function pointer*/ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00 /*pointer to tls info*/ +}; + Error buildTables_ELF_aarch64(LinkGraph &G) { LLVM_DEBUG(dbgs() << "Visiting edges in graph:\n"); aarch64::GOTTableManager GOT; aarch64::PLTTableManager PLT(GOT); - visitExistingEdges(G, GOT, PLT); + TLSInfoTableManager_ELF_aarch64 TLSInfo; + TLSDescTableManager_ELF_aarch64 TLSDesc(TLSInfo); + 
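  // The TLSDesc manager rewrites TLSDescPage21/PageOffset12 edges to point
+  // at descriptor entries, each of which references a TLS info entry, so
+  // both managers take part in the same edge visit.
+ 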
visitExistingEdges(G, GOT, PLT, TLSDesc, TLSInfo); return Error::success(); } @@ -406,7 +563,7 @@ void link_ELF_aarch64(std::unique_ptr<LinkGraph> G, else Config.PrePrunePasses.push_back(markAllSymbolsLive); - // Add an in-place GOT/Stubs build pass. + // Add an in-place GOT/TLS/Stubs build pass. Config.PostPrunePasses.push_back(buildTables_ELF_aarch64); } diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp index 197ab71f5274..c7596efe2bb8 100644 --- a/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp @@ -494,6 +494,30 @@ private: Block &BlockToFix) { using Base = ELFLinkGraphBuilder<ELFT>; + uint32_t Type = Rel.getType(false); + // We do not implement linker relaxation, except what is required for + // alignment (see below). + if (Type == llvm::ELF::R_RISCV_RELAX) + return Error::success(); + + int64_t Addend = Rel.r_addend; + if (Type == llvm::ELF::R_RISCV_ALIGN) { + uint64_t Alignment = PowerOf2Ceil(Addend); + // FIXME: Implement support for ensuring alignment together with linker + // relaxation; 2 bytes are guaranteed by the length of compressed + // instructions, so this does not need any action from our side. + if (Alignment > 2) + return make_error<JITLinkError>( + formatv("Unsupported relocation R_RISCV_ALIGN with alignment {0} " + "larger than 2 (addend: {1})", + Alignment, Addend)); + return Error::success(); + } + + Expected<riscv::EdgeKind_riscv> Kind = getRelocationKind(Type); + if (!Kind) + return Kind.takeError(); + uint32_t SymbolIndex = Rel.getSymbol(false); auto ObjSymbol = Base::Obj.getRelocationSymbol(Rel, Base::SymTabSec); if (!ObjSymbol) @@ -508,12 +532,6 @@ private: Base::GraphSymbols.size()), inconvertibleErrorCode()); - uint32_t Type = Rel.getType(false); - Expected<riscv::EdgeKind_riscv> Kind = getRelocationKind(Type); - if (!Kind) - return Kind.takeError(); - - int64_t Addend = Rel.r_addend; auto FixupAddress = orc::ExecutorAddr(FixupSect.sh_addr) + Rel.r_offset; Edge::OffsetT Offset = FixupAddress - BlockToFix.getAddress(); Edge GE(*Kind, Offset, *GraphSymbol, Addend); diff --git a/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp b/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp index 43efe0725cfe..08fdc7c9e6b1 100644 --- a/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp @@ -9,10 +9,10 @@ #include "llvm/ExecutionEngine/JITLink/JITLink.h" #include "llvm/BinaryFormat/Magic.h" +#include "llvm/ExecutionEngine/JITLink/COFF.h" #include "llvm/ExecutionEngine/JITLink/ELF.h" #include "llvm/ExecutionEngine/JITLink/MachO.h" #include "llvm/Support/Format.h" -#include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" @@ -41,8 +41,6 @@ public: } }; -static ManagedStatic<JITLinkerErrorCategory> JITLinkerErrorCategory; - } // namespace namespace llvm { @@ -53,7 +51,8 @@ char JITLinkError::ID = 0; void JITLinkError::log(raw_ostream &OS) const { OS << ErrMsg; } std::error_code JITLinkError::convertToErrorCode() const { - return std::error_code(GenericJITLinkError, *JITLinkerErrorCategory); + static JITLinkerErrorCategory TheJITLinkerErrorCategory; + return std::error_code(GenericJITLinkError, TheJITLinkerErrorCategory); } const char *getGenericEdgeKindName(Edge::Kind K) { @@ -410,6 +409,8 @@ createLinkGraphFromObject(MemoryBufferRef ObjectBuffer) { return createLinkGraphFromMachOObject(ObjectBuffer); case file_magic::elf_relocatable: return 
createLinkGraphFromELFObject(ObjectBuffer); + case file_magic::coff_object: + return createLinkGraphFromCOFFObject(ObjectBuffer); default: return make_error<JITLinkError>("Unsupported file format"); }; @@ -421,6 +422,8 @@ void link(std::unique_ptr<LinkGraph> G, std::unique_ptr<JITLinkContext> Ctx) { return link_MachO(std::move(G), std::move(Ctx)); case Triple::ELF: return link_ELF(std::move(G), std::move(Ctx)); + case Triple::COFF: + return link_COFF(std::move(G), std::move(Ctx)); default: Ctx->notifyFailed(make_error<JITLinkError>("Unsupported object format")); }; diff --git a/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp b/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp index dd50314d3ed7..04194318498f 100644 --- a/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp @@ -425,7 +425,7 @@ private: else return TargetSymbolOrErr.takeError(); - Kind = aarch64::PointerToGOT; + Kind = aarch64::Delta32ToGOT; break; case MachODelta32: case MachODelta64: { diff --git a/llvm/lib/ExecutionEngine/JITLink/aarch64.cpp b/llvm/lib/ExecutionEngine/JITLink/aarch64.cpp index 28a6f9ce90d9..9ecc71dfbb54 100644 --- a/llvm/lib/ExecutionEngine/JITLink/aarch64.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/aarch64.cpp @@ -48,8 +48,12 @@ const char *getEdgeKindName(Edge::Kind R) { return "TLVPage21"; case TLVPageOffset12: return "TLVPageOffset12"; - case PointerToGOT: - return "PointerToGOT"; + case TLSDescPage21: + return "TLSDescPage21"; + case TLSDescPageOffset12: + return "TLSDescPageOffset12"; + case Delta32ToGOT: + return "Delta32ToGOT"; case PairedAddend: return "PairedAddend"; case LDRLiteral19: diff --git a/llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp b/llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp index e476c549412a..e7ca636c83e9 100644 --- a/llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp +++ b/llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp @@ -839,11 +839,13 @@ Error ELFNixPlatform::ELFNixPlatformPlugin::registerInitSections( Error ELFNixPlatform::ELFNixPlatformPlugin::fixTLVSectionsAndEdges( jitlink::LinkGraph &G, JITDylib &JD) { - // TODO implement TLV support - for (auto *Sym : G.external_symbols()) + for (auto *Sym : G.external_symbols()) { if (Sym->getName() == "__tls_get_addr") { Sym->setName("___orc_rt_elfnix_tls_get_addr"); + } else if (Sym->getName() == "__tlsdesc_resolver") { + Sym->setName("___orc_rt_elfnix_tlsdesc_resolver"); } + } auto *TLSInfoEntrySection = G.findSectionByName("$__TLSINFO"); diff --git a/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp b/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp index 6d67e6d87b56..1926ef1ecc72 100644 --- a/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp +++ b/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp @@ -666,8 +666,9 @@ Error LLJITBuilderState::prepareForConstruction() { // JIT linker. 
if (!CreateObjectLinkingLayer) { auto &TT = JTMB->getTargetTriple(); - if (TT.isOSBinFormatMachO() && - (TT.getArch() == Triple::aarch64 || TT.getArch() == Triple::x86_64)) { + if (TT.getArch() == Triple::riscv64 || + (TT.isOSBinFormatMachO() && + (TT.getArch() == Triple::aarch64 || TT.getArch() == Triple::x86_64))) { JTMB->setRelocationModel(Reloc::PIC_); JTMB->setCodeModel(CodeModel::Small); diff --git a/llvm/lib/ExecutionEngine/Orc/ObjectFileInterface.cpp b/llvm/lib/ExecutionEngine/Orc/ObjectFileInterface.cpp index 394a555e453b..356b81b4f1c5 100644 --- a/llvm/lib/ExecutionEngine/Orc/ObjectFileInterface.cpp +++ b/llvm/lib/ExecutionEngine/Orc/ObjectFileInterface.cpp @@ -9,6 +9,7 @@ #include "llvm/ExecutionEngine/Orc/ObjectFileInterface.h" #include "llvm/ExecutionEngine/Orc/ELFNixPlatform.h" #include "llvm/ExecutionEngine/Orc/MachOPlatform.h" +#include "llvm/Object/COFF.h" #include "llvm/Object/ELFObjectFile.h" #include "llvm/Object/MachO.h" #include "llvm/Object/ObjectFile.h" @@ -145,6 +146,55 @@ getELFObjectFileSymbolInfo(ExecutionSession &ES, return I; } +static Expected<MaterializationUnit::Interface> +getCOFFObjectFileSymbolInfo(ExecutionSession &ES, + const object::COFFObjectFile &Obj) { + MaterializationUnit::Interface I; + + for (auto &Sym : Obj.symbols()) { + Expected<uint32_t> SymFlagsOrErr = Sym.getFlags(); + if (!SymFlagsOrErr) + // TODO: Test this error. + return SymFlagsOrErr.takeError(); + + // Skip symbols not defined in this object file. + if (*SymFlagsOrErr & object::BasicSymbolRef::SF_Undefined) + continue; + + // Skip symbols that are not global. + if (!(*SymFlagsOrErr & object::BasicSymbolRef::SF_Global)) + continue; + + // Skip symbols that have type SF_File. + if (auto SymType = Sym.getType()) { + if (*SymType == object::SymbolRef::ST_File) + continue; + } else + return SymType.takeError(); + + auto Name = Sym.getName(); + if (!Name) + return Name.takeError(); + + auto SymFlags = JITSymbolFlags::fromObjectSymbol(Sym); + if (!SymFlags) + return SymFlags.takeError(); + *SymFlags |= JITSymbolFlags::Exported; + auto COFFSym = Obj.getCOFFSymbol(Sym); + + // Weak external is always a function + if (COFFSym.isWeakExternal()) { + *SymFlags |= JITSymbolFlags::Callable; + } + + I.SymbolFlags[ES.intern(*Name)] = std::move(*SymFlags); + } + + // FIXME: handle init symbols + + return I; +} + Expected<MaterializationUnit::Interface> getGenericObjectFileSymbolInfo(ExecutionSession &ES, const object::ObjectFile &Obj) { @@ -196,6 +246,8 @@ getObjectFileInterface(ExecutionSession &ES, MemoryBufferRef ObjBuffer) { return getMachOObjectFileSymbolInfo(ES, *MachOObj); else if (auto *ELFObj = dyn_cast<object::ELFObjectFileBase>(Obj->get())) return getELFObjectFileSymbolInfo(ES, *ELFObj); + else if (auto *COFFObj = dyn_cast<object::COFFObjectFile>(Obj->get())) + return getCOFFObjectFileSymbolInfo(ES, *COFFObj); return getGenericObjectFileSymbolInfo(ES, **Obj); } diff --git a/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp b/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp index ef764a3f0d7f..da8aaad08cad 100644 --- a/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp +++ b/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp @@ -665,7 +665,7 @@ void OrcMips32_Base::writeIndirectStubsBlock( // // i.. - assert(stubAndPointerRangesOk<OrcAArch64>( + assert(stubAndPointerRangesOk<OrcMips32_Base>( StubsBlockTargetAddress, PointersBlockTargetAddress, NumStubs) && "PointersBlock is out of range"); @@ -884,7 +884,7 @@ void OrcMips64::writeIndirectStubsBlock( // // ... 
- assert(stubAndPointerRangesOk<OrcAArch64>( + assert(stubAndPointerRangesOk<OrcMips64>( StubsBlockTargetAddress, PointersBlockTargetAddress, NumStubs) && "PointersBlock is out of range"); diff --git a/llvm/lib/ExecutionEngine/Orc/Shared/OrcError.cpp b/llvm/lib/ExecutionEngine/Orc/Shared/OrcError.cpp index fdad90cbcfb7..2cc2bddeb21a 100644 --- a/llvm/lib/ExecutionEngine/Orc/Shared/OrcError.cpp +++ b/llvm/lib/ExecutionEngine/Orc/Shared/OrcError.cpp @@ -12,7 +12,6 @@ #include "llvm/ExecutionEngine/Orc/Shared/OrcError.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/ManagedStatic.h" #include <type_traits> @@ -70,7 +69,10 @@ public: } }; -static ManagedStatic<OrcErrorCategory> OrcErrCat; +OrcErrorCategory &getOrcErrCat() { + static OrcErrorCategory OrcErrCat; + return OrcErrCat; +} } // namespace namespace llvm { @@ -81,7 +83,7 @@ char JITSymbolNotFound::ID = 0; std::error_code orcError(OrcErrorCode ErrCode) { typedef std::underlying_type<OrcErrorCode>::type UT; - return std::error_code(static_cast<UT>(ErrCode), *OrcErrCat); + return std::error_code(static_cast<UT>(ErrCode), getOrcErrCat()); } DuplicateDefinition::DuplicateDefinition(std::string SymbolName) @@ -105,7 +107,7 @@ JITSymbolNotFound::JITSymbolNotFound(std::string SymbolName) std::error_code JITSymbolNotFound::convertToErrorCode() const { typedef std::underlying_type<OrcErrorCode>::type UT; return std::error_code(static_cast<UT>(OrcErrorCode::JITSymbolNotFound), - *OrcErrCat); + getOrcErrCat()); } void JITSymbolNotFound::log(raw_ostream &OS) const { diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.cpp index ffa2969536e7..8296b03398a0 100644 --- a/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.cpp +++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.cpp @@ -11,7 +11,6 @@ #include "llvm/ExecutionEngine/JITSymbol.h" #include "llvm/Support/BinaryStreamReader.h" #include "llvm/Support/FormatVariadic.h" -#include "llvm/Support/ManagedStatic.h" #include <cstdint> #include <mutex> @@ -67,9 +66,6 @@ LLVM_ATTRIBUTE_NOINLINE void __jit_debug_register_code() { using namespace llvm; using namespace llvm::orc; -// Serialize rendezvous with the debugger as well as access to shared data. -ManagedStatic<std::mutex> JITDebugLock; - // Register debug object, return error message or null for success. static void registerJITLoaderGDBImpl(const char *ObjAddr, size_t Size) { LLVM_DEBUG({ @@ -85,7 +81,9 @@ static void registerJITLoaderGDBImpl(const char *ObjAddr, size_t Size) { E->symfile_size = Size; E->prev_entry = nullptr; - std::lock_guard<std::mutex> Lock(*JITDebugLock); + // Serialize rendezvous with the debugger as well as access to shared data. + static std::mutex JITDebugLock; + std::lock_guard<std::mutex> Lock(JITDebugLock); // Insert this entry at the head of the list. 
jit_code_entry *NextEntry = __jit_debug_descriptor.first_entry; diff --git a/llvm/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp b/llvm/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp index 4a236e183c8b..bb41bac32534 100644 --- a/llvm/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp +++ b/llvm/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp @@ -24,7 +24,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/Errno.h" #include "llvm/Support/FileSystem.h" -#include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Mutex.h" #include "llvm/Support/Path.h" @@ -488,15 +487,14 @@ void PerfJITEventListener::NotifyDebug(uint64_t CodeAddr, } } -// There should be only a single event listener per process, otherwise perf gets -// confused. -llvm::ManagedStatic<PerfJITEventListener> PerfListener; - } // end anonymous namespace namespace llvm { JITEventListener *JITEventListener::createPerfJITEventListener() { - return &*PerfListener; + // There should be only a single event listener per process, otherwise perf + // gets confused. + static PerfJITEventListener PerfListener; + return &PerfListener; } } // namespace llvm diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp index 2e0cba849165..54ab00732330 100644 --- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp +++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp @@ -19,7 +19,6 @@ #include "llvm/Object/ELFObjectFile.h" #include "llvm/Support/Alignment.h" #include "llvm/Support/MSVCErrorWorkarounds.h" -#include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MathExtras.h" #include <mutex> @@ -51,8 +50,6 @@ public: } }; -static ManagedStatic<RuntimeDyldErrorCategory> RTDyldErrorCategory; - } char RuntimeDyldError::ID = 0; @@ -62,7 +59,8 @@ void RuntimeDyldError::log(raw_ostream &OS) const { } std::error_code RuntimeDyldError::convertToErrorCode() const { - return std::error_code(GenericRTDyldError, *RTDyldErrorCategory); + static RuntimeDyldErrorCategory RTDyldErrorCategory; + return std::error_code(GenericRTDyldError, RTDyldErrorCategory); } // Empty out-of-line virtual destructor as the key function. 
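The ManagedStatic removals in the hunks above (and in RuntimeDyld below) all follow one pattern: a function-local static replaces the lazily initialized global, since C++11 guarantees thread-safe initialization of local statics. A minimal sketch of the pattern — the names here are illustrative, not from the patch. Note that, unlike ManagedStatic, nothing tears such objects down at llvm_shutdown(); they persist until normal static destruction at process exit:

class WidgetRegistry { /* unique, lazily constructed process-wide object */ };

WidgetRegistry &getWidgetRegistry() {
  // Thread-safe on first use since C++11 ("magic statics"); replaces
  // `static ManagedStatic<WidgetRegistry> Registry;` plus `&*Registry`.
  static WidgetRegistry Registry;
  return Registry;
}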
diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp index da1102fc9f07..c702584b7a33 100644 --- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp @@ -479,7 +479,7 @@ void RuntimeDyldELF::resolveAArch64Relocation(const SectionEntry &Section, *TargetPtr &= 0xfff8001fU; // Immediate:15:2 goes in bits 18:5 of TBZ, TBNZ - or32le(TargetPtr, (BranchImm & 0x0FFFFFFC) << 3); + or32le(TargetPtr, (BranchImm & 0x0000FFFC) << 3); break; } case ELF::R_AARCH64_CALL26: // fallthrough diff --git a/llvm/lib/Frontend/OpenMP/OMPContext.cpp b/llvm/lib/Frontend/OpenMP/OMPContext.cpp index 6e8856f481af..0f846f7bfee5 100644 --- a/llvm/lib/Frontend/OpenMP/OMPContext.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPContext.cpp @@ -214,7 +214,7 @@ static int isVariantApplicableInContextHelper( Optional<bool> Result = HandleTrait(Property, IsActiveTrait); if (Result) - return Result.getValue(); + return Result.value(); } if (!DeviceSetOnly) { @@ -235,7 +235,7 @@ static int isVariantApplicableInContextHelper( Optional<bool> Result = HandleTrait(Property, FoundInOrder); if (Result) - return Result.getValue(); + return Result.value(); if (!FoundInOrder) { LLVM_DEBUG(dbgs() << "[" << DEBUG_TYPE << "] Construct property " diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 9b08a24e14d4..574d9174bebf 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -791,6 +791,38 @@ void OpenMPIRBuilder::emitOffloadingEntry(Constant *Addr, StringRef Name, Entry->setAlignment(Align(1)); } +OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitTargetKernel( + const LocationDescription &Loc, Value *&Return, Value *Ident, + Value *DeviceID, Value *NumTeams, Value *NumThreads, Value *HostPtr, + ArrayRef<Value *> KernelArgs, ArrayRef<Value *> NoWaitArgs) { + if (!updateToLocation(Loc)) + return Loc.IP; + + auto *KernelArgsPtr = + Builder.CreateAlloca(OpenMPIRBuilder::KernelArgs, nullptr, "kernel_args"); + for (unsigned I = 0, Size = KernelArgs.size(); I != Size; ++I) { + llvm::Value *Arg = + Builder.CreateStructGEP(OpenMPIRBuilder::KernelArgs, KernelArgsPtr, I); + Builder.CreateAlignedStore( + KernelArgs[I], Arg, + M.getDataLayout().getPrefTypeAlign(KernelArgs[I]->getType())); + } + + bool HasNoWait = !NoWaitArgs.empty(); + SmallVector<Value *> OffloadingArgs{Ident, DeviceID, NumTeams, + NumThreads, HostPtr, KernelArgsPtr}; + if (HasNoWait) + OffloadingArgs.append(NoWaitArgs.begin(), NoWaitArgs.end()); + + Return = Builder.CreateCall( + HasNoWait + ? getOrCreateRuntimeFunction(M, OMPRTL___tgt_target_kernel_nowait) + : getOrCreateRuntimeFunction(M, OMPRTL___tgt_target_kernel), + OffloadingArgs); + + return Builder.saveIP(); +} + void OpenMPIRBuilder::emitCancelationCheckImpl(Value *CancelFlag, omp::Directive CanceledDirective, FinalizeCallbackTy ExitCB) { @@ -1260,6 +1292,9 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc, if (!updateToLocation(Loc)) return InsertPointTy(); + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); // The current basic block is split into four basic blocks. 
After outlining, // they will be mapped as follows: // ``` @@ -1285,7 +1320,7 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc, OI.EntryBB = TaskAllocaBB; OI.OuterAllocaBB = AllocaIP.getBlock(); OI.ExitBB = TaskExitBB; - OI.PostOutlineCB = [this, &Loc, Tied, Final](Function &OutlinedFn) { + OI.PostOutlineCB = [this, Ident, Tied, Final](Function &OutlinedFn) { // The input IR here looks like the following- // ``` // func @current_fn() { @@ -1324,9 +1359,6 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc, // Arguments - `loc_ref` (Ident) and `gtid` (ThreadID) // call. - uint32_t SrcLocStrSize; - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); - Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); Value *ThreadID = getOrCreateThreadID(Ident); // Argument - `flags` @@ -2834,7 +2866,8 @@ void OpenMPIRBuilder::unrollLoopHeuristic(DebugLoc, CanonicalLoopInfo *Loop) { }); } -void OpenMPIRBuilder::applySimd(DebugLoc, CanonicalLoopInfo *CanonicalLoop) { +void OpenMPIRBuilder::applySimd(CanonicalLoopInfo *CanonicalLoop, + ConstantInt *Simdlen) { LLVMContext &Ctx = Builder.getContext(); Function *F = CanonicalLoop->getFunction(); @@ -2879,6 +2912,11 @@ void OpenMPIRBuilder::applySimd(DebugLoc, CanonicalLoopInfo *CanonicalLoop) { AccessGroup}), MDNode::get(Ctx, {MDString::get(Ctx, "llvm.loop.vectorize.enable"), BoolConst})}); + if (Simdlen != nullptr) + addLoopMetadata( + CanonicalLoop, + MDNode::get(Ctx, {MDString::get(Ctx, "llvm.loop.vectorize.width"), + ConstantAsMetadata::get(Simdlen)})); } /// Create the TargetMachine object to query the backend for optimization @@ -3962,6 +4000,8 @@ Value *OpenMPIRBuilder::emitRMWOpAsInstruction(Value *Src1, Value *Src2, case AtomicRMWInst::Min: case AtomicRMWInst::UMax: case AtomicRMWInst::UMin: + case AtomicRMWInst::FMax: + case AtomicRMWInst::FMin: llvm_unreachable("Unsupported atomic update operation"); } llvm_unreachable("Unsupported atomic update operation"); @@ -4126,20 +4166,37 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCompare( assert(X.Var->getType()->isPointerTy() && "OMP atomic expects a pointer to target memory"); - assert((X.ElemTy->isIntegerTy() || X.ElemTy->isPointerTy()) && - "OMP atomic compare expected a integer scalar type"); // compare capture if (V.Var) { assert(V.Var->getType()->isPointerTy() && "v.var must be of pointer type"); assert(V.ElemTy == X.ElemTy && "x and v must be of same type"); } + bool IsInteger = E->getType()->isIntegerTy(); + if (Op == OMPAtomicCompareOp::EQ) { AtomicOrdering Failure = AtomicCmpXchgInst::getStrongestFailureOrdering(AO); - AtomicCmpXchgInst *Result = - Builder.CreateAtomicCmpXchg(X.Var, E, D, MaybeAlign(), AO, Failure); + AtomicCmpXchgInst *Result = nullptr; + if (!IsInteger) { + unsigned Addrspace = + cast<PointerType>(X.Var->getType())->getAddressSpace(); + IntegerType *IntCastTy = + IntegerType::get(M.getContext(), X.ElemTy->getScalarSizeInBits()); + Value *XBCast = + Builder.CreateBitCast(X.Var, IntCastTy->getPointerTo(Addrspace)); + Value *EBCast = Builder.CreateBitCast(E, IntCastTy); + Value *DBCast = Builder.CreateBitCast(D, IntCastTy); + Result = Builder.CreateAtomicCmpXchg(XBCast, EBCast, DBCast, MaybeAlign(), + AO, Failure); + } else { + Result = + Builder.CreateAtomicCmpXchg(X.Var, E, D, MaybeAlign(), AO, Failure); + } + if (V.Var) { Value *OldValue = Builder.CreateExtractValue(Result, /*Idxs=*/0); + if (!IsInteger) + OldValue = Builder.CreateBitCast(OldValue, X.ElemTy); assert(OldValue->getType() == V.ElemTy && "OldValue and V 
must be of same type"); if (IsPostfixUpdate) { @@ -4213,19 +4270,29 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCompare( // x = x <= expr ? x : expr; AtomicRMWInst::BinOp NewOp; if (IsXBinopExpr) { - if (X.IsSigned) - NewOp = Op == OMPAtomicCompareOp::MAX ? AtomicRMWInst::Min - : AtomicRMWInst::Max; - else - NewOp = Op == OMPAtomicCompareOp::MAX ? AtomicRMWInst::UMin - : AtomicRMWInst::UMax; + if (IsInteger) { + if (X.IsSigned) + NewOp = Op == OMPAtomicCompareOp::MAX ? AtomicRMWInst::Min + : AtomicRMWInst::Max; + else + NewOp = Op == OMPAtomicCompareOp::MAX ? AtomicRMWInst::UMin + : AtomicRMWInst::UMax; + } else { + NewOp = Op == OMPAtomicCompareOp::MAX ? AtomicRMWInst::FMin + : AtomicRMWInst::FMax; + } } else { - if (X.IsSigned) - NewOp = Op == OMPAtomicCompareOp::MAX ? AtomicRMWInst::Max - : AtomicRMWInst::Min; - else - NewOp = Op == OMPAtomicCompareOp::MAX ? AtomicRMWInst::UMax - : AtomicRMWInst::UMin; + if (IsInteger) { + if (X.IsSigned) + NewOp = Op == OMPAtomicCompareOp::MAX ? AtomicRMWInst::Max + : AtomicRMWInst::Min; + else + NewOp = Op == OMPAtomicCompareOp::MAX ? AtomicRMWInst::UMax + : AtomicRMWInst::UMin; + } else { + NewOp = Op == OMPAtomicCompareOp::MAX ? AtomicRMWInst::FMax + : AtomicRMWInst::FMin; + } } AtomicRMWInst *OldValue = @@ -4243,12 +4310,18 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCompare( case AtomicRMWInst::UMax: Pred = CmpInst::ICMP_UGT; break; + case AtomicRMWInst::FMax: + Pred = CmpInst::FCMP_OGT; + break; case AtomicRMWInst::Min: Pred = CmpInst::ICMP_SLT; break; case AtomicRMWInst::UMin: Pred = CmpInst::ICMP_ULT; break; + case AtomicRMWInst::FMin: + Pred = CmpInst::FCMP_OLT; + break; default: llvm_unreachable("unexpected comparison op"); } diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp index 596348ddb462..a29040b8c2aa 100644 --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -1590,10 +1590,6 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV, Out << ", "; } - if (CE->hasIndices()) - for (unsigned I : CE->getIndices()) - Out << ", " << I; - if (CE->isCast()) { Out << " to "; WriterCtx.TypePrinter->print(CE->getType(), Out); @@ -3542,8 +3538,8 @@ void AssemblyWriter::printGlobal(const GlobalVariable *GV) { Out << ", no_sanitize_address"; if (MD.NoHWAddress) Out << ", no_sanitize_hwaddress"; - if (MD.NoMemtag) - Out << ", no_sanitize_memtag"; + if (MD.Memtag) + Out << ", sanitize_memtag"; if (MD.IsDynInit) Out << ", sanitize_address_dyninit"; } @@ -4299,9 +4295,9 @@ void AssemblyWriter::printInstruction(const Instruction &I) { bool PrintAllTypes = false; Type *TheType = Operand->getType(); - // Select, Store and ShuffleVector always print all types. - if (isa<SelectInst>(I) || isa<StoreInst>(I) || isa<ShuffleVectorInst>(I) - || isa<ReturnInst>(I)) { + // Select, Store, ShuffleVector and CmpXchg always print all types. 
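  // Illustrative note: a cmpxchg result is a { T, i1 } pair, so its operand
  // types cannot be inferred from the printed result type alone. IR sketch:
  //
  //   %r = cmpxchg ptr %p, i32 %cmp, i32 %new seq_cst seq_cst
  //   ; %r has type { i32, i1 }, while the operands are ptr, i32, i32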
+ if (isa<SelectInst>(I) || isa<StoreInst>(I) || isa<ShuffleVectorInst>(I) || + isa<ReturnInst>(I) || isa<AtomicCmpXchgInst>(I)) { PrintAllTypes = true; } else { for (unsigned i = 1, E = I.getNumOperands(); i != E; ++i) { diff --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp index 41b4f2919221..98adff107cec 100644 --- a/llvm/lib/IR/ConstantFold.cpp +++ b/llvm/lib/IR/ConstantFold.cpp @@ -1218,9 +1218,13 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1, if (Instruction::isIntDivRem(Opcode) && C2Splat->isNullValue()) return PoisonValue::get(VTy); if (Constant *C1Splat = C1->getSplatValue()) { - return ConstantVector::getSplat( - VTy->getElementCount(), - ConstantExpr::get(Opcode, C1Splat, C2Splat)); + Constant *Res = + ConstantExpr::isDesirableBinOp(Opcode) + ? ConstantExpr::get(Opcode, C1Splat, C2Splat) + : ConstantFoldBinaryInstruction(Opcode, C1Splat, C2Splat); + if (!Res) + return nullptr; + return ConstantVector::getSplat(VTy->getElementCount(), Res); } } @@ -1237,7 +1241,12 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1, if (Instruction::isIntDivRem(Opcode) && RHS->isNullValue()) return PoisonValue::get(VTy); - Result.push_back(ConstantExpr::get(Opcode, LHS, RHS)); + Constant *Res = ConstantExpr::isDesirableBinOp(Opcode) + ? ConstantExpr::get(Opcode, LHS, RHS) + : ConstantFoldBinaryInstruction(Opcode, LHS, RHS); + if (!Res) + return nullptr; + Result.push_back(Res); } return ConstantVector::get(Result); @@ -2218,9 +2227,15 @@ Constant *llvm::ConstantFoldGetElementPtr(Type *PointeeTy, Constant *C, : cast<FixedVectorType>(CurrIdx->getType())->getNumElements(), Factor); - NewIdxs[i] = ConstantExpr::getSRem(CurrIdx, Factor); + NewIdxs[i] = + ConstantFoldBinaryInstruction(Instruction::SRem, CurrIdx, Factor); + + Constant *Div = + ConstantFoldBinaryInstruction(Instruction::SDiv, CurrIdx, Factor); - Constant *Div = ConstantExpr::getSDiv(CurrIdx, Factor); + // We're working on either ConstantInt or vectors of ConstantInt, + // so these should always fold. + assert(NewIdxs[i] != nullptr && Div != nullptr && "Should have folded"); unsigned CommonExtendedWidth = std::max(PrevIdx->getType()->getScalarSizeInBits(), diff --git a/llvm/lib/IR/Constants.cpp b/llvm/lib/IR/Constants.cpp index 0bf5e09d6647..f9800cc0c07c 100644 --- a/llvm/lib/IR/Constants.cpp +++ b/llvm/lib/IR/Constants.cpp @@ -547,8 +547,6 @@ void llvm::deleteConstant(Constant *C) { delete static_cast<InsertElementConstantExpr *>(C); else if (isa<ShuffleVectorConstantExpr>(C)) delete static_cast<ShuffleVectorConstantExpr *>(C); - else if (isa<InsertValueConstantExpr>(C)) - delete static_cast<InsertValueConstantExpr *>(C); else if (isa<GetElementPtrConstantExpr>(C)) delete static_cast<GetElementPtrConstantExpr *>(C); else if (isa<CompareConstantExpr>(C)) @@ -561,51 +559,6 @@ void llvm::deleteConstant(Constant *C) { } } -static bool canTrapImpl(const Constant *C, - SmallPtrSetImpl<const Constant *> &NonTrappingOps) { - assert(C->getType()->isFirstClassType() && - "Cannot evaluate non-first-class types!"); - // ConstantExpr or ConstantAggregate trap if any operands can trap. 
- if (isa<ConstantExpr>(C) || isa<ConstantAggregate>(C)) { - for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) { - const Constant *Op = cast<Constant>(C->getOperand(i)); - if (isa<ConstantExpr>(Op) || isa<ConstantAggregate>(Op)) { - if (NonTrappingOps.insert(Op).second && canTrapImpl(Op, NonTrappingOps)) - return true; - } - } - } - - // The only leafs that can trap are constant expressions. - const ConstantExpr *CE = dyn_cast<ConstantExpr>(C); - if (!CE) - return false; - - // Otherwise, only specific operations can trap. - switch (CE->getOpcode()) { - default: - return false; - case Instruction::SDiv: - case Instruction::SRem: - // Signed div/rem can trap for SignedMin / -1. - if (!CE->getOperand(0)->isNotMinSignedValue() && - (!isa<ConstantInt>(CE->getOperand(1)) || - CE->getOperand(1)->isAllOnesValue())) - return true; - LLVM_FALLTHROUGH; - case Instruction::UDiv: - case Instruction::URem: - // Div and rem can trap if the RHS is not known to be non-zero. - return !isa<ConstantInt>(CE->getOperand(1)) || - CE->getOperand(1)->isNullValue(); - } -} - -bool Constant::canTrap() const { - SmallPtrSet<const Constant *, 4> NonTrappingOps; - return canTrapImpl(this, NonTrappingOps); -} - /// Check if C contains a GlobalValue for which Predicate is true. static bool ConstHasGlobalValuePredicate(const Constant *C, @@ -1488,14 +1441,6 @@ bool ConstantExpr::isCompare() const { return getOpcode() == Instruction::ICmp || getOpcode() == Instruction::FCmp; } -bool ConstantExpr::hasIndices() const { - return getOpcode() == Instruction::InsertValue; -} - -ArrayRef<unsigned> ConstantExpr::getIndices() const { - return cast<InsertValueConstantExpr>(this)->Indices; -} - unsigned ConstantExpr::getPredicate() const { return cast<CompareConstantExpr>(this)->predicate; } @@ -1539,9 +1484,6 @@ Constant *ConstantExpr::getWithOperands(ArrayRef<Constant *> Ops, Type *Ty, OnlyIfReducedTy); case Instruction::ExtractElement: return ConstantExpr::getExtractElement(Ops[0], Ops[1], OnlyIfReducedTy); - case Instruction::InsertValue: - return ConstantExpr::getInsertValue(Ops[0], Ops[1], getIndices(), - OnlyIfReducedTy); case Instruction::FNeg: return ConstantExpr::getFNeg(Ops[0]); case Instruction::ShuffleVector: @@ -2324,6 +2266,8 @@ Constant *ConstantExpr::get(unsigned Opcode, Constant *C1, Constant *C2, // Check the operands for consistency first. 
assert(Instruction::isBinaryOp(Opcode) && "Invalid opcode in binary constant expression"); + assert(isSupportedBinOp(Opcode) && + "Binop not supported as constant expression"); assert(C1->getType() == C2->getType() && "Operand types in binary constant expression should match"); @@ -2378,6 +2322,60 @@ Constant *ConstantExpr::get(unsigned Opcode, Constant *C1, Constant *C2, return pImpl->ExprConstants.getOrCreate(C1->getType(), Key); } +bool ConstantExpr::isDesirableBinOp(unsigned Opcode) { + switch (Opcode) { + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::URem: + case Instruction::SRem: + case Instruction::FAdd: + case Instruction::FSub: + case Instruction::FMul: + case Instruction::FDiv: + case Instruction::FRem: + return false; + case Instruction::Add: + case Instruction::Sub: + case Instruction::Mul: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + return true; + default: + llvm_unreachable("Argument must be binop opcode"); + } +} + +bool ConstantExpr::isSupportedBinOp(unsigned Opcode) { + switch (Opcode) { + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::URem: + case Instruction::SRem: + case Instruction::FAdd: + case Instruction::FSub: + case Instruction::FMul: + case Instruction::FDiv: + case Instruction::FRem: + return false; + case Instruction::Add: + case Instruction::Sub: + case Instruction::Mul: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + return true; + default: + llvm_unreachable("Argument must be binop opcode"); + } +} + Constant *ConstantExpr::getSizeOf(Type* Ty) { // sizeof is implemented as: (i64) gep (Ty*)null, 1 // Note that a non-inbounds gep is used, as null isn't within any object. 
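The pair of classifiers added above draws the new line for binary constant expressions: integer add/sub/mul, shifts, and the bitwise ops can still be built as ConstantExprs, while division, remainder, and all floating-point arithmetic cannot. Callers are expected to fold eagerly instead, as the ConstantFold.cpp hunks earlier in this patch do. A minimal sketch of that calling pattern, assuming access to the in-tree ConstantFoldBinaryInstruction helper (the wrapper name is hypothetical):

// May return nullptr when the operands do not fold; callers must handle it.
Constant *foldOrBuildBinOp(unsigned Opcode, Constant *LHS, Constant *RHS) {
  if (ConstantExpr::isDesirableBinOp(Opcode))
    return ConstantExpr::get(Opcode, LHS, RHS); // still legal as a constexpr
  // No longer representable as a ConstantExpr: fold it or give up.
  return ConstantFoldBinaryInstruction(Opcode, LHS, RHS);
}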
@@ -2517,7 +2515,7 @@ Constant *ConstantExpr::getGetElementPtr(Type *Ty, Constant *C, if (InRangeIndex && *InRangeIndex < 63) SubClassOptionalData |= (*InRangeIndex + 1) << 1; const ConstantExprKeyType Key(Instruction::GetElementPtr, ArgVec, 0, - SubClassOptionalData, None, None, Ty); + SubClassOptionalData, None, Ty); LLVMContextImpl *pImpl = C->getContext().pImpl; return pImpl->ExprConstants.getOrCreate(ReqTy, Key); @@ -2638,36 +2636,12 @@ Constant *ConstantExpr::getShuffleVector(Constant *V1, Constant *V2, // Look up the constant in the table first to ensure uniqueness Constant *ArgVec[] = {V1, V2}; - ConstantExprKeyType Key(Instruction::ShuffleVector, ArgVec, 0, 0, None, Mask); + ConstantExprKeyType Key(Instruction::ShuffleVector, ArgVec, 0, 0, Mask); LLVMContextImpl *pImpl = ShufTy->getContext().pImpl; return pImpl->ExprConstants.getOrCreate(ShufTy, Key); } -Constant *ConstantExpr::getInsertValue(Constant *Agg, Constant *Val, - ArrayRef<unsigned> Idxs, - Type *OnlyIfReducedTy) { - assert(Agg->getType()->isFirstClassType() && - "Non-first-class type for constant insertvalue expression"); - - assert(ExtractValueInst::getIndexedType(Agg->getType(), - Idxs) == Val->getType() && - "insertvalue indices invalid!"); - Type *ReqTy = Val->getType(); - - if (Constant *FC = ConstantFoldInsertValueInstruction(Agg, Val, Idxs)) - return FC; - - if (OnlyIfReducedTy == ReqTy) - return nullptr; - - Constant *ArgVec[] = { Agg, Val }; - const ConstantExprKeyType Key(Instruction::InsertValue, ArgVec, 0, 0, Idxs); - - LLVMContextImpl *pImpl = Agg->getContext().pImpl; - return pImpl->ExprConstants.getOrCreate(ReqTy, Key); -} - Constant *ConstantExpr::getNeg(Constant *C, bool HasNUW, bool HasNSW) { assert(C->getType()->isIntOrIntVectorTy() && "Cannot NEG a nonintegral value!"); @@ -2694,10 +2668,6 @@ Constant *ConstantExpr::getAdd(Constant *C1, Constant *C2, return get(Instruction::Add, C1, C2, Flags); } -Constant *ConstantExpr::getFAdd(Constant *C1, Constant *C2) { - return get(Instruction::FAdd, C1, C2); -} - Constant *ConstantExpr::getSub(Constant *C1, Constant *C2, bool HasNUW, bool HasNSW) { unsigned Flags = (HasNUW ? OverflowingBinaryOperator::NoUnsignedWrap : 0) | @@ -2705,10 +2675,6 @@ Constant *ConstantExpr::getSub(Constant *C1, Constant *C2, return get(Instruction::Sub, C1, C2, Flags); } -Constant *ConstantExpr::getFSub(Constant *C1, Constant *C2) { - return get(Instruction::FSub, C1, C2); -} - Constant *ConstantExpr::getMul(Constant *C1, Constant *C2, bool HasNUW, bool HasNSW) { unsigned Flags = (HasNUW ? OverflowingBinaryOperator::NoUnsignedWrap : 0) | @@ -2716,36 +2682,6 @@ Constant *ConstantExpr::getMul(Constant *C1, Constant *C2, return get(Instruction::Mul, C1, C2, Flags); } -Constant *ConstantExpr::getFMul(Constant *C1, Constant *C2) { - return get(Instruction::FMul, C1, C2); -} - -Constant *ConstantExpr::getUDiv(Constant *C1, Constant *C2, bool isExact) { - return get(Instruction::UDiv, C1, C2, - isExact ? PossiblyExactOperator::IsExact : 0); -} - -Constant *ConstantExpr::getSDiv(Constant *C1, Constant *C2, bool isExact) { - return get(Instruction::SDiv, C1, C2, - isExact ? 
PossiblyExactOperator::IsExact : 0); -} - -Constant *ConstantExpr::getFDiv(Constant *C1, Constant *C2) { - return get(Instruction::FDiv, C1, C2); -} - -Constant *ConstantExpr::getURem(Constant *C1, Constant *C2) { - return get(Instruction::URem, C1, C2); -} - -Constant *ConstantExpr::getSRem(Constant *C1, Constant *C2) { - return get(Instruction::SRem, C1, C2); -} - -Constant *ConstantExpr::getFRem(Constant *C1, Constant *C2) { - return get(Instruction::FRem, C1, C2); -} - Constant *ConstantExpr::getAnd(Constant *C1, Constant *C2) { return get(Instruction::And, C1, C2); } @@ -3517,9 +3453,6 @@ Instruction *ConstantExpr::getAsInstruction(Instruction *InsertBefore) const { return InsertElementInst::Create(Ops[0], Ops[1], Ops[2], "", InsertBefore); case Instruction::ExtractElement: return ExtractElementInst::Create(Ops[0], Ops[1], "", InsertBefore); - case Instruction::InsertValue: - return InsertValueInst::Create(Ops[0], Ops[1], getIndices(), "", - InsertBefore); case Instruction::ShuffleVector: return new ShuffleVectorInst(Ops[0], Ops[1], getShuffleMask(), "", InsertBefore); diff --git a/llvm/lib/IR/ConstantsContext.h b/llvm/lib/IR/ConstantsContext.h index 21ef1c0d9f64..1d74e2d49f35 100644 --- a/llvm/lib/IR/ConstantsContext.h +++ b/llvm/lib/IR/ConstantsContext.h @@ -209,37 +209,6 @@ public: } }; -/// InsertValueConstantExpr - This class is private to -/// Constants.cpp, and is used behind the scenes to implement -/// insertvalue constant exprs. -class InsertValueConstantExpr final : public ConstantExpr { -public: - InsertValueConstantExpr(Constant *Agg, Constant *Val, - ArrayRef<unsigned> IdxList, Type *DestTy) - : ConstantExpr(DestTy, Instruction::InsertValue, &Op<0>(), 2), - Indices(IdxList.begin(), IdxList.end()) { - Op<0>() = Agg; - Op<1>() = Val; - } - - // allocate space for exactly one operand - void *operator new(size_t S) { return User::operator new(S, 2); } - void operator delete(void *Ptr) { User::operator delete(Ptr); } - - /// Indices - These identify the position for the insertion. - const SmallVector<unsigned, 4> Indices; - - /// Transparently provide more efficient getOperand methods. - DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); - - static bool classof(const ConstantExpr *CE) { - return CE->getOpcode() == Instruction::InsertValue; - } - static bool classof(const Value *V) { - return isa<ConstantExpr>(V) && classof(cast<ConstantExpr>(V)); - } -}; - /// GetElementPtrConstantExpr - This class is private to Constants.cpp, and is /// used behind the scenes to implement getelementpr constant exprs. 
class GetElementPtrConstantExpr final : public ConstantExpr { @@ -333,11 +302,6 @@ struct OperandTraits<ShuffleVectorConstantExpr> DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ShuffleVectorConstantExpr, Value) template <> -struct OperandTraits<InsertValueConstantExpr> - : public FixedNumOperandTraits<InsertValueConstantExpr, 2> {}; -DEFINE_TRANSPARENT_OPERAND_ACCESSORS(InsertValueConstantExpr, Value) - -template <> struct OperandTraits<GetElementPtrConstantExpr> : public VariadicOperandTraits<GetElementPtrConstantExpr, 1> {}; @@ -472,7 +436,6 @@ private: uint8_t SubclassOptionalData; uint16_t SubclassData; ArrayRef<Constant *> Ops; - ArrayRef<unsigned> Indexes; ArrayRef<int> ShuffleMask; Type *ExplicitTy; @@ -482,12 +445,6 @@ private: return None; } - static ArrayRef<unsigned> getIndicesIfValid(const ConstantExpr *CE) { - if (CE->hasIndices()) - return CE->getIndices(); - return None; - } - static Type *getSourceElementTypeIfValid(const ConstantExpr *CE) { if (auto *GEPCE = dyn_cast<GetElementPtrConstantExpr>(CE)) return GEPCE->getSourceElementType(); @@ -498,18 +455,17 @@ public: ConstantExprKeyType(unsigned Opcode, ArrayRef<Constant *> Ops, unsigned short SubclassData = 0, unsigned short SubclassOptionalData = 0, - ArrayRef<unsigned> Indexes = None, ArrayRef<int> ShuffleMask = None, Type *ExplicitTy = nullptr) : Opcode(Opcode), SubclassOptionalData(SubclassOptionalData), - SubclassData(SubclassData), Ops(Ops), Indexes(Indexes), - ShuffleMask(ShuffleMask), ExplicitTy(ExplicitTy) {} + SubclassData(SubclassData), Ops(Ops), ShuffleMask(ShuffleMask), + ExplicitTy(ExplicitTy) {} ConstantExprKeyType(ArrayRef<Constant *> Operands, const ConstantExpr *CE) : Opcode(CE->getOpcode()), SubclassOptionalData(CE->getRawSubclassOptionalData()), SubclassData(CE->isCompare() ? CE->getPredicate() : 0), Ops(Operands), - Indexes(getIndicesIfValid(CE)), ShuffleMask(getShuffleMaskIfValid(CE)), + ShuffleMask(getShuffleMaskIfValid(CE)), ExplicitTy(getSourceElementTypeIfValid(CE)) {} ConstantExprKeyType(const ConstantExpr *CE, @@ -517,7 +473,7 @@ public: : Opcode(CE->getOpcode()), SubclassOptionalData(CE->getRawSubclassOptionalData()), SubclassData(CE->isCompare() ? 
CE->getPredicate() : 0), - Indexes(getIndicesIfValid(CE)), ShuffleMask(getShuffleMaskIfValid(CE)), + ShuffleMask(getShuffleMaskIfValid(CE)), ExplicitTy(getSourceElementTypeIfValid(CE)) { assert(Storage.empty() && "Expected empty storage"); for (unsigned I = 0, E = CE->getNumOperands(); I != E; ++I) @@ -528,8 +484,7 @@ public: bool operator==(const ConstantExprKeyType &X) const { return Opcode == X.Opcode && SubclassData == X.SubclassData && SubclassOptionalData == X.SubclassOptionalData && Ops == X.Ops && - Indexes == X.Indexes && ShuffleMask == X.ShuffleMask && - ExplicitTy == X.ExplicitTy; + ShuffleMask == X.ShuffleMask && ExplicitTy == X.ExplicitTy; } bool operator==(const ConstantExpr *CE) const { @@ -544,8 +499,6 @@ public: for (unsigned I = 0, E = Ops.size(); I != E; ++I) if (Ops[I] != CE->getOperand(I)) return false; - if (Indexes != getIndicesIfValid(CE)) - return false; if (ShuffleMask != getShuffleMaskIfValid(CE)) return false; if (ExplicitTy != getSourceElementTypeIfValid(CE)) @@ -557,7 +510,6 @@ public: return hash_combine( Opcode, SubclassOptionalData, SubclassData, hash_combine_range(Ops.begin(), Ops.end()), - hash_combine_range(Indexes.begin(), Indexes.end()), hash_combine_range(ShuffleMask.begin(), ShuffleMask.end()), ExplicitTy); } @@ -583,8 +535,6 @@ public: return new InsertElementConstantExpr(Ops[0], Ops[1], Ops[2]); case Instruction::ShuffleVector: return new ShuffleVectorConstantExpr(Ops[0], Ops[1], ShuffleMask); - case Instruction::InsertValue: - return new InsertValueConstantExpr(Ops[0], Ops[1], Indexes, Ty); case Instruction::GetElementPtr: return GetElementPtrConstantExpr::Create(ExplicitTy, Ops[0], Ops.slice(1), Ty, SubclassOptionalData); diff --git a/llvm/lib/IR/Core.cpp b/llvm/lib/IR/Core.cpp index 4b9189ca5baa..08b7b0e1f956 100644 --- a/llvm/lib/IR/Core.cpp +++ b/llvm/lib/IR/Core.cpp @@ -74,13 +74,16 @@ void LLVMDisposeMessage(char *Message) { /*===-- Operations on contexts --------------------------------------------===*/ -static ManagedStatic<LLVMContext> GlobalContext; +static LLVMContext &getGlobalContext() { + static LLVMContext GlobalContext; + return GlobalContext; +} LLVMContextRef LLVMContextCreate() { return wrap(new LLVMContext()); } -LLVMContextRef LLVMGetGlobalContext() { return wrap(&*GlobalContext); } +LLVMContextRef LLVMGetGlobalContext() { return wrap(&getGlobalContext()); } void LLVMContextSetDiagnosticHandler(LLVMContextRef C, LLVMDiagnosticHandler Handler, @@ -251,7 +254,7 @@ LLVMDiagnosticSeverity LLVMGetDiagInfoSeverity(LLVMDiagnosticInfoRef DI) { /*===-- Operations on modules ---------------------------------------------===*/ LLVMModuleRef LLVMModuleCreateWithName(const char *ModuleID) { - return wrap(new Module(ModuleID, *GlobalContext)); + return wrap(new Module(ModuleID, getGlobalContext())); } LLVMModuleRef LLVMModuleCreateWithNameInContext(const char *ModuleID, @@ -1571,11 +1574,6 @@ LLVMValueRef LLVMConstNUWAdd(LLVMValueRef LHSConstant, unwrap<Constant>(RHSConstant))); } -LLVMValueRef LLVMConstFAdd(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) { - return wrap(ConstantExpr::getFAdd(unwrap<Constant>(LHSConstant), - unwrap<Constant>(RHSConstant))); -} - LLVMValueRef LLVMConstSub(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) { return wrap(ConstantExpr::getSub(unwrap<Constant>(LHSConstant), unwrap<Constant>(RHSConstant))); @@ -1593,11 +1591,6 @@ LLVMValueRef LLVMConstNUWSub(LLVMValueRef LHSConstant, unwrap<Constant>(RHSConstant))); } -LLVMValueRef LLVMConstFSub(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) { - return 
wrap(ConstantExpr::getFSub(unwrap<Constant>(LHSConstant), - unwrap<Constant>(RHSConstant))); -} - LLVMValueRef LLVMConstMul(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) { return wrap(ConstantExpr::getMul(unwrap<Constant>(LHSConstant), unwrap<Constant>(RHSConstant))); @@ -1615,53 +1608,6 @@ LLVMValueRef LLVMConstNUWMul(LLVMValueRef LHSConstant, unwrap<Constant>(RHSConstant))); } -LLVMValueRef LLVMConstFMul(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) { - return wrap(ConstantExpr::getFMul(unwrap<Constant>(LHSConstant), - unwrap<Constant>(RHSConstant))); -} - -LLVMValueRef LLVMConstUDiv(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) { - return wrap(ConstantExpr::getUDiv(unwrap<Constant>(LHSConstant), - unwrap<Constant>(RHSConstant))); -} - -LLVMValueRef LLVMConstExactUDiv(LLVMValueRef LHSConstant, - LLVMValueRef RHSConstant) { - return wrap(ConstantExpr::getExactUDiv(unwrap<Constant>(LHSConstant), - unwrap<Constant>(RHSConstant))); -} - -LLVMValueRef LLVMConstSDiv(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) { - return wrap(ConstantExpr::getSDiv(unwrap<Constant>(LHSConstant), - unwrap<Constant>(RHSConstant))); -} - -LLVMValueRef LLVMConstExactSDiv(LLVMValueRef LHSConstant, - LLVMValueRef RHSConstant) { - return wrap(ConstantExpr::getExactSDiv(unwrap<Constant>(LHSConstant), - unwrap<Constant>(RHSConstant))); -} - -LLVMValueRef LLVMConstFDiv(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) { - return wrap(ConstantExpr::getFDiv(unwrap<Constant>(LHSConstant), - unwrap<Constant>(RHSConstant))); -} - -LLVMValueRef LLVMConstURem(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) { - return wrap(ConstantExpr::getURem(unwrap<Constant>(LHSConstant), - unwrap<Constant>(RHSConstant))); -} - -LLVMValueRef LLVMConstSRem(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) { - return wrap(ConstantExpr::getSRem(unwrap<Constant>(LHSConstant), - unwrap<Constant>(RHSConstant))); -} - -LLVMValueRef LLVMConstFRem(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) { - return wrap(ConstantExpr::getFRem(unwrap<Constant>(LHSConstant), - unwrap<Constant>(RHSConstant))); -} - LLVMValueRef LLVMConstAnd(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) { return wrap(ConstantExpr::getAnd(unwrap<Constant>(LHSConstant), unwrap<Constant>(RHSConstant))); @@ -1875,14 +1821,6 @@ LLVMValueRef LLVMConstShuffleVector(LLVMValueRef VectorAConstant, IntMask)); } -LLVMValueRef LLVMConstInsertValue(LLVMValueRef AggConstant, - LLVMValueRef ElementValueConstant, - unsigned *IdxList, unsigned NumIdx) { - return wrap(ConstantExpr::getInsertValue(unwrap<Constant>(AggConstant), - unwrap<Constant>(ElementValueConstant), - makeArrayRef(IdxList, NumIdx))); -} - LLVMValueRef LLVMConstInlineAsm(LLVMTypeRef Ty, const char *AsmString, const char *Constraints, LLVMBool HasSideEffects, @@ -2843,6 +2781,10 @@ void LLVMInstructionEraseFromParent(LLVMValueRef Inst) { unwrap<Instruction>(Inst)->eraseFromParent(); } +void LLVMDeleteInstruction(LLVMValueRef Inst) { + unwrap<Instruction>(Inst)->deleteValue(); +} + LLVMIntPredicate LLVMGetICmpPredicate(LLVMValueRef Inst) { if (ICmpInst *I = dyn_cast<ICmpInst>(unwrap(Inst))) return (LLVMIntPredicate)I->getPredicate(); @@ -3079,8 +3021,6 @@ unsigned LLVMGetNumIndices(LLVMValueRef Inst) { return EV->getNumIndices(); if (auto *IV = dyn_cast<InsertValueInst>(I)) return IV->getNumIndices(); - if (auto *CE = dyn_cast<ConstantExpr>(I)) - return CE->getIndices().size(); llvm_unreachable( "LLVMGetNumIndices applies only to extractvalue and insertvalue!"); } @@ -3091,8 +3031,6 @@ const 
unsigned *LLVMGetIndices(LLVMValueRef Inst) { return EV->getIndices().data(); if (auto *IV = dyn_cast<InsertValueInst>(I)) return IV->getIndices().data(); - if (auto *CE = dyn_cast<ConstantExpr>(I)) - return CE->getIndices().data(); llvm_unreachable( "LLVMGetIndices applies only to extractvalue and insertvalue!"); } @@ -3664,6 +3602,8 @@ static AtomicRMWInst::BinOp mapFromLLVMRMWBinOp(LLVMAtomicRMWBinOp BinOp) { case LLVMAtomicRMWBinOpUMin: return AtomicRMWInst::UMin; case LLVMAtomicRMWBinOpFAdd: return AtomicRMWInst::FAdd; case LLVMAtomicRMWBinOpFSub: return AtomicRMWInst::FSub; + case LLVMAtomicRMWBinOpFMax: return AtomicRMWInst::FMax; + case LLVMAtomicRMWBinOpFMin: return AtomicRMWInst::FMin; } llvm_unreachable("Invalid LLVMAtomicRMWBinOp value!"); @@ -3684,6 +3624,8 @@ static LLVMAtomicRMWBinOp mapToLLVMRMWBinOp(AtomicRMWInst::BinOp BinOp) { case AtomicRMWInst::UMin: return LLVMAtomicRMWBinOpUMin; case AtomicRMWInst::FAdd: return LLVMAtomicRMWBinOpFAdd; case AtomicRMWInst::FSub: return LLVMAtomicRMWBinOpFSub; + case AtomicRMWInst::FMax: return LLVMAtomicRMWBinOpFMax; + case AtomicRMWInst::FMin: return LLVMAtomicRMWBinOpFMin; default: break; } diff --git a/llvm/lib/IR/InlineAsm.cpp b/llvm/lib/IR/InlineAsm.cpp index 203ad6dae1ff..c75b1aa7c1d6 100644 --- a/llvm/lib/IR/InlineAsm.cpp +++ b/llvm/lib/IR/InlineAsm.cpp @@ -19,6 +19,7 @@ #include "llvm/IR/Value.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/Errc.h" #include <algorithm> #include <cassert> #include <cctype> @@ -33,9 +34,10 @@ InlineAsm::InlineAsm(FunctionType *FTy, const std::string &asmString, AsmString(asmString), Constraints(constraints), FTy(FTy), HasSideEffects(hasSideEffects), IsAlignStack(isAlignStack), Dialect(asmDialect), CanThrow(canThrow) { +#ifndef NDEBUG // Do various checks on the constraint string and type. - assert(Verify(getFunctionType(), constraints) && - "Function type not legal for constraints!"); + cantFail(verify(getFunctionType(), constraints)); +#endif } InlineAsm *InlineAsm::get(FunctionType *FTy, StringRef AsmString, @@ -248,15 +250,19 @@ InlineAsm::ParseConstraints(StringRef Constraints) { return Result; } -/// Verify - Verify that the specified constraint string is reasonable for the -/// specified function type, and otherwise validate the constraint string. -bool InlineAsm::Verify(FunctionType *Ty, StringRef ConstStr) { - if (Ty->isVarArg()) return false; +static Error makeStringError(const char *Msg) { + return createStringError(errc::invalid_argument, Msg); +} + +Error InlineAsm::verify(FunctionType *Ty, StringRef ConstStr) { + if (Ty->isVarArg()) + return makeStringError("inline asm cannot be variadic"); ConstraintInfoVector Constraints = ParseConstraints(ConstStr); // Error parsing constraints. - if (Constraints.empty() && !ConstStr.empty()) return false; + if (Constraints.empty() && !ConstStr.empty()) + return makeStringError("failed to parse constraints"); unsigned NumOutputs = 0, NumInputs = 0, NumClobbers = 0; unsigned NumIndirect = 0; @@ -265,7 +271,9 @@ bool InlineAsm::Verify(FunctionType *Ty, StringRef ConstStr) { switch (Constraint.Type) { case InlineAsm::isOutput: if ((NumInputs-NumIndirect) != 0 || NumClobbers != 0) - return false; // outputs before inputs and clobbers. 
+ return makeStringError("output constraint occurs after input " + "or clobber constraint"); + if (!Constraint.isIndirect) { ++NumOutputs; break; @@ -273,7 +281,9 @@ bool InlineAsm::Verify(FunctionType *Ty, StringRef ConstStr) { ++NumIndirect; LLVM_FALLTHROUGH; // We fall through for Indirect Outputs. case InlineAsm::isInput: - if (NumClobbers) return false; // inputs before clobbers. + if (NumClobbers) + return makeStringError("input constraint occurs after clobber " + "constraint"); ++NumInputs; break; case InlineAsm::isClobber: @@ -284,18 +294,23 @@ bool InlineAsm::Verify(FunctionType *Ty, StringRef ConstStr) { switch (NumOutputs) { case 0: - if (!Ty->getReturnType()->isVoidTy()) return false; + if (!Ty->getReturnType()->isVoidTy()) + return makeStringError("inline asm without outputs must return void"); break; case 1: - if (Ty->getReturnType()->isStructTy()) return false; + if (Ty->getReturnType()->isStructTy()) + return makeStringError("inline asm with one output cannot return struct"); break; default: StructType *STy = dyn_cast<StructType>(Ty->getReturnType()); if (!STy || STy->getNumElements() != NumOutputs) - return false; + return makeStringError("number of output constraints does not match " + "number of return struct elements"); break; } - if (Ty->getNumParams() != NumInputs) return false; - return true; + if (Ty->getNumParams() != NumInputs) + return makeStringError("number of input constraints does not match number " + "of parameters"); + return Error::success(); } diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp index 6a91edb75dd2..b333f40f3ce9 100644 --- a/llvm/lib/IR/Instructions.cpp +++ b/llvm/lib/IR/Instructions.cpp @@ -1696,6 +1696,10 @@ StringRef AtomicRMWInst::getOperationName(BinOp Op) { return "fadd"; case AtomicRMWInst::FSub: return "fsub"; + case AtomicRMWInst::FMax: + return "fmax"; + case AtomicRMWInst::FMin: + return "fmin"; case AtomicRMWInst::BAD_BINOP: return "<invalid operation>"; } @@ -4423,10 +4427,9 @@ MDNode *SwitchInstProfUpdateWrapper::buildProfBranchWeightsMD() { assert(SI.getNumSuccessors() == Weights->size() && "num of prof branch_weights must accord with num of successors"); - bool AllZeroes = - all_of(Weights.getValue(), [](uint32_t W) { return W == 0; }); + bool AllZeroes = all_of(Weights.value(), [](uint32_t W) { return W == 0; }); - if (AllZeroes || Weights.getValue().size() < 2) + if (AllZeroes || Weights.value().size() < 2) return nullptr; return MDBuilder(SI.getParent()->getContext()).createBranchWeights(*Weights); @@ -4460,8 +4463,8 @@ SwitchInstProfUpdateWrapper::removeCase(SwitchInst::CaseIt I) { // Copy the last case to the place of the removed one and shrink. // This is tightly coupled with the way SwitchInst::removeCase() removes // the cases in SwitchInst::removeCase(CaseIt). 
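  // Worked example (illustrative): with Weights == [D, W0, W1, W2], where
  // slot 0 holds the default successor's weight, removing the case at
  // index 0 copies the last weight into its slot and shrinks: [D, W2, W1].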
- Weights.getValue()[I->getCaseIndex() + 1] = Weights.getValue().back(); - Weights.getValue().pop_back(); + Weights.value()[I->getCaseIndex() + 1] = Weights.value().back(); + Weights.value().pop_back(); } return SI.removeCase(I); } @@ -4474,10 +4477,10 @@ void SwitchInstProfUpdateWrapper::addCase( if (!Weights && W && *W) { Changed = true; Weights = SmallVector<uint32_t, 8>(SI.getNumSuccessors(), 0); - Weights.getValue()[SI.getNumSuccessors() - 1] = *W; + Weights.value()[SI.getNumSuccessors() - 1] = *W; } else if (Weights) { Changed = true; - Weights.getValue().push_back(W.value_or(0)); + Weights.value().push_back(W.value_or(0)); } if (Weights) assert(SI.getNumSuccessors() == Weights->size() && diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp index b132a9dcb812..65a9a32ad2c5 100644 --- a/llvm/lib/IR/IntrinsicInst.cpp +++ b/llvm/lib/IR/IntrinsicInst.cpp @@ -223,13 +223,13 @@ ConstrainedFPIntrinsic::getExceptionBehavior() const { bool ConstrainedFPIntrinsic::isDefaultFPEnvironment() const { Optional<fp::ExceptionBehavior> Except = getExceptionBehavior(); if (Except) { - if (Except.getValue() != fp::ebIgnore) + if (Except.value() != fp::ebIgnore) return false; } Optional<RoundingMode> Rounding = getRoundingMode(); if (Rounding) { - if (Rounding.getValue() != RoundingMode::NearestTiesToEven) + if (Rounding.value() != RoundingMode::NearestTiesToEven) return false; } @@ -364,13 +364,13 @@ VPIntrinsic::getVectorLengthParamPos(Intrinsic::ID IntrinsicID) { MaybeAlign VPIntrinsic::getPointerAlignment() const { Optional<unsigned> PtrParamOpt = getMemoryPointerParamPos(getIntrinsicID()); assert(PtrParamOpt && "no pointer argument!"); - return getParamAlign(PtrParamOpt.getValue()); + return getParamAlign(PtrParamOpt.value()); } /// \return The pointer operand of this load,store, gather or scatter. Value *VPIntrinsic::getMemoryPointerParam() const { if (auto PtrParamOpt = getMemoryPointerParamPos(getIntrinsicID())) - return getArgOperand(PtrParamOpt.getValue()); + return getArgOperand(PtrParamOpt.value()); return nullptr; } @@ -391,7 +391,7 @@ Value *VPIntrinsic::getMemoryDataParam() const { auto DataParamOpt = getMemoryDataParamPos(getIntrinsicID()); if (!DataParamOpt) return nullptr; - return getArgOperand(DataParamOpt.getValue()); + return getArgOperand(DataParamOpt.value()); } Optional<unsigned> VPIntrinsic::getMemoryDataParamPos(Intrinsic::ID VPID) { diff --git a/llvm/lib/IR/LLVMContextImpl.cpp b/llvm/lib/IR/LLVMContextImpl.cpp index 06b3a3afef9d..d7aaf0008564 100644 --- a/llvm/lib/IR/LLVMContextImpl.cpp +++ b/llvm/lib/IR/LLVMContextImpl.cpp @@ -27,7 +27,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/ManagedStatic.h" #include "llvm/Support/TypeSize.h" #include <cassert> #include <utility> @@ -241,7 +240,7 @@ void LLVMContextImpl::getSyncScopeNames( /// singleton OptBisect if not explicitly set. 
OptPassGate &LLVMContextImpl::getOptPassGate() const { if (!OPG) - OPG = &(*OptBisector); + OPG = &getOptBisector(); return *OPG; } @@ -260,7 +259,7 @@ bool LLVMContextImpl::getOpaquePointers() { } void LLVMContextImpl::setOpaquePointers(bool OP) { - assert((!OpaquePointers || OpaquePointers.getValue() == OP) && + assert((!OpaquePointers || OpaquePointers.value() == OP) && "Cannot change opaque pointers mode once set"); OpaquePointers = OP; } diff --git a/llvm/lib/IR/Metadata.cpp b/llvm/lib/IR/Metadata.cpp index ae2401026ebf..2a1a514922fd 100644 --- a/llvm/lib/IR/Metadata.cpp +++ b/llvm/lib/IR/Metadata.cpp @@ -592,13 +592,6 @@ MDNode::Header::~Header() { (void)(O - 1)->~MDOperand(); } -void *MDNode::Header::getLargePtr() const { - static_assert(alignof(LargeStorageVector) <= alignof(Header), - "LargeStorageVector too strongly aligned"); - return reinterpret_cast<char *>(const_cast<Header *>(this)) - - sizeof(LargeStorageVector); -} - void *MDNode::Header::getSmallPtr() { static_assert(alignof(MDOperand) <= alignof(Header), "MDOperand too strongly aligned"); diff --git a/llvm/lib/IR/Module.cpp b/llvm/lib/IR/Module.cpp index 5cd74d53da75..b51ea45f651a 100644 --- a/llvm/lib/IR/Module.cpp +++ b/llvm/lib/IR/Module.cpp @@ -714,6 +714,18 @@ void Module::setStackProtectorGuardReg(StringRef Reg) { addModuleFlag(ModFlagBehavior::Error, "stack-protector-guard-reg", ID); } +StringRef Module::getStackProtectorGuardSymbol() const { + Metadata *MD = getModuleFlag("stack-protector-guard-symbol"); + if (auto *MDS = dyn_cast_or_null<MDString>(MD)) + return MDS->getString(); + return {}; +} + +void Module::setStackProtectorGuardSymbol(StringRef Symbol) { + MDString *ID = MDString::get(getContext(), Symbol); + addModuleFlag(ModFlagBehavior::Error, "stack-protector-guard-symbol", ID); +} + int Module::getStackProtectorGuardOffset() const { Metadata *MD = getModuleFlag("stack-protector-guard-offset"); if (auto *CI = mdconst::dyn_extract_or_null<ConstantInt>(MD)) diff --git a/llvm/lib/IR/OptBisect.cpp b/llvm/lib/IR/OptBisect.cpp index 418311eac814..c9054dba344a 100644 --- a/llvm/lib/IR/OptBisect.cpp +++ b/llvm/lib/IR/OptBisect.cpp @@ -23,7 +23,7 @@ using namespace llvm; static cl::opt<int> OptBisectLimit("opt-bisect-limit", cl::Hidden, cl::init(OptBisect::Disabled), cl::Optional, cl::cb<void, int>([](int Limit) { - llvm::OptBisector->setLimit(Limit); + llvm::getOptBisector().setLimit(Limit); }), cl::desc("Maximum optimization to perform")); @@ -52,4 +52,7 @@ bool OptBisect::checkPass(const StringRef PassName, const int OptBisect::Disabled; -ManagedStatic<OptBisect> llvm::OptBisector; +OptBisect &llvm::getOptBisector() { + static OptBisect OptBisector; + return OptBisector; +} diff --git a/llvm/lib/IR/PassRegistry.cpp b/llvm/lib/IR/PassRegistry.cpp index 94f607afec47..6c22fcd34769 100644 --- a/llvm/lib/IR/PassRegistry.cpp +++ b/llvm/lib/IR/PassRegistry.cpp @@ -15,21 +15,15 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/Pass.h" #include "llvm/PassInfo.h" -#include "llvm/Support/ManagedStatic.h" #include <cassert> #include <memory> #include <utility> using namespace llvm; -// FIXME: We use ManagedStatic to erase the pass registrar on shutdown. -// Unfortunately, passes are registered with static ctors, and having -// llvm_shutdown clear this map prevents successful resurrection after -// llvm_shutdown is run. Ideally we should find a solution so that we don't -// leak the map, AND can still resurrect after shutdown. 
-static ManagedStatic<PassRegistry> PassRegistryObj; PassRegistry *PassRegistry::getPassRegistry() { - return &*PassRegistryObj; + static PassRegistry PassRegistryObj; + return &PassRegistryObj; } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/InterfaceStub/IFSHandler.cpp b/llvm/lib/InterfaceStub/IFSHandler.cpp index 71189e79360e..4edaeb74d6a7 100644 --- a/llvm/lib/InterfaceStub/IFSHandler.cpp +++ b/llvm/lib/InterfaceStub/IFSHandler.cpp @@ -202,8 +202,8 @@ Error ifs::writeIFSToOutputStream(raw_ostream &OS, const IFSStub &Stub) { yaml::Output YamlOut(OS, nullptr, /*WrapColumn =*/0); std::unique_ptr<IFSStubTriple> CopyStub(new IFSStubTriple(Stub)); if (Stub.Target.Arch) { - CopyStub->Target.ArchString = std::string( - ELF::convertEMachineToArchName(Stub.Target.Arch.getValue())); + CopyStub->Target.ArchString = + std::string(ELF::convertEMachineToArchName(Stub.Target.Arch.value())); } IFSTarget Target = Stub.Target; @@ -222,36 +222,35 @@ Error ifs::overrideIFSTarget(IFSStub &Stub, Optional<IFSArch> OverrideArch, Optional<std::string> OverrideTriple) { std::error_code OverrideEC(1, std::generic_category()); if (OverrideArch) { - if (Stub.Target.Arch && - Stub.Target.Arch.getValue() != OverrideArch.getValue()) { + if (Stub.Target.Arch && Stub.Target.Arch.value() != OverrideArch.value()) { return make_error<StringError>( "Supplied Arch conflicts with the text stub", OverrideEC); } - Stub.Target.Arch = OverrideArch.getValue(); + Stub.Target.Arch = OverrideArch.value(); } if (OverrideEndianness) { if (Stub.Target.Endianness && - Stub.Target.Endianness.getValue() != OverrideEndianness.getValue()) { + Stub.Target.Endianness.value() != OverrideEndianness.value()) { return make_error<StringError>( "Supplied Endianness conflicts with the text stub", OverrideEC); } - Stub.Target.Endianness = OverrideEndianness.getValue(); + Stub.Target.Endianness = OverrideEndianness.value(); } if (OverrideBitWidth) { if (Stub.Target.BitWidth && - Stub.Target.BitWidth.getValue() != OverrideBitWidth.getValue()) { + Stub.Target.BitWidth.value() != OverrideBitWidth.value()) { return make_error<StringError>( "Supplied BitWidth conflicts with the text stub", OverrideEC); } - Stub.Target.BitWidth = OverrideBitWidth.getValue(); + Stub.Target.BitWidth = OverrideBitWidth.value(); } if (OverrideTriple) { if (Stub.Target.Triple && - Stub.Target.Triple.getValue() != OverrideTriple.getValue()) { + Stub.Target.Triple.value() != OverrideTriple.value()) { return make_error<StringError>( "Supplied Triple conflicts with the text stub", OverrideEC); } - Stub.Target.Triple = OverrideTriple.getValue(); + Stub.Target.Triple = OverrideTriple.value(); } return Error::success(); } diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp index 5d50e92ae377..e248e58e4e4e 100644 --- a/llvm/lib/LTO/LTOBackend.cpp +++ b/llvm/lib/LTO/LTOBackend.cpp @@ -81,17 +81,19 @@ extern cl::opt<bool> NoPGOWarnMismatch; exit(1); } -Error Config::addSaveTemps(std::string OutputFileName, - bool UseInputModulePath) { +Error Config::addSaveTemps(std::string OutputFileName, bool UseInputModulePath, + const DenseSet<StringRef> &SaveTempsArgs) { ShouldDiscardValueNames = false; std::error_code EC; - ResolutionFile = - std::make_unique<raw_fd_ostream>(OutputFileName + "resolution.txt", EC, - sys::fs::OpenFlags::OF_TextWithCRLF); - if (EC) { - ResolutionFile.reset(); - return errorCodeToError(EC); + if (SaveTempsArgs.empty() || SaveTempsArgs.contains("resolution")) { + ResolutionFile = + 
std::make_unique<raw_fd_ostream>(OutputFileName + "resolution.txt", EC, + sys::fs::OpenFlags::OF_TextWithCRLF); + if (EC) { + ResolutionFile.reset(); + return errorCodeToError(EC); + } } auto setHook = [&](std::string PathSuffix, ModuleHookFn &Hook) { @@ -125,14 +127,7 @@ Error Config::addSaveTemps(std::string OutputFileName, }; }; - setHook("0.preopt", PreOptModuleHook); - setHook("1.promote", PostPromoteModuleHook); - setHook("2.internalize", PostInternalizeModuleHook); - setHook("3.import", PostImportModuleHook); - setHook("4.opt", PostOptModuleHook); - setHook("5.precodegen", PreCodeGenModuleHook); - - CombinedIndexHook = + auto SaveCombinedIndex = [=](const ModuleSummaryIndex &Index, const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) { std::string Path = OutputFileName + "index.bc"; @@ -152,6 +147,31 @@ Error Config::addSaveTemps(std::string OutputFileName, return true; }; + if (SaveTempsArgs.empty()) { + setHook("0.preopt", PreOptModuleHook); + setHook("1.promote", PostPromoteModuleHook); + setHook("2.internalize", PostInternalizeModuleHook); + setHook("3.import", PostImportModuleHook); + setHook("4.opt", PostOptModuleHook); + setHook("5.precodegen", PreCodeGenModuleHook); + CombinedIndexHook = SaveCombinedIndex; + } else { + if (SaveTempsArgs.contains("preopt")) + setHook("0.preopt", PreOptModuleHook); + if (SaveTempsArgs.contains("promote")) + setHook("1.promote", PostPromoteModuleHook); + if (SaveTempsArgs.contains("internalize")) + setHook("2.internalize", PostInternalizeModuleHook); + if (SaveTempsArgs.contains("import")) + setHook("3.import", PostImportModuleHook); + if (SaveTempsArgs.contains("opt")) + setHook("4.opt", PostOptModuleHook); + if (SaveTempsArgs.contains("precodegen")) + setHook("5.precodegen", PreCodeGenModuleHook); + if (SaveTempsArgs.contains("combinedindex")) + CombinedIndexHook = SaveCombinedIndex; + } + return Error::success(); } diff --git a/llvm/lib/Linker/IRMover.cpp b/llvm/lib/Linker/IRMover.cpp index 5a819e2d736c..9e89cce8312e 100644 --- a/llvm/lib/Linker/IRMover.cpp +++ b/llvm/lib/Linker/IRMover.cpp @@ -1357,11 +1357,18 @@ Error IRLinker::linkModuleFlagsMetadata() { DstM.getModuleIdentifier() + "'"); } - auto replaceDstValue = [&](MDNode *New) { + auto ensureDistinctOp = [&](MDNode *DstValue) { + assert(isa<MDTuple>(DstValue) && + "Expected MDTuple when appending module flags"); + if (DstValue->isDistinct()) + return dyn_cast<MDTuple>(DstValue); + MDTuple *New = MDTuple::getDistinct( + DstM.getContext(), SmallVector<Metadata *, 4>(DstValue->operands())); Metadata *FlagOps[] = {DstOp->getOperand(0), ID, New}; - MDNode *Flag = MDNode::get(DstM.getContext(), FlagOps); + MDNode *Flag = MDTuple::getDistinct(DstM.getContext(), FlagOps); DstModFlags->setOperand(DstIndex, Flag); Flags[ID].first = Flag; + return New; }; // Emit a warning if the values differ and either source or destination @@ -1438,25 +1445,20 @@ Error IRLinker::linkModuleFlagsMetadata() { break; } case Module::Append: { - MDNode *DstValue = cast<MDNode>(DstOp->getOperand(2)); + MDTuple *DstValue = ensureDistinctOp(cast<MDNode>(DstOp->getOperand(2))); MDNode *SrcValue = cast<MDNode>(SrcOp->getOperand(2)); - SmallVector<Metadata *, 8> MDs; - MDs.reserve(DstValue->getNumOperands() + SrcValue->getNumOperands()); - MDs.append(DstValue->op_begin(), DstValue->op_end()); - MDs.append(SrcValue->op_begin(), SrcValue->op_end()); - - replaceDstValue(MDNode::get(DstM.getContext(), MDs)); + for (const auto &O : SrcValue->operands()) + DstValue->push_back(O); break; } case Module::AppendUnique: { 
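// Editor's aside — hypothetical sketch of why the Append/AppendUnique paths
// here first force the destination tuple to be distinct: uniqued MDNodes are
// immutable hash-cons keys, while a distinct MDTuple owns its operand list
// and (with this change) can grow it in place via push_back.
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Metadata.h"

static llvm::MDTuple *appendFlagOperand(llvm::LLVMContext &Ctx,
                                        llvm::MDTuple *Dst,
                                        llvm::Metadata *Op) {
  if (!Dst->isDistinct())
    Dst = llvm::MDTuple::getDistinct(
        Ctx, llvm::SmallVector<llvm::Metadata *, 4>(Dst->operands()));
  Dst->push_back(Op); // in-place append is only legal on a distinct node
  return Dst;
}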
SmallSetVector<Metadata *, 16> Elts; - MDNode *DstValue = cast<MDNode>(DstOp->getOperand(2)); + MDTuple *DstValue = ensureDistinctOp(cast<MDNode>(DstOp->getOperand(2))); MDNode *SrcValue = cast<MDNode>(SrcOp->getOperand(2)); Elts.insert(DstValue->op_begin(), DstValue->op_end()); Elts.insert(SrcValue->op_begin(), SrcValue->op_end()); - - replaceDstValue(MDNode::get(DstM.getContext(), - makeArrayRef(Elts.begin(), Elts.end()))); + for (auto I = DstValue->getNumOperands(); I < Elts.size(); I++) + DstValue->push_back(Elts[I]); break; } } diff --git a/llvm/lib/MC/ELFObjectWriter.cpp b/llvm/lib/MC/ELFObjectWriter.cpp index eda495693595..78204ffe4c3b 100644 --- a/llvm/lib/MC/ELFObjectWriter.cpp +++ b/llvm/lib/MC/ELFObjectWriter.cpp @@ -145,7 +145,7 @@ struct ELFWriter { uint64_t align(unsigned Alignment); bool maybeWriteCompression(uint64_t Size, - SmallVectorImpl<char> &CompressedContents, + SmallVectorImpl<uint8_t> &CompressedContents, bool ZLibStyle, unsigned Alignment); public: @@ -819,7 +819,7 @@ MCSectionELF *ELFWriter::createRelocationSection(MCContext &Ctx, // Include the debug info compression header. bool ELFWriter::maybeWriteCompression( - uint64_t Size, SmallVectorImpl<char> &CompressedContents, bool ZLibStyle, + uint64_t Size, SmallVectorImpl<uint8_t> &CompressedContents, bool ZLibStyle, unsigned Alignment) { if (ZLibStyle) { uint64_t HdrSize = @@ -875,9 +875,11 @@ void ELFWriter::writeSectionData(const MCAssembler &Asm, MCSection &Sec, raw_svector_ostream VecOS(UncompressedData); Asm.writeSectionData(VecOS, &Section, Layout); - SmallVector<char, 128> CompressedContents; - zlib::compress(StringRef(UncompressedData.data(), UncompressedData.size()), - CompressedContents); + SmallVector<uint8_t, 128> CompressedContents; + compression::zlib::compress( + makeArrayRef(reinterpret_cast<uint8_t *>(UncompressedData.data()), + UncompressedData.size()), + CompressedContents); bool ZlibStyle = MAI->compressDebugSections() == DebugCompressionType::Z; if (!maybeWriteCompression(UncompressedData.size(), CompressedContents, @@ -896,7 +898,7 @@ void ELFWriter::writeSectionData(const MCAssembler &Asm, MCSection &Sec, // Add "z" prefix to section name. This is zlib-gnu style. MC.renameELFSection(&Section, (".z" + SectionName.drop_front(1)).str()); } - W.OS << CompressedContents; + W.OS << toStringRef(CompressedContents); } void ELFWriter::WriteSecHdrEntry(uint32_t Name, uint32_t Type, uint64_t Flags, diff --git a/llvm/lib/MC/MCContext.cpp b/llvm/lib/MC/MCContext.cpp index 4be84ca7feb5..d312e3521c9e 100644 --- a/llvm/lib/MC/MCContext.cpp +++ b/llvm/lib/MC/MCContext.cpp @@ -773,7 +773,7 @@ MCSectionXCOFF *MCContext::getXCOFFSection( // Do the lookup. If we have a hit, return it. auto IterBool = XCOFFUniquingMap.insert(std::make_pair( IsDwarfSec - ? XCOFFSectionKey(Section.str(), DwarfSectionSubtypeFlags.getValue()) + ? 
XCOFFSectionKey(Section.str(), DwarfSectionSubtypeFlags.value()) : XCOFFSectionKey(Section.str(), CsectProp->MappingClass), nullptr)); auto &Entry = *IterBool.first; @@ -806,7 +806,7 @@ MCSectionXCOFF *MCContext::getXCOFFSection( if (IsDwarfSec) Result = new (XCOFFAllocator.Allocate()) MCSectionXCOFF(QualName->getUnqualifiedName(), Kind, QualName, - DwarfSectionSubtypeFlags.getValue(), Begin, CachedName, + DwarfSectionSubtypeFlags.value(), Begin, CachedName, MultiSymbolsAllowed); else Result = new (XCOFFAllocator.Allocate()) diff --git a/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp b/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp index 0c041186936d..cf98cb8ff59f 100644 --- a/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp +++ b/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp @@ -88,8 +88,8 @@ bool XCOFFSymbolInfo::operator<(const XCOFFSymbolInfo &SymInfo) const { return SymInfo.StorageMappingClass.has_value(); if (StorageMappingClass) { - return getSMCPriority(StorageMappingClass.getValue()) < - getSMCPriority(SymInfo.StorageMappingClass.getValue()); + return getSMCPriority(StorageMappingClass.value()) < + getSMCPriority(SymInfo.StorageMappingClass.value()); } return false; diff --git a/llvm/lib/MC/MCParser/ELFAsmParser.cpp b/llvm/lib/MC/MCParser/ELFAsmParser.cpp index 04a234be3b47..563d3487ef50 100644 --- a/llvm/lib/MC/MCParser/ELFAsmParser.cpp +++ b/llvm/lib/MC/MCParser/ELFAsmParser.cpp @@ -660,6 +660,8 @@ EndStmt: Type = ELF::SHT_LLVM_SYMPART; else if (TypeName == "llvm_bb_addr_map") Type = ELF::SHT_LLVM_BB_ADDR_MAP; + else if (TypeName == "llvm_offloading") + Type = ELF::SHT_LLVM_OFFLOADING; else if (TypeName.getAsInteger(0, Type)) return TokError("unknown section type"); } diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp index 8c582d225e30..694ea395fdec 100644 --- a/llvm/lib/MC/MCParser/MasmParser.cpp +++ b/llvm/lib/MC/MCParser/MasmParser.cpp @@ -1585,6 +1585,16 @@ bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc, Res = MCUnaryExpr::createNot(Res, getContext(), FirstTokenLoc); return false; } + // Parse directional local label references. + if (Identifier.equals_insensitive("@b") || + Identifier.equals_insensitive("@f")) { + bool Before = Identifier.equals_insensitive("@b"); + MCSymbol *Sym = getContext().getDirectionalLocalSymbol(0, Before); + if (Before && Sym->isUndefined()) + return Error(FirstTokenLoc, "Expected @@ label before @B reference"); + Res = MCSymbolRefExpr::create(Sym, getContext()); + return false; + } // Parse symbol variant. std::pair<StringRef, StringRef> Split; if (!MAI.useParensForSymbolVariant()) { @@ -1714,34 +1724,10 @@ bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc, case AsmToken::BigNum: return TokError("literal value out of range for directive"); case AsmToken::Integer: { - SMLoc Loc = getTok().getLoc(); int64_t IntVal = getTok().getIntVal(); Res = MCConstantExpr::create(IntVal, getContext()); EndLoc = Lexer.getTok().getEndLoc(); Lex(); // Eat token. - // Look for 'b' or 'f' following an Integer as a directional label. - if (Lexer.getKind() == AsmToken::Identifier) { - StringRef IDVal = getTok().getString(); - // Look up the symbol variant if used. 
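// Editor's aside — a simplified, hypothetical rendering of the directional
// local-label support added above: MASM spells an anonymous label "@@:" and
// references it with "@B" (nearest previous) or "@F" (nearest following),
// all mapped onto MC's directional local symbol number 0.
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCContext.h"

// Assumes a fully initialized MCContext; its construction is omitted here.
static llvm::MCSymbol *lookupAnonymousLabel(llvm::MCContext &Ctx,
                                            llvm::StringRef Tok) {
  if (Tok.equals_insensitive("@b"))
    return Ctx.getDirectionalLocalSymbol(0, /*Before=*/true);
  if (Tok.equals_insensitive("@f"))
    return Ctx.getDirectionalLocalSymbol(0, /*Before=*/false);
  return nullptr; // not a directional reference
}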
- std::pair<StringRef, StringRef> Split = IDVal.split('@'); - MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; - if (Split.first.size() != IDVal.size()) { - Variant = MCSymbolRefExpr::getVariantKindForName(Split.second); - if (Variant == MCSymbolRefExpr::VK_Invalid) - return TokError("invalid variant '" + Split.second + "'"); - IDVal = Split.first; - } - if (IDVal == "f" || IDVal == "b") { - MCSymbol *Sym = - Ctx.getDirectionalLocalSymbol(IntVal, IDVal == "b"); - Res = MCSymbolRefExpr::create(Sym, Variant, getContext()); - if (IDVal == "b" && Sym->isUndefined()) - return Error(Loc, "directional label undefined"); - DirLabels.push_back(std::make_tuple(Loc, CppHashInfo, Sym)); - EndLoc = Lexer.getTok().getEndLoc(); - Lex(); // Eat identifier. - } - } return false; } case AsmToken::String: { @@ -2042,6 +2028,9 @@ bool MasmParser::parseBinOpRHS(unsigned Precedence, const MCExpr *&Res, .CaseLower("and", AsmToken::Amp) .CaseLower("not", AsmToken::Exclaim) .CaseLower("or", AsmToken::Pipe) + .CaseLower("xor", AsmToken::Caret) + .CaseLower("shl", AsmToken::LessLess) + .CaseLower("shr", AsmToken::GreaterGreater) .CaseLower("eq", AsmToken::EqualEqual) .CaseLower("ne", AsmToken::ExclaimEqual) .CaseLower("lt", AsmToken::Less) @@ -2110,29 +2099,9 @@ bool MasmParser::parseStatement(ParseStatementInfo &Info, AsmToken ID = getTok(); SMLoc IDLoc = ID.getLoc(); StringRef IDVal; - int64_t LocalLabelVal = -1; if (Lexer.is(AsmToken::HashDirective)) return parseCppHashLineFilenameComment(IDLoc); - // Allow an integer followed by a ':' as a directional local label. - if (Lexer.is(AsmToken::Integer)) { - LocalLabelVal = getTok().getIntVal(); - if (LocalLabelVal < 0) { - if (!TheCondState.Ignore) { - Lex(); // always eat a token - return Error(IDLoc, "unexpected token at start of statement"); - } - IDVal = ""; - } else { - IDVal = getTok().getString(); - Lex(); // Consume the integer token to be used as an identifier token. - if (Lexer.getKind() != AsmToken::Colon) { - if (!TheCondState.Ignore) { - Lex(); // always eat a token - return Error(IDLoc, "unexpected token at start of statement"); - } - } - } - } else if (Lexer.is(AsmToken::Dot)) { + if (Lexer.is(AsmToken::Dot)) { // Treat '.' as a valid identifier in this context. Lex(); IDVal = "."; @@ -2257,19 +2226,22 @@ bool MasmParser::parseStatement(ParseStatementInfo &Info, // FIXME: This doesn't diagnose assignment to a symbol which has been // implicitly marked as external. MCSymbol *Sym; - if (LocalLabelVal == -1) { - if (ParsingMSInlineAsm && SI) { - StringRef RewrittenLabel = - SI->LookupInlineAsmLabel(IDVal, getSourceManager(), IDLoc, true); - assert(!RewrittenLabel.empty() && - "We should have an internal name here."); - Info.AsmRewrites->emplace_back(AOK_Label, IDLoc, IDVal.size(), - RewrittenLabel); - IDVal = RewrittenLabel; - } + if (ParsingMSInlineAsm && SI) { + StringRef RewrittenLabel = + SI->LookupInlineAsmLabel(IDVal, getSourceManager(), IDLoc, true); + assert(!RewrittenLabel.empty() && + "We should have an internal name here."); + Info.AsmRewrites->emplace_back(AOK_Label, IDLoc, IDVal.size(), + RewrittenLabel); + IDVal = RewrittenLabel; + } + // Handle directional local labels + if (IDVal == "@@") { + Sym = Ctx.createDirectionalLocalSymbol(0); + } else { Sym = getContext().getOrCreateSymbol(IDVal); - } else - Sym = Ctx.createDirectionalLocalSymbol(LocalLabelVal); + } + // End of Labels should be treated as end of line for lexing // purposes but that information is not available to the Lexer who // does not understand Labels. 
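// Editor's aside — hypothetical sketch of the operator-keyword table extended
// above: StringSwitch::CaseLower maps MASM's case-insensitive XOR/SHL/SHR
// spellings onto the lexer's punctuation tokens before binary-operator
// parsing. (AsmToken is assumed to come from llvm/MC/MCAsmMacro.h.)
#include "llvm/ADT/StringSwitch.h"
#include "llvm/MC/MCAsmMacro.h"

static llvm::AsmToken::TokenKind remapOperatorKeyword(llvm::StringRef Id) {
  return llvm::StringSwitch<llvm::AsmToken::TokenKind>(Id)
      .CaseLower("xor", llvm::AsmToken::Caret)
      .CaseLower("shl", llvm::AsmToken::LessLess)
      .CaseLower("shr", llvm::AsmToken::GreaterGreater)
      .Default(llvm::AsmToken::Error);
}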
This may cause us to see a Hash @@ -4241,7 +4213,7 @@ bool MasmParser::parseStructInitializer(const StructInfo &Structure, size_t FieldIndex = 0; if (EndToken) { // Initialize all fields with given initializers. - while (getTok().isNot(EndToken.getValue()) && + while (getTok().isNot(EndToken.value()) && FieldIndex < Structure.Fields.size()) { const FieldInfo &Field = Structure.Fields[FieldIndex++]; if (parseOptionalToken(AsmToken::Comma)) { @@ -4273,10 +4245,10 @@ bool MasmParser::parseStructInitializer(const StructInfo &Structure, } if (EndToken) { - if (EndToken.getValue() == AsmToken::Greater) + if (EndToken.value() == AsmToken::Greater) return parseAngleBracketClose(); - return parseToken(EndToken.getValue()); + return parseToken(EndToken.value()); } return false; diff --git a/llvm/lib/MC/MCSchedule.cpp b/llvm/lib/MC/MCSchedule.cpp index 98eb7eada064..71c8e6f02f8e 100644 --- a/llvm/lib/MC/MCSchedule.cpp +++ b/llvm/lib/MC/MCSchedule.cpp @@ -96,10 +96,10 @@ MCSchedModel::getReciprocalThroughput(const MCSubtargetInfo &STI, continue; unsigned NumUnits = SM.getProcResource(I->ProcResourceIdx)->NumUnits; double Temp = NumUnits * 1.0 / I->Cycles; - Throughput = Throughput ? std::min(Throughput.getValue(), Temp) : Temp; + Throughput = Throughput ? std::min(Throughput.value(), Temp) : Temp; } if (Throughput) - return 1.0 / Throughput.getValue(); + return 1.0 / Throughput.value(); // If no throughput value was calculated, assume that we can execute at the // maximum issue width scaled by number of micro-ops for the schedule class. @@ -140,10 +140,10 @@ MCSchedModel::getReciprocalThroughput(unsigned SchedClass, if (!I->getCycles()) continue; double Temp = countPopulation(I->getUnits()) * 1.0 / I->getCycles(); - Throughput = Throughput ? std::min(Throughput.getValue(), Temp) : Temp; + Throughput = Throughput ? std::min(Throughput.value(), Temp) : Temp; } if (Throughput) - return 1.0 / Throughput.getValue(); + return 1.0 / Throughput.value(); // If there are no execution resources specified for this class, then assume // that it can execute at the maximum default issue width. diff --git a/llvm/lib/MC/MCSectionELF.cpp b/llvm/lib/MC/MCSectionELF.cpp index 27dc1826819b..077cee132338 100644 --- a/llvm/lib/MC/MCSectionELF.cpp +++ b/llvm/lib/MC/MCSectionELF.cpp @@ -167,6 +167,8 @@ void MCSectionELF::printSwitchToSection(const MCAsmInfo &MAI, const Triple &T, OS << "llvm_bb_addr_map"; else if (Type == ELF::SHT_LLVM_BB_ADDR_MAP_V0) OS << "llvm_bb_addr_map_v0"; + else if (Type == ELF::SHT_LLVM_OFFLOADING) + OS << "llvm_offloading"; else report_fatal_error("unsupported type 0x" + Twine::utohexstr(Type) + " for section " + getName()); diff --git a/llvm/lib/MC/MCSectionXCOFF.cpp b/llvm/lib/MC/MCSectionXCOFF.cpp index ee8fa04c421f..9a35ac69c47c 100644 --- a/llvm/lib/MC/MCSectionXCOFF.cpp +++ b/llvm/lib/MC/MCSectionXCOFF.cpp @@ -110,8 +110,8 @@ void MCSectionXCOFF::printSwitchToSection(const MCAsmInfo &MAI, const Triple &T, // XCOFF debug sections. 
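// Editor's aside — a self-contained sketch (hypothetical helper) of the
// accumulate-then-invert pattern in the MCSchedule hunks above: the Optional
// stays empty until some resource contributes, so "no data" is never
// conflated with a throughput of zero.
#include "llvm/ADT/Optional.h"
#include <algorithm>
#include <vector>

static double reciprocalThroughput(const std::vector<double> &PerResource,
                                   double Fallback) {
  llvm::Optional<double> Throughput;
  for (double Temp : PerResource)
    Throughput = Throughput ? std::min(Throughput.value(), Temp) : Temp;
  return Throughput ? 1.0 / Throughput.value() : Fallback;
}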
if (getKind().isMetadata() && isDwarfSect()) { - OS << "\n\t.dwsect " - << format("0x%" PRIx32, getDwarfSubtypeFlags().getValue()) << '\n'; + OS << "\n\t.dwsect " << format("0x%" PRIx32, getDwarfSubtypeFlags().value()) + << '\n'; OS << MAI.getPrivateLabelPrefix() << getName() << ':' << '\n'; return; } diff --git a/llvm/lib/ObjCopy/ConfigManager.cpp b/llvm/lib/ObjCopy/ConfigManager.cpp index 9d8883a15c0b..77321829e614 100644 --- a/llvm/lib/ObjCopy/ConfigManager.cpp +++ b/llvm/lib/ObjCopy/ConfigManager.cpp @@ -20,9 +20,9 @@ Expected<const COFFConfig &> ConfigManager::getCOFFConfig() const { !Common.SymbolsToKeep.empty() || !Common.SymbolsToLocalize.empty() || !Common.SymbolsToWeaken.empty() || !Common.SymbolsToKeepGlobal.empty() || !Common.SectionsToRename.empty() || !Common.SetSectionAlignment.empty() || - Common.ExtractDWO || Common.PreserveDates || Common.StripDWO || - Common.StripNonAlloc || Common.StripSections || Common.Weaken || - Common.DecompressDebugSections || + !Common.SetSectionType.empty() || Common.ExtractDWO || + Common.PreserveDates || Common.StripDWO || Common.StripNonAlloc || + Common.StripSections || Common.Weaken || Common.DecompressDebugSections || Common.DiscardMode == DiscardType::Locals || !Common.SymbolsToAdd.empty()) return createStringError(llvm::errc::invalid_argument, "option is not supported for COFF"); @@ -38,9 +38,10 @@ Expected<const MachOConfig &> ConfigManager::getMachOConfig() const { !Common.SymbolsToKeepGlobal.empty() || !Common.SectionsToRename.empty() || !Common.UnneededSymbolsToRemove.empty() || !Common.SetSectionAlignment.empty() || !Common.SetSectionFlags.empty() || - Common.ExtractDWO || Common.PreserveDates || Common.StripAllGNU || - Common.StripDWO || Common.StripNonAlloc || Common.StripSections || - Common.Weaken || Common.DecompressDebugSections || Common.StripUnneeded || + !Common.SetSectionType.empty() || Common.ExtractDWO || + Common.PreserveDates || Common.StripAllGNU || Common.StripDWO || + Common.StripNonAlloc || Common.StripSections || Common.Weaken || + Common.DecompressDebugSections || Common.StripUnneeded || Common.DiscardMode == DiscardType::Locals || !Common.SymbolsToAdd.empty()) return createStringError(llvm::errc::invalid_argument, "option is not supported for MachO"); @@ -58,7 +59,8 @@ Expected<const WasmConfig &> ConfigManager::getWasmConfig() const { !Common.UnneededSymbolsToRemove.empty() || !Common.SymbolsToWeaken.empty() || !Common.SymbolsToKeepGlobal.empty() || !Common.SectionsToRename.empty() || !Common.SetSectionAlignment.empty() || - !Common.SetSectionFlags.empty() || !Common.SymbolsToRename.empty()) + !Common.SetSectionFlags.empty() || !Common.SetSectionType.empty() || + !Common.SymbolsToRename.empty()) return createStringError(llvm::errc::invalid_argument, "only flags for section dumping, removal, and " "addition are supported"); @@ -79,12 +81,12 @@ Expected<const XCOFFConfig &> ConfigManager::getXCOFFConfig() const { !Common.UnneededSymbolsToRemove.empty() || !Common.SymbolsToWeaken.empty() || !Common.SymbolsToKeepGlobal.empty() || !Common.SectionsToRename.empty() || !Common.SetSectionAlignment.empty() || - !Common.SetSectionFlags.empty() || !Common.SymbolsToRename.empty() || - Common.ExtractDWO || Common.ExtractMainPartition || - Common.OnlyKeepDebug || Common.PreserveDates || Common.StripAllGNU || - Common.StripDWO || Common.StripDebug || Common.StripNonAlloc || - Common.StripSections || Common.Weaken || Common.StripUnneeded || - Common.DecompressDebugSections) { + !Common.SetSectionFlags.empty() || 
!Common.SetSectionType.empty() || + !Common.SymbolsToRename.empty() || Common.ExtractDWO || + Common.ExtractMainPartition || Common.OnlyKeepDebug || + Common.PreserveDates || Common.StripAllGNU || Common.StripDWO || + Common.StripDebug || Common.StripNonAlloc || Common.StripSections || + Common.Weaken || Common.StripUnneeded || Common.DecompressDebugSections) { return createStringError( llvm::errc::invalid_argument, "no flags are supported yet, only basic copying is allowed"); diff --git a/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp b/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp index 2d388f8a867e..781be3d8aeb1 100644 --- a/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp +++ b/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp @@ -600,8 +600,8 @@ handleUserSection(const NewSectionInfo &NewSection, static Error handleArgs(const CommonConfig &Config, const ELFConfig &ELFConfig, Object &Obj) { if (Config.OutputArch) { - Obj.Machine = Config.OutputArch.getValue().EMachine; - Obj.OSABI = Config.OutputArch.getValue().OSABI; + Obj.Machine = Config.OutputArch.value().EMachine; + Obj.OSABI = Config.OutputArch.value().OSABI; } if (!Config.SplitDWO.empty() && Config.ExtractDWO) { @@ -629,6 +629,66 @@ static Error handleArgs(const CommonConfig &Config, const ELFConfig &ELFConfig, if (Error E = updateAndRemoveSymbols(Config, ELFConfig, Obj)) return E; + if (!Config.SetSectionAlignment.empty()) { + for (SectionBase &Sec : Obj.sections()) { + auto I = Config.SetSectionAlignment.find(Sec.Name); + if (I != Config.SetSectionAlignment.end()) + Sec.Align = I->second; + } + } + + if (Config.OnlyKeepDebug) + for (auto &Sec : Obj.sections()) + if (Sec.Flags & SHF_ALLOC && Sec.Type != SHT_NOTE) + Sec.Type = SHT_NOBITS; + + for (const NewSectionInfo &AddedSection : Config.AddSection) { + auto AddSection = [&](StringRef Name, ArrayRef<uint8_t> Data) { + OwnedDataSection &NewSection = + Obj.addSection<OwnedDataSection>(Name, Data); + if (Name.startswith(".note") && Name != ".note.GNU-stack") + NewSection.Type = SHT_NOTE; + return Error::success(); + }; + if (Error E = handleUserSection(AddedSection, AddSection)) + return E; + } + + for (const NewSectionInfo &NewSection : Config.UpdateSection) { + auto UpdateSection = [&](StringRef Name, ArrayRef<uint8_t> Data) { + return Obj.updateSection(Name, Data); + }; + if (Error E = handleUserSection(NewSection, UpdateSection)) + return E; + } + + if (!Config.AddGnuDebugLink.empty()) + Obj.addSection<GnuDebugLinkSection>(Config.AddGnuDebugLink, + Config.GnuDebugLinkCRC32); + + // If the symbol table was previously removed, we need to create a new one + // before adding new symbols. + if (!Obj.SymbolTable && !Config.SymbolsToAdd.empty()) + if (Error E = Obj.addNewSymbolTable()) + return E; + + for (const NewSymbolInfo &SI : Config.SymbolsToAdd) + addSymbol(Obj, SI, ELFConfig.NewSymbolVisibility); + + // --set-section-{flags,type} work with sections added by --add-section. 
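// Editor's aside — a simplified, hypothetical model of the override loop that
// follows: per-section updates are keyed by section name, and because this
// now runs after --add-section, freshly added sections can be retyped too.
#include "llvm/ADT/StringMap.h"
#include <cstdint>
#include <string>
#include <vector>

struct DemoSection {
  std::string Name;
  uint64_t Type = 0;
};

static void applyTypeOverrides(std::vector<DemoSection> &Sections,
                               const llvm::StringMap<uint64_t> &SetSectionType) {
  for (DemoSection &Sec : Sections) {
    auto It = SetSectionType.find(Sec.Name);
    if (It != SetSectionType.end())
      Sec.Type = It->second;
  }
}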
+ if (!Config.SetSectionFlags.empty() || !Config.SetSectionType.empty()) { + for (auto &Sec : Obj.sections()) { + const auto Iter = Config.SetSectionFlags.find(Sec.Name); + if (Iter != Config.SetSectionFlags.end()) { + const SectionFlagsUpdate &SFU = Iter->second; + setSectionFlagsAndType(Sec, SFU.NewFlags); + } + auto It2 = Config.SetSectionType.find(Sec.Name); + if (It2 != Config.SetSectionType.end()) + Sec.Type = It2->second; + } + } + if (!Config.SectionsToRename.empty()) { std::vector<RelocationSectionBase *> RelocSections; DenseSet<SectionBase *> RenamedSections; @@ -639,7 +699,7 @@ static Error handleArgs(const CommonConfig &Config, const ELFConfig &ELFConfig, const SectionRename &SR = Iter->second; Sec.Name = std::string(SR.NewName); if (SR.NewFlags) - setSectionFlagsAndType(Sec, SR.NewFlags.getValue()); + setSectionFlagsAndType(Sec, SR.NewFlags.value()); RenamedSections.insert(&Sec); } else if (RelocSec && !(Sec.Flags & SHF_ALLOC)) // Postpone processing relocation sections which are not specified in @@ -693,63 +753,6 @@ static Error handleArgs(const CommonConfig &Config, const ELFConfig &ELFConfig, } } - if (!Config.SetSectionAlignment.empty()) { - for (SectionBase &Sec : Obj.sections()) { - auto I = Config.SetSectionAlignment.find(Sec.Name); - if (I != Config.SetSectionAlignment.end()) - Sec.Align = I->second; - } - } - - if (Config.OnlyKeepDebug) - for (auto &Sec : Obj.sections()) - if (Sec.Flags & SHF_ALLOC && Sec.Type != SHT_NOTE) - Sec.Type = SHT_NOBITS; - - for (const NewSectionInfo &AddedSection : Config.AddSection) { - auto AddSection = [&](StringRef Name, ArrayRef<uint8_t> Data) { - OwnedDataSection &NewSection = - Obj.addSection<OwnedDataSection>(Name, Data); - if (Name.startswith(".note") && Name != ".note.GNU-stack") - NewSection.Type = SHT_NOTE; - return Error::success(); - }; - if (Error E = handleUserSection(AddedSection, AddSection)) - return E; - } - - for (const NewSectionInfo &NewSection : Config.UpdateSection) { - auto UpdateSection = [&](StringRef Name, ArrayRef<uint8_t> Data) { - return Obj.updateSection(Name, Data); - }; - if (Error E = handleUserSection(NewSection, UpdateSection)) - return E; - } - - if (!Config.AddGnuDebugLink.empty()) - Obj.addSection<GnuDebugLinkSection>(Config.AddGnuDebugLink, - Config.GnuDebugLinkCRC32); - - // If the symbol table was previously removed, we need to create a new one - // before adding new symbols. - if (!Obj.SymbolTable && !Config.SymbolsToAdd.empty()) - if (Error E = Obj.addNewSymbolTable()) - return E; - - for (const NewSymbolInfo &SI : Config.SymbolsToAdd) - addSymbol(Obj, SI, ELFConfig.NewSymbolVisibility); - - // --set-section-flags works with sections added by --add-section. - if (!Config.SetSectionFlags.empty()) { - for (auto &Sec : Obj.sections()) { - const auto Iter = Config.SetSectionFlags.find(Sec.Name); - if (Iter != Config.SetSectionFlags.end()) { - const SectionFlagsUpdate &SFU = Iter->second; - setSectionFlagsAndType(Sec, SFU.NewFlags); - } - } - } - if (ELFConfig.EntryExpr) Obj.Entry = ELFConfig.EntryExpr(Obj.Entry); return Error::success(); @@ -808,7 +811,7 @@ Error objcopy::elf::executeObjcopyOnBinary(const CommonConfig &Config, return Obj.takeError(); // Prefer OutputArch (-O<format>) if set, otherwise infer it from the input. const ElfType OutputElfType = - Config.OutputArch ? getOutputElfType(Config.OutputArch.getValue()) + Config.OutputArch ? 
getOutputElfType(Config.OutputArch.value()) : getOutputElfType(In); if (Error E = handleArgs(Config, ELFConfig, **Obj)) diff --git a/llvm/lib/ObjCopy/ELF/ELFObject.cpp b/llvm/lib/ObjCopy/ELF/ELFObject.cpp index b241bd817ff5..f0e4f91cd347 100644 --- a/llvm/lib/ObjCopy/ELF/ELFObject.cpp +++ b/llvm/lib/ObjCopy/ELF/ELFObject.cpp @@ -463,13 +463,12 @@ Error ELFSectionWriter<ELFT>::visit(const DecompressedSection &Sec) { ? (ZlibGnuMagic.size() + sizeof(Sec.Size)) : sizeof(Elf_Chdr_Impl<ELFT>); - StringRef CompressedContent( - reinterpret_cast<const char *>(Sec.OriginalData.data()) + DataOffset, - Sec.OriginalData.size() - DataOffset); - - SmallVector<char, 128> DecompressedContent; - if (Error Err = zlib::uncompress(CompressedContent, DecompressedContent, - static_cast<size_t>(Sec.Size))) + ArrayRef<uint8_t> CompressedContent(Sec.OriginalData.data() + DataOffset, + Sec.OriginalData.size() - DataOffset); + SmallVector<uint8_t, 128> DecompressedContent; + if (Error Err = + compression::zlib::uncompress(CompressedContent, DecompressedContent, + static_cast<size_t>(Sec.Size))) return createStringError(errc::invalid_argument, "'" + Sec.Name + "': " + toString(std::move(Err))); @@ -544,9 +543,7 @@ CompressedSection::CompressedSection(const SectionBase &Sec, DebugCompressionType CompressionType) : SectionBase(Sec), CompressionType(CompressionType), DecompressedSize(Sec.OriginalData.size()), DecompressedAlign(Sec.Align) { - zlib::compress(StringRef(reinterpret_cast<const char *>(OriginalData.data()), - OriginalData.size()), - CompressedData); + compression::zlib::compress(OriginalData, CompressedData); assert(CompressionType != DebugCompressionType::None); Flags |= ELF::SHF_COMPRESSED; @@ -2643,9 +2640,12 @@ Error BinaryWriter::finalize() { // MinAddr will be skipped. uint64_t MinAddr = UINT64_MAX; for (SectionBase &Sec : Obj.allocSections()) { + // If Sec's type is changed from SHT_NOBITS due to --set-section-flags, + // Offset may not be aligned. Align it to max(Align, 1). 
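// Editor's aside — a hedged round-trip sketch using the byte-oriented zlib
// API the ELFObject.cpp hunks above migrate to (signatures as introduced by
// this change; the helper itself is hypothetical). String data now crosses
// into the compressor via arrayRefFromStringRef/toStringRef rather than
// ad-hoc reinterpret_casts.
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/Error.h"
#include <cassert>

static llvm::Error roundTrip(llvm::StringRef Text) {
  using namespace llvm;
  if (!compression::zlib::isAvailable())
    return createStringError(inconvertibleErrorCode(), "no zlib");
  SmallVector<uint8_t, 128> Compressed;
  compression::zlib::compress(arrayRefFromStringRef(Text), Compressed,
                              compression::zlib::BestSizeCompression);
  SmallVector<uint8_t, 128> Decompressed;
  if (Error E = compression::zlib::uncompress(Compressed, Decompressed,
                                              /*UncompressedSize=*/Text.size()))
    return E;
  assert(toStringRef(Decompressed) == Text);
  return Error::success();
}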
if (Sec.ParentSegment != nullptr) - Sec.Addr = - Sec.Offset - Sec.ParentSegment->Offset + Sec.ParentSegment->PAddr; + Sec.Addr = alignTo(Sec.Offset - Sec.ParentSegment->Offset + + Sec.ParentSegment->PAddr, + std::max(Sec.Align, uint64_t(1))); if (Sec.Type != SHT_NOBITS && Sec.Size > 0) MinAddr = std::min(MinAddr, Sec.Addr); } diff --git a/llvm/lib/ObjCopy/ELF/ELFObject.h b/llvm/lib/ObjCopy/ELF/ELFObject.h index f33bbb029c9b..799db5034532 100644 --- a/llvm/lib/ObjCopy/ELF/ELFObject.h +++ b/llvm/lib/ObjCopy/ELF/ELFObject.h @@ -539,7 +539,7 @@ class CompressedSection : public SectionBase { DebugCompressionType CompressionType; uint64_t DecompressedSize; uint64_t DecompressedAlign; - SmallVector<char, 128> CompressedData; + SmallVector<uint8_t, 128> CompressedData; public: CompressedSection(const SectionBase &Sec, diff --git a/llvm/lib/Object/Decompressor.cpp b/llvm/lib/Object/Decompressor.cpp index de067ed59ac5..a6a28a0589ac 100644 --- a/llvm/lib/Object/Decompressor.cpp +++ b/llvm/lib/Object/Decompressor.cpp @@ -19,7 +19,7 @@ using namespace object; Expected<Decompressor> Decompressor::create(StringRef Name, StringRef Data, bool IsLE, bool Is64Bit) { - if (!zlib::isAvailable()) + if (!compression::zlib::isAvailable()) return createError("zlib is not available"); Decompressor D(Data); @@ -92,7 +92,8 @@ bool Decompressor::isCompressedELFSection(uint64_t Flags, StringRef Name) { return (Flags & ELF::SHF_COMPRESSED) || isGnuStyle(Name); } -Error Decompressor::decompress(MutableArrayRef<char> Buffer) { +Error Decompressor::decompress(MutableArrayRef<uint8_t> Buffer) { size_t Size = Buffer.size(); - return zlib::uncompress(SectionData, Buffer.data(), Size); + return compression::zlib::uncompress(arrayRefFromStringRef(SectionData), + Buffer.data(), Size); } diff --git a/llvm/lib/Object/ELF.cpp b/llvm/lib/Object/ELF.cpp index 6acf4543be5a..0d5aa91c1348 100644 --- a/llvm/lib/Object/ELF.cpp +++ b/llvm/lib/Object/ELF.cpp @@ -297,6 +297,7 @@ StringRef llvm::object::getELFSectionTypeName(uint32_t Machine, unsigned Type) { STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_PART_PHDR); STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_BB_ADDR_MAP_V0); STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_BB_ADDR_MAP); + STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_OFFLOADING); STRINGIFY_ENUM_CASE(ELF, SHT_GNU_ATTRIBUTES); STRINGIFY_ENUM_CASE(ELF, SHT_GNU_HASH); STRINGIFY_ENUM_CASE(ELF, SHT_GNU_verdef); diff --git a/llvm/lib/Object/ELFObjectFile.cpp b/llvm/lib/Object/ELFObjectFile.cpp index 38de669f1d3d..1f342e55e77f 100644 --- a/llvm/lib/Object/ELFObjectFile.cpp +++ b/llvm/lib/Object/ELFObjectFile.cpp @@ -168,11 +168,11 @@ SubtargetFeatures ELFObjectFileBase::getARMFeatures() const { Optional<unsigned> Attr = Attributes.getAttributeValue(ARMBuildAttrs::CPU_arch); if (Attr) - isV7 = Attr.getValue() == ARMBuildAttrs::v7; + isV7 = Attr.value() == ARMBuildAttrs::v7; Attr = Attributes.getAttributeValue(ARMBuildAttrs::CPU_arch_profile); if (Attr) { - switch (Attr.getValue()) { + switch (Attr.value()) { case ARMBuildAttrs::ApplicationProfile: Features.AddFeature("aclass"); break; @@ -191,7 +191,7 @@ SubtargetFeatures ELFObjectFileBase::getARMFeatures() const { Attr = Attributes.getAttributeValue(ARMBuildAttrs::THUMB_ISA_use); if (Attr) { - switch (Attr.getValue()) { + switch (Attr.value()) { default: break; case ARMBuildAttrs::Not_Allowed: @@ -206,7 +206,7 @@ SubtargetFeatures ELFObjectFileBase::getARMFeatures() const { Attr = Attributes.getAttributeValue(ARMBuildAttrs::FP_arch); if (Attr) { - switch (Attr.getValue()) { + switch (Attr.value()) { default: break; case 
ARMBuildAttrs::Not_Allowed: @@ -230,7 +230,7 @@ SubtargetFeatures ELFObjectFileBase::getARMFeatures() const { Attr = Attributes.getAttributeValue(ARMBuildAttrs::Advanced_SIMD_arch); if (Attr) { - switch (Attr.getValue()) { + switch (Attr.value()) { default: break; case ARMBuildAttrs::Not_Allowed: @@ -249,7 +249,7 @@ SubtargetFeatures ELFObjectFileBase::getARMFeatures() const { Attr = Attributes.getAttributeValue(ARMBuildAttrs::MVE_arch); if (Attr) { - switch (Attr.getValue()) { + switch (Attr.value()) { default: break; case ARMBuildAttrs::Not_Allowed: @@ -268,7 +268,7 @@ SubtargetFeatures ELFObjectFileBase::getARMFeatures() const { Attr = Attributes.getAttributeValue(ARMBuildAttrs::DIV_use); if (Attr) { - switch (Attr.getValue()) { + switch (Attr.value()) { default: break; case ARMBuildAttrs::DisallowDIV: @@ -524,7 +524,7 @@ void ELFObjectFileBase::setARMSubArch(Triple &TheTriple) const { Optional<unsigned> Attr = Attributes.getAttributeValue(ARMBuildAttrs::CPU_arch); if (Attr) { - switch (Attr.getValue()) { + switch (Attr.value()) { case ARMBuildAttrs::v4: Triple += "v4"; break; @@ -556,7 +556,7 @@ void ELFObjectFileBase::setARMSubArch(Triple &TheTriple) const { Optional<unsigned> ArchProfileAttr = Attributes.getAttributeValue(ARMBuildAttrs::CPU_arch_profile); if (ArchProfileAttr && - ArchProfileAttr.getValue() == ARMBuildAttrs::MicroControllerProfile) + ArchProfileAttr.value() == ARMBuildAttrs::MicroControllerProfile) Triple += "v7m"; else Triple += "v7"; diff --git a/llvm/lib/Object/Error.cpp b/llvm/lib/Object/Error.cpp index 6d1e3f2a59d0..62cb51ca09e4 100644 --- a/llvm/lib/Object/Error.cpp +++ b/llvm/lib/Object/Error.cpp @@ -13,7 +13,6 @@ #include "llvm/Object/Error.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/ManagedStatic.h" using namespace llvm; using namespace object; @@ -75,10 +74,9 @@ void GenericBinaryError::log(raw_ostream &OS) const { OS << Msg; } -static ManagedStatic<_object_error_category> error_category; - const std::error_category &object::object_category() { - return *error_category; + static _object_error_category error_category; + return error_category; } llvm::Error llvm::object::isNotObjectErrorInvalidFileType(llvm::Error Err) { diff --git a/llvm/lib/ObjectYAML/DXContainerEmitter.cpp b/llvm/lib/ObjectYAML/DXContainerEmitter.cpp index 9834b036de90..60870bbb801f 100644 --- a/llvm/lib/ObjectYAML/DXContainerEmitter.cpp +++ b/llvm/lib/ObjectYAML/DXContainerEmitter.cpp @@ -133,17 +133,17 @@ void DXContainerWriter::writeParts(raw_ostream &OS) { // Compute the optional fields if needed... if (P.Program->DXILOffset) - Header.Bitcode.Offset = P.Program->DXILOffset.getValue(); + Header.Bitcode.Offset = P.Program->DXILOffset.value(); else Header.Bitcode.Offset = sizeof(dxbc::BitcodeHeader); if (P.Program->DXILSize) - Header.Bitcode.Size = P.Program->DXILSize.getValue(); + Header.Bitcode.Size = P.Program->DXILSize.value(); else Header.Bitcode.Size = P.Program->DXIL ? 
P.Program->DXIL->size() : 0; if (P.Program->Size) - Header.Size = P.Program->Size.getValue(); + Header.Size = P.Program->Size.value(); else Header.Size = sizeof(dxbc::ProgramHeader) + Header.Bitcode.Size; diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp index cdd180cdc15d..b778006cf66e 100644 --- a/llvm/lib/ObjectYAML/ELFYAML.cpp +++ b/llvm/lib/ObjectYAML/ELFYAML.cpp @@ -656,6 +656,7 @@ void ScalarEnumerationTraits<ELFYAML::ELF_SHT>::enumeration( ECase(SHT_LLVM_PART_PHDR); ECase(SHT_LLVM_BB_ADDR_MAP_V0); ECase(SHT_LLVM_BB_ADDR_MAP); + ECase(SHT_LLVM_OFFLOADING); ECase(SHT_GNU_ATTRIBUTES); ECase(SHT_GNU_HASH); ECase(SHT_GNU_verdef); diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index a5345172aae1..593243144f01 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -1728,8 +1728,7 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, // in ICP (which is performed earlier than this in the regular LTO pipeline). MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true)); - // Enable splitting late in the FullLTO post-link pipeline. This is done in - // the same stage in the old pass manager (\ref addLateLTOOptimizationPasses). + // Enable splitting late in the FullLTO post-link pipeline. if (EnableHotColdSplit) MPM.addPass(HotColdSplittingPass()); diff --git a/llvm/lib/Passes/StandardInstrumentations.cpp b/llvm/lib/Passes/StandardInstrumentations.cpp index ab9f8bf9c957..bad8184dffcf 100644 --- a/llvm/lib/Passes/StandardInstrumentations.cpp +++ b/llvm/lib/Passes/StandardInstrumentations.cpp @@ -901,10 +901,11 @@ bool OptNoneInstrumentation::shouldRun(StringRef PassID, Any IR) { void OptBisectInstrumentation::registerCallbacks( PassInstrumentationCallbacks &PIC) { - if (!OptBisector->isEnabled()) + if (!getOptBisector().isEnabled()) return; PIC.registerShouldRunOptionalPassCallback([](StringRef PassID, Any IR) { - return isIgnored(PassID) || OptBisector->checkPass(PassID, getIRName(IR)); + return isIgnored(PassID) || + getOptBisector().checkPass(PassID, getIRName(IR)); }); } diff --git a/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp b/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp index f9e58fd6afa5..f4f13bafb233 100644 --- a/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp +++ b/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp @@ -25,7 +25,6 @@ #include "llvm/Support/Errc.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> @@ -897,10 +896,9 @@ std::string CoverageMapError::message() const { return getCoverageMapErrString(Err); } -static ManagedStatic<CoverageMappingErrorCategoryType> ErrorCategory; - const std::error_category &llvm::coverage::coveragemap_category() { - return *ErrorCategory; + static CoverageMappingErrorCategoryType ErrorCategory; + return ErrorCategory; } char CoverageMapError::ID = 0; diff --git a/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp b/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp index 1a187795a8a0..552140a52ad4 100644 --- a/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp +++ b/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp @@ -119,26 +119,26 @@ Error RawCoverageFilenamesReader::read(CovMapVersion Version) { return Err; if (CompressedLen > 0) { - if (!zlib::isAvailable()) + if (!compression::zlib::isAvailable()) return 
make_error<CoverageMapError>( coveragemap_error::decompression_failed); // Allocate memory for the decompressed filenames. - SmallVector<char, 0> StorageBuf; + SmallVector<uint8_t, 0> StorageBuf; // Read compressed filenames. StringRef CompressedFilenames = Data.substr(0, CompressedLen); Data = Data.substr(CompressedLen); - auto Err = - zlib::uncompress(CompressedFilenames, StorageBuf, UncompressedLen); + auto Err = compression::zlib::uncompress( + arrayRefFromStringRef(CompressedFilenames), StorageBuf, + UncompressedLen); if (Err) { consumeError(std::move(Err)); return make_error<CoverageMapError>( coveragemap_error::decompression_failed); } - StringRef UncompressedFilenames(StorageBuf.data(), StorageBuf.size()); - RawCoverageFilenamesReader Delegate(UncompressedFilenames, Filenames, + RawCoverageFilenamesReader Delegate(toStringRef(StorageBuf), Filenames, CompilationDir); return Delegate.readUncompressed(Version, NumFilenames); } diff --git a/llvm/lib/ProfileData/Coverage/CoverageMappingWriter.cpp b/llvm/lib/ProfileData/Coverage/CoverageMappingWriter.cpp index 781a2901dbb9..db9be34d5248 100644 --- a/llvm/lib/ProfileData/Coverage/CoverageMappingWriter.cpp +++ b/llvm/lib/ProfileData/Coverage/CoverageMappingWriter.cpp @@ -46,11 +46,13 @@ void CoverageFilenamesSectionWriter::write(raw_ostream &OS, bool Compress) { } } - SmallString<128> CompressedStr; - bool doCompression = - Compress && zlib::isAvailable() && DoInstrProfNameCompression; + SmallVector<uint8_t, 128> CompressedStr; + bool doCompression = Compress && compression::zlib::isAvailable() && + DoInstrProfNameCompression; if (doCompression) - zlib::compress(FilenamesStr, CompressedStr, zlib::BestSizeCompression); + compression::zlib::compress(arrayRefFromStringRef(FilenamesStr), + CompressedStr, + compression::zlib::BestSizeCompression); // ::= <num-filenames> // <uncompressed-len> @@ -59,7 +61,7 @@ void CoverageFilenamesSectionWriter::write(raw_ostream &OS, bool Compress) { encodeULEB128(Filenames.size(), OS); encodeULEB128(FilenamesStr.size(), OS); encodeULEB128(doCompression ? CompressedStr.size() : 0U, OS); - OS << (doCompression ? CompressedStr.str() : StringRef(FilenamesStr)); + OS << (doCompression ? 
toStringRef(CompressedStr) : StringRef(FilenamesStr)); } namespace { diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp index 48ac5ce0d607..f8d7c4d36481 100644 --- a/llvm/lib/ProfileData/InstrProf.cpp +++ b/llvm/lib/ProfileData/InstrProf.cpp @@ -39,7 +39,6 @@ #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/LEB128.h" -#include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Path.h" #include "llvm/Support/SwapByteOrder.h" @@ -177,10 +176,9 @@ class InstrProfErrorCategoryType : public std::error_category { } // end anonymous namespace -static ManagedStatic<InstrProfErrorCategoryType> ErrorCategory; - const std::error_category &llvm::instrprof_category() { - return *ErrorCategory; + static InstrProfErrorCategoryType ErrorCategory; + return ErrorCategory; } namespace { @@ -466,12 +464,13 @@ Error collectPGOFuncNameStrings(ArrayRef<std::string> NameStrs, return WriteStringToResult(0, UncompressedNameStrings); } - SmallString<128> CompressedNameStrings; - zlib::compress(StringRef(UncompressedNameStrings), CompressedNameStrings, - zlib::BestSizeCompression); + SmallVector<uint8_t, 128> CompressedNameStrings; + compression::zlib::compress(arrayRefFromStringRef(UncompressedNameStrings), + CompressedNameStrings, + compression::zlib::BestSizeCompression); return WriteStringToResult(CompressedNameStrings.size(), - CompressedNameStrings); + toStringRef(CompressedNameStrings)); } StringRef getPGOFuncNameVarInitializer(GlobalVariable *NameVar) { @@ -488,7 +487,7 @@ Error collectPGOFuncNameStrings(ArrayRef<GlobalVariable *> NameVars, NameStrs.push_back(std::string(getPGOFuncNameVarInitializer(NameVar))); } return collectPGOFuncNameStrings( - NameStrs, zlib::isAvailable() && doCompression, Result); + NameStrs, compression::zlib::isAvailable() && doCompression, Result); } Error readPGOFuncNameStrings(StringRef NameStrings, InstrProfSymtab &Symtab) { @@ -501,23 +500,20 @@ Error readPGOFuncNameStrings(StringRef NameStrings, InstrProfSymtab &Symtab) { uint64_t CompressedSize = decodeULEB128(P, &N); P += N; bool isCompressed = (CompressedSize != 0); - SmallString<128> UncompressedNameStrings; + SmallVector<uint8_t, 128> UncompressedNameStrings; StringRef NameStrings; if (isCompressed) { - if (!llvm::zlib::isAvailable()) + if (!llvm::compression::zlib::isAvailable()) return make_error<InstrProfError>(instrprof_error::zlib_unavailable); - StringRef CompressedNameStrings(reinterpret_cast<const char *>(P), - CompressedSize); - if (Error E = - zlib::uncompress(CompressedNameStrings, UncompressedNameStrings, - UncompressedSize)) { + if (Error E = compression::zlib::uncompress( + makeArrayRef(P, CompressedSize), UncompressedNameStrings, + UncompressedSize)) { consumeError(std::move(E)); return make_error<InstrProfError>(instrprof_error::uncompress_failed); } P += CompressedSize; - NameStrings = StringRef(UncompressedNameStrings.data(), - UncompressedNameStrings.size()); + NameStrings = toStringRef(UncompressedNameStrings); } else { NameStrings = StringRef(reinterpret_cast<const char *>(P), UncompressedSize); diff --git a/llvm/lib/ProfileData/SampleProf.cpp b/llvm/lib/ProfileData/SampleProf.cpp index f794e64a13e7..b4d5550a1721 100644 --- a/llvm/lib/ProfileData/SampleProf.cpp +++ b/llvm/lib/ProfileData/SampleProf.cpp @@ -20,7 +20,6 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/ManagedStatic.h" #include 
"llvm/Support/raw_ostream.h" #include <string> #include <system_error> @@ -98,10 +97,9 @@ class SampleProfErrorCategoryType : public std::error_category { } // end anonymous namespace -static ManagedStatic<SampleProfErrorCategoryType> ErrorCategory; - const std::error_category &llvm::sampleprof_category() { - return *ErrorCategory; + static SampleProfErrorCategoryType ErrorCategory; + return ErrorCategory; } void LineLocation::print(raw_ostream &OS) const { diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp index 280e3c6cb8d1..204e34bff879 100644 --- a/llvm/lib/ProfileData/SampleProfReader.cpp +++ b/llvm/lib/ProfileData/SampleProfReader.cpp @@ -877,15 +877,13 @@ std::error_code SampleProfileReaderExtBinaryBase::decompressSection( if (std::error_code EC = CompressSize.getError()) return EC; - if (!llvm::zlib::isAvailable()) + if (!llvm::compression::zlib::isAvailable()) return sampleprof_error::zlib_unavailable; - StringRef CompressedStrings(reinterpret_cast<const char *>(Data), - *CompressSize); - char *Buffer = Allocator.Allocate<char>(DecompressBufSize); + uint8_t *Buffer = Allocator.Allocate<uint8_t>(DecompressBufSize); size_t UCSize = DecompressBufSize; - llvm::Error E = - zlib::uncompress(CompressedStrings, Buffer, UCSize); + llvm::Error E = compression::zlib::uncompress( + makeArrayRef(Data, *CompressSize), Buffer, UCSize); if (E) return sampleprof_error::uncompress_failed; DecompressBuf = reinterpret_cast<const uint8_t *>(Buffer); diff --git a/llvm/lib/ProfileData/SampleProfWriter.cpp b/llvm/lib/ProfileData/SampleProfWriter.cpp index 8ec6b7ebc29e..093790afe2d6 100644 --- a/llvm/lib/ProfileData/SampleProfWriter.cpp +++ b/llvm/lib/ProfileData/SampleProfWriter.cpp @@ -78,19 +78,20 @@ SampleProfileWriterExtBinaryBase::markSectionStart(SecType Type, } std::error_code SampleProfileWriterExtBinaryBase::compressAndOutput() { - if (!llvm::zlib::isAvailable()) + if (!llvm::compression::zlib::isAvailable()) return sampleprof_error::zlib_unavailable; std::string &UncompressedStrings = static_cast<raw_string_ostream *>(LocalBufStream.get())->str(); if (UncompressedStrings.size() == 0) return sampleprof_error::success; auto &OS = *OutputStream; - SmallString<128> CompressedStrings; - zlib::compress(UncompressedStrings, CompressedStrings, - zlib::BestSizeCompression); + SmallVector<uint8_t, 128> CompressedStrings; + compression::zlib::compress(arrayRefFromStringRef(UncompressedStrings), + CompressedStrings, + compression::zlib::BestSizeCompression); encodeULEB128(UncompressedStrings.size(), OS); encodeULEB128(CompressedStrings.size(), OS); - OS << CompressedStrings.str(); + OS << toStringRef(CompressedStrings); UncompressedStrings.clear(); return sampleprof_error::success; } diff --git a/llvm/lib/Support/CommandLine.cpp b/llvm/lib/Support/CommandLine.cpp index eb6c04d987b3..e3df172ef113 100644 --- a/llvm/lib/Support/CommandLine.cpp +++ b/llvm/lib/Support/CommandLine.cpp @@ -1862,8 +1862,10 @@ void basic_parser_impl::printOptionInfo(const Option &O, outs() << " <" << getValueStr(O, ValName) << ">..."; } else if (O.getValueExpectedFlag() == ValueOptional) outs() << "[=<" << getValueStr(O, ValName) << ">]"; - else - outs() << "=<" << getValueStr(O, ValName) << '>'; + else { + outs() << (O.ArgStr.size() == 1 ? 
" <" : "=<") << getValueStr(O, ValName) + << '>'; + } } Option::printHelpStr(O.HelpStr, GlobalWidth, getOptionWidth(O)); diff --git a/llvm/lib/Support/Compression.cpp b/llvm/lib/Support/Compression.cpp index 983a6348bbe4..21191972fb8b 100644 --- a/llvm/lib/Support/Compression.cpp +++ b/llvm/lib/Support/Compression.cpp @@ -22,11 +22,9 @@ #endif using namespace llvm; +using namespace llvm::compression; #if LLVM_ENABLE_ZLIB -static Error createError(StringRef Err) { - return make_error<StringError>(Err, inconvertibleErrorCode()); -} static StringRef convertZlibCodeToString(int Code) { switch (Code) { @@ -46,63 +44,59 @@ static StringRef convertZlibCodeToString(int Code) { bool zlib::isAvailable() { return true; } -void zlib::compress(StringRef InputBuffer, - SmallVectorImpl<char> &CompressedBuffer, int Level) { - unsigned long CompressedSize = ::compressBound(InputBuffer.size()); +void zlib::compress(ArrayRef<uint8_t> Input, + SmallVectorImpl<uint8_t> &CompressedBuffer, int Level) { + unsigned long CompressedSize = ::compressBound(Input.size()); CompressedBuffer.resize_for_overwrite(CompressedSize); - int Res = - ::compress2((Bytef *)CompressedBuffer.data(), &CompressedSize, - (const Bytef *)InputBuffer.data(), InputBuffer.size(), Level); + int Res = ::compress2((Bytef *)CompressedBuffer.data(), &CompressedSize, + (const Bytef *)Input.data(), Input.size(), Level); if (Res == Z_MEM_ERROR) report_bad_alloc_error("Allocation failed"); assert(Res == Z_OK); // Tell MemorySanitizer that zlib output buffer is fully initialized. // This avoids a false report when running LLVM with uninstrumented ZLib. __msan_unpoison(CompressedBuffer.data(), CompressedSize); - CompressedBuffer.truncate(CompressedSize); + if (CompressedSize < CompressedBuffer.size()) + CompressedBuffer.truncate(CompressedSize); } -Error zlib::uncompress(StringRef InputBuffer, char *UncompressedBuffer, +Error zlib::uncompress(ArrayRef<uint8_t> Input, uint8_t *UncompressedBuffer, size_t &UncompressedSize) { int Res = ::uncompress((Bytef *)UncompressedBuffer, (uLongf *)&UncompressedSize, - (const Bytef *)InputBuffer.data(), InputBuffer.size()); + (const Bytef *)Input.data(), Input.size()); // Tell MemorySanitizer that zlib output buffer is fully initialized. // This avoids a false report when running LLVM with uninstrumented ZLib. __msan_unpoison(UncompressedBuffer, UncompressedSize); - return Res ? createError(convertZlibCodeToString(Res)) : Error::success(); + return Res ? 
make_error<StringError>(convertZlibCodeToString(Res), + inconvertibleErrorCode()) + : Error::success(); } -Error zlib::uncompress(StringRef InputBuffer, - SmallVectorImpl<char> &UncompressedBuffer, +Error zlib::uncompress(ArrayRef<uint8_t> Input, + SmallVectorImpl<uint8_t> &UncompressedBuffer, size_t UncompressedSize) { UncompressedBuffer.resize_for_overwrite(UncompressedSize); Error E = - uncompress(InputBuffer, UncompressedBuffer.data(), UncompressedSize); - UncompressedBuffer.truncate(UncompressedSize); + zlib::uncompress(Input, UncompressedBuffer.data(), UncompressedSize); + if (UncompressedSize < UncompressedBuffer.size()) + UncompressedBuffer.truncate(UncompressedSize); return E; } -uint32_t zlib::crc32(StringRef Buffer) { - return ::crc32(0, (const Bytef *)Buffer.data(), Buffer.size()); -} - #else bool zlib::isAvailable() { return false; } -void zlib::compress(StringRef InputBuffer, - SmallVectorImpl<char> &CompressedBuffer, int Level) { +void zlib::compress(ArrayRef<uint8_t> Input, + SmallVectorImpl<uint8_t> &CompressedBuffer, int Level) { llvm_unreachable("zlib::compress is unavailable"); } -Error zlib::uncompress(StringRef InputBuffer, char *UncompressedBuffer, +Error zlib::uncompress(ArrayRef<uint8_t> Input, uint8_t *UncompressedBuffer, size_t &UncompressedSize) { llvm_unreachable("zlib::uncompress is unavailable"); } -Error zlib::uncompress(StringRef InputBuffer, - SmallVectorImpl<char> &UncompressedBuffer, +Error zlib::uncompress(ArrayRef<uint8_t> Input, + SmallVectorImpl<uint8_t> &UncompressedBuffer, size_t UncompressedSize) { llvm_unreachable("zlib::uncompress is unavailable"); } -uint32_t zlib::crc32(StringRef Buffer) { - llvm_unreachable("zlib::crc32 is unavailable"); -} #endif diff --git a/llvm/lib/Support/ConvertUTF.cpp b/llvm/lib/Support/ConvertUTF.cpp index e24a918c5c89..5436f557b993 100644 --- a/llvm/lib/Support/ConvertUTF.cpp +++ b/llvm/lib/Support/ConvertUTF.cpp @@ -417,6 +417,16 @@ Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd) { return isLegalUTF8(source, length); } +/* + * Exported function to return the size of the first utf-8 code unit sequence, + * Or 0 if the sequence is not valid; + */ +unsigned getUTF8SequenceSize(const UTF8 *source, const UTF8 *sourceEnd) { + int length = trailingBytesForUTF8[*source] + 1; + return (length <= sourceEnd - source && isLegalUTF8(source, length)) ? 
length + : 0; +} + /* --------------------------------------------------------------------- */ static unsigned diff --git a/llvm/lib/Support/Error.cpp b/llvm/lib/Support/Error.cpp index 8bfc8ee7a8cc..fbe86f2b59e1 100644 --- a/llvm/lib/Support/Error.cpp +++ b/llvm/lib/Support/Error.cpp @@ -9,7 +9,6 @@ #include "llvm/Support/Error.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/ManagedStatic.h" #include <system_error> using namespace llvm; @@ -46,7 +45,10 @@ namespace { } -static ManagedStatic<ErrorErrorCategory> ErrorErrorCat; +ErrorErrorCategory &getErrorErrorCat() { + static ErrorErrorCategory ErrorErrorCat; + return ErrorErrorCat; +} namespace llvm { @@ -71,19 +73,19 @@ void logAllUnhandledErrors(Error E, raw_ostream &OS, Twine ErrorBanner) { std::error_code ErrorList::convertToErrorCode() const { return std::error_code(static_cast<int>(ErrorErrorCode::MultipleErrors), - *ErrorErrorCat); + getErrorErrorCat()); } std::error_code inconvertibleErrorCode() { return std::error_code(static_cast<int>(ErrorErrorCode::InconvertibleError), - *ErrorErrorCat); + getErrorErrorCat()); } std::error_code FileError::convertToErrorCode() const { std::error_code NestedEC = Err->convertToErrorCode(); if (NestedEC == inconvertibleErrorCode()) return std::error_code(static_cast<int>(ErrorErrorCode::FileError), - *ErrorErrorCat); + getErrorErrorCat()); return NestedEC; } diff --git a/llvm/lib/Support/Process.cpp b/llvm/lib/Support/Process.cpp index cf3962ae927b..5476becc2945 100644 --- a/llvm/lib/Support/Process.cpp +++ b/llvm/lib/Support/Process.cpp @@ -47,7 +47,7 @@ Optional<std::string> Process::FindInEnvPath(StringRef EnvName, const char EnvPathSeparatorStr[] = {Separator, '\0'}; SmallVector<StringRef, 8> Dirs; - SplitString(OptPath.getValue(), Dirs, EnvPathSeparatorStr); + SplitString(OptPath.value(), Dirs, EnvPathSeparatorStr); for (StringRef Dir : Dirs) { if (Dir.empty()) diff --git a/llvm/lib/Support/Unicode.cpp b/llvm/lib/Support/Unicode.cpp index 103710303094..d4d7e75b739d 100644 --- a/llvm/lib/Support/Unicode.cpp +++ b/llvm/lib/Support/Unicode.cpp @@ -269,7 +269,7 @@ bool isPrintable(int UCS) { } /// Unicode code points of the Cf category are considered -/// fornatting characters. +/// formatting characters. bool isFormatting(int UCS) { // https://unicode.org/Public/14.0.0/ucdxml/ diff --git a/llvm/lib/Support/Unix/Process.inc b/llvm/lib/Support/Unix/Process.inc index 3c2d118977c5..c1959b5cc2ae 100644 --- a/llvm/lib/Support/Unix/Process.inc +++ b/llvm/lib/Support/Unix/Process.inc @@ -14,7 +14,6 @@ #include "llvm/ADT/Hashing.h" #include "llvm/ADT/StringRef.h" #include "llvm/Config/config.h" -#include "llvm/Support/ManagedStatic.h" #include <mutex> #if HAVE_FCNTL_H #include <fcntl.h> @@ -327,10 +326,6 @@ extern "C" int del_curterm(struct term *termp); extern "C" int tigetnum(char *capname); #endif -#ifdef LLVM_ENABLE_TERMINFO -static ManagedStatic<std::mutex> TermColorMutex; -#endif - bool checkTerminalEnvironmentForColors() { if (const char *TermStr = std::getenv("TERM")) { return StringSwitch<bool>(TermStr) @@ -351,7 +346,8 @@ bool checkTerminalEnvironmentForColors() { static bool terminalHasColors(int fd) { #ifdef LLVM_ENABLE_TERMINFO // First, acquire a global lock because these C routines are thread hostile. 
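// Editor's aside — the same local-static idiom as in OptBisect above, here
// applied to a lock, as in the terminfo hunk that follows (the called
// function is a hypothetical stand-in): the mutex is created on first use
// with C++11's thread-safe initialization, replacing the removed
// ManagedStatic<std::mutex>.
#include <mutex>

static int callThreadHostileApi() { return 0; } // stand-in for terminfo calls

static int guardedCall() {
  static std::mutex TermColorMutex; // thread-safe one-time init
  std::lock_guard<std::mutex> G(TermColorMutex);
  return callThreadHostileApi(); // serialized across threads
}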
- std::lock_guard<std::mutex> G(*TermColorMutex); + static std::mutex TermColorMutex; + std::lock_guard<std::mutex> G(TermColorMutex); struct term *previous_term = set_curterm(nullptr); int errret = 0; diff --git a/llvm/lib/Support/VirtualFileSystem.cpp b/llvm/lib/Support/VirtualFileSystem.cpp index 21f0c39bfd6e..97d63fff1069 100644 --- a/llvm/lib/Support/VirtualFileSystem.cpp +++ b/llvm/lib/Support/VirtualFileSystem.cpp @@ -2669,13 +2669,13 @@ void JSONWriter::write(ArrayRef<YAMLVFSEntry> Entries, " 'version': 0,\n"; if (IsCaseSensitive) OS << " 'case-sensitive': '" - << (IsCaseSensitive.getValue() ? "true" : "false") << "',\n"; + << (IsCaseSensitive.value() ? "true" : "false") << "',\n"; if (UseExternalNames) OS << " 'use-external-names': '" - << (UseExternalNames.getValue() ? "true" : "false") << "',\n"; + << (UseExternalNames.value() ? "true" : "false") << "',\n"; bool UseOverlayRelative = false; if (IsOverlayRelative) { - UseOverlayRelative = IsOverlayRelative.getValue(); + UseOverlayRelative = IsOverlayRelative.value(); OS << " 'overlay-relative': '" << (UseOverlayRelative ? "true" : "false") << "',\n"; } diff --git a/llvm/lib/Support/Windows/Signals.inc b/llvm/lib/Support/Windows/Signals.inc index 32477de5184b..1621f4a54b79 100644 --- a/llvm/lib/Support/Windows/Signals.inc +++ b/llvm/lib/Support/Windows/Signals.inc @@ -731,6 +731,11 @@ static bool GetDumpType(HKEY Key, MINIDUMP_TYPE &ResultType) { /// otherwise. static std::error_code WINAPI WriteWindowsDumpFile(PMINIDUMP_EXCEPTION_INFORMATION ExceptionInfo) { + struct ScopedCriticalSection { + ScopedCriticalSection() { EnterCriticalSection(&CriticalSection); } + ~ScopedCriticalSection() { LeaveCriticalSection(&CriticalSection); } + } SCS; + using namespace llvm; using namespace llvm::sys; diff --git a/llvm/lib/Support/X86TargetParser.cpp b/llvm/lib/Support/X86TargetParser.cpp index 10f9692d217e..2567f3ed8034 100644 --- a/llvm/lib/Support/X86TargetParser.cpp +++ b/llvm/lib/Support/X86TargetParser.cpp @@ -285,8 +285,9 @@ constexpr FeatureBitset FeaturesZNVER1 = FeatureSSE | FeatureSSE2 | FeatureSSE3 | FeatureSSSE3 | FeatureSSE4_1 | FeatureSSE4_2 | FeatureSSE4_A | FeatureXSAVE | FeatureXSAVEC | FeatureXSAVEOPT | FeatureXSAVES; -constexpr FeatureBitset FeaturesZNVER2 = - FeaturesZNVER1 | FeatureCLWB | FeatureRDPID | FeatureWBNOINVD; +constexpr FeatureBitset FeaturesZNVER2 = FeaturesZNVER1 | FeatureCLWB | + FeatureRDPID | FeatureRDPRU | + FeatureWBNOINVD; static constexpr FeatureBitset FeaturesZNVER3 = FeaturesZNVER2 | FeatureINVPCID | FeaturePKU | FeatureVAES | FeatureVPCLMULQDQ; @@ -490,6 +491,7 @@ constexpr FeatureBitset ImpliedFeaturesPREFETCHWT1 = {}; constexpr FeatureBitset ImpliedFeaturesPRFCHW = {}; constexpr FeatureBitset ImpliedFeaturesPTWRITE = {}; constexpr FeatureBitset ImpliedFeaturesRDPID = {}; +constexpr FeatureBitset ImpliedFeaturesRDPRU = {}; constexpr FeatureBitset ImpliedFeaturesRDRND = {}; constexpr FeatureBitset ImpliedFeaturesRDSEED = {}; constexpr FeatureBitset ImpliedFeaturesRTM = {}; diff --git a/llvm/lib/Support/raw_ostream.cpp b/llvm/lib/Support/raw_ostream.cpp index 98ceea3c3c7a..651949ad5765 100644 --- a/llvm/lib/Support/raw_ostream.cpp +++ b/llvm/lib/Support/raw_ostream.cpp @@ -429,7 +429,7 @@ raw_ostream &raw_ostream::operator<<(const FormattedBytes &FB) { indent(FB.IndentLevel); if (FB.FirstByteOffset) { - uint64_t Offset = FB.FirstByteOffset.getValue(); + uint64_t Offset = FB.FirstByteOffset.value(); llvm::write_hex(*this, Offset + LineIndex, HPS, OffsetWidth); *this << ": "; } diff --git 
a/llvm/lib/TableGen/Record.cpp b/llvm/lib/TableGen/Record.cpp index 6c205104d569..75a99e95541a 100644 --- a/llvm/lib/TableGen/Record.cpp +++ b/llvm/lib/TableGen/Record.cpp @@ -2601,7 +2601,7 @@ StringRef Record::getValueAsString(StringRef FieldName) const { if (!S) PrintFatalError(getLoc(), "Record `" + getName() + "' does not have a field named `" + FieldName + "'!\n"); - return S.getValue(); + return S.value(); } llvm::Optional<StringRef> diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td index f092c039b58e..b332e9dcb176 100644 --- a/llvm/lib/Target/AArch64/AArch64.td +++ b/llvm/lib/Target/AArch64/AArch64.td @@ -650,6 +650,7 @@ include "AArch64SchedA64FX.td" include "AArch64SchedThunderX3T110.td" include "AArch64SchedTSV110.td" include "AArch64SchedAmpere1.td" +include "AArch64SchedNeoverseN2.td" def TuneA35 : SubtargetFeature<"a35", "ARMProcFamily", "CortexA35", "Cortex-A35 ARM processors">; @@ -1137,7 +1138,7 @@ def : ProcessorModel<"cortex-a78", CortexA57Model, ProcessorFeatures.A78, [TuneA78]>; def : ProcessorModel<"cortex-a78c", CortexA57Model, ProcessorFeatures.A78C, [TuneA78C]>; -def : ProcessorModel<"cortex-a710", CortexA57Model, ProcessorFeatures.A710, +def : ProcessorModel<"cortex-a710", NeoverseN2Model, ProcessorFeatures.A710, [TuneA710]>; def : ProcessorModel<"cortex-r82", CortexA55Model, ProcessorFeatures.R82, [TuneR82]>; @@ -1145,17 +1146,17 @@ def : ProcessorModel<"cortex-x1", CortexA57Model, ProcessorFeatures.X1, [TuneX1]>; def : ProcessorModel<"cortex-x1c", CortexA57Model, ProcessorFeatures.X1C, [TuneX1]>; -def : ProcessorModel<"cortex-x2", CortexA57Model, ProcessorFeatures.X2, +def : ProcessorModel<"cortex-x2", NeoverseN2Model, ProcessorFeatures.X2, [TuneX2]>; def : ProcessorModel<"neoverse-e1", CortexA53Model, ProcessorFeatures.NeoverseE1, [TuneNeoverseE1]>; def : ProcessorModel<"neoverse-n1", CortexA57Model, ProcessorFeatures.NeoverseN1, [TuneNeoverseN1]>; -def : ProcessorModel<"neoverse-n2", CortexA57Model, +def : ProcessorModel<"neoverse-n2", NeoverseN2Model, ProcessorFeatures.NeoverseN2, [TuneNeoverseN2]>; -def : ProcessorModel<"neoverse-512tvb", CortexA57Model, +def : ProcessorModel<"neoverse-512tvb", NeoverseN2Model, ProcessorFeatures.Neoverse512TVB, [TuneNeoverse512TVB]>; -def : ProcessorModel<"neoverse-v1", CortexA57Model, +def : ProcessorModel<"neoverse-v1", NeoverseN2Model, ProcessorFeatures.NeoverseV1, [TuneNeoverseV1]>; def : ProcessorModel<"exynos-m3", ExynosM3Model, ProcessorFeatures.ExynosM3, [TuneExynosM3]>; diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp index ef4860979dd3..c568f73471e1 100644 --- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -1173,6 +1173,8 @@ void AArch64AsmPrinter::emitFMov0(const MachineInstr &MI) { #include "AArch64GenMCPseudoLowering.inc" void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) { + AArch64_MC::verifyInstructionPredicates(MI->getOpcode(), STI->getFeatureBits()); + // Do any auto-generated pseudo lowerings. 
if (emitPseudoExpansionLowering(*OutStreamer, MI)) return; diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index abfe2d507111..447ad10ddf22 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -237,6 +237,39 @@ static bool isMergePassthruOpcode(unsigned Opc) { } } +// Returns true if inactive lanes are known to be zeroed by construction. +static bool isZeroingInactiveLanes(SDValue Op) { + switch (Op.getOpcode()) { + default: + // We guarantee i1 splat_vectors to zero the other lanes by + // implementing it with ptrue and possibly a punpklo for nxv1i1. + if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) + return true; + return false; + case AArch64ISD::PTRUE: + case AArch64ISD::SETCC_MERGE_ZERO: + return true; + case ISD::INTRINSIC_WO_CHAIN: + switch (Op.getConstantOperandVal(0)) { + default: + return false; + case Intrinsic::aarch64_sve_ptrue: + case Intrinsic::aarch64_sve_pnext: + case Intrinsic::aarch64_sve_cmpeq_wide: + case Intrinsic::aarch64_sve_cmpne_wide: + case Intrinsic::aarch64_sve_cmpge_wide: + case Intrinsic::aarch64_sve_cmpgt_wide: + case Intrinsic::aarch64_sve_cmplt_wide: + case Intrinsic::aarch64_sve_cmple_wide: + case Intrinsic::aarch64_sve_cmphs_wide: + case Intrinsic::aarch64_sve_cmphi_wide: + case Intrinsic::aarch64_sve_cmplo_wide: + case Intrinsic::aarch64_sve_cmpls_wide: + return true; + } + } +} + AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, const AArch64Subtarget &STI) : TargetLowering(TM), Subtarget(&STI) { @@ -1082,6 +1115,16 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); } + // FIXME: Move lowering for more nodes here if those are common between + // SVE and SME. 
+ if (Subtarget->hasSVE() || Subtarget->hasSME()) { + for (auto VT : + {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1, MVT::nxv1i1}) { + setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); + } + } + if (Subtarget->hasSVE()) { for (auto VT : {MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64}) { setOperationAction(ISD::BITREVERSE, VT, Custom); @@ -1162,14 +1205,12 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); setOperationAction(ISD::SELECT, VT, Custom); setOperationAction(ISD::SETCC, VT, Custom); - setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); setOperationAction(ISD::TRUNCATE, VT, Custom); setOperationAction(ISD::VECREDUCE_AND, VT, Custom); setOperationAction(ISD::VECREDUCE_OR, VT, Custom); setOperationAction(ISD::VECREDUCE_XOR, VT, Custom); setOperationAction(ISD::SELECT_CC, VT, Expand); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); @@ -2429,6 +2470,23 @@ AArch64TargetLowering::EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const { return BB; } +MachineBasicBlock * +AArch64TargetLowering::EmitAddVectorToTile(unsigned Opc, unsigned BaseReg, + MachineInstr &MI, + MachineBasicBlock *BB) const { + const TargetInstrInfo *TII = Subtarget->getInstrInfo(); + MachineInstrBuilder MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opc)); + + MIB.addReg(BaseReg + MI.getOperand(0).getImm(), RegState::Define); + MIB.addReg(BaseReg + MI.getOperand(0).getImm()); + MIB.add(MI.getOperand(1)); // pn + MIB.add(MI.getOperand(2)); // pm + MIB.add(MI.getOperand(3)); // zn + + MI.eraseFromParent(); // The pseudo is gone now. + return BB; +} + MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter( MachineInstr &MI, MachineBasicBlock *BB) const { switch (MI.getOpcode()) { @@ -2561,6 +2619,14 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter( BB); case AArch64::ZERO_M_PSEUDO: return EmitZero(MI, BB); + case AArch64::ADDHA_MPPZ_PSEUDO_S: + return EmitAddVectorToTile(AArch64::ADDHA_MPPZ_S, AArch64::ZAS0, MI, BB); + case AArch64::ADDVA_MPPZ_PSEUDO_S: + return EmitAddVectorToTile(AArch64::ADDVA_MPPZ_S, AArch64::ZAS0, MI, BB); + case AArch64::ADDHA_MPPZ_PSEUDO_D: + return EmitAddVectorToTile(AArch64::ADDHA_MPPZ_D, AArch64::ZAD0, MI, BB); + case AArch64::ADDVA_MPPZ_PSEUDO_D: + return EmitAddVectorToTile(AArch64::ADDVA_MPPZ_D, AArch64::ZAD0, MI, BB); } } @@ -4329,55 +4395,49 @@ SDValue AArch64TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const { static inline SDValue getPTrue(SelectionDAG &DAG, SDLoc DL, EVT VT, int Pattern) { + if (VT == MVT::nxv1i1 && Pattern == AArch64SVEPredPattern::all) + return DAG.getConstant(1, DL, MVT::nxv1i1); return DAG.getNode(AArch64ISD::PTRUE, DL, VT, DAG.getTargetConstant(Pattern, DL, MVT::i32)); } -static SDValue lowerConvertToSVBool(SDValue Op, SelectionDAG &DAG) { +// Returns a safe bitcast between two scalable vector predicates, where +// any newly created lanes from a widening bitcast are defined as zero. 
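+// Narrowing casts can be returned as bare reinterprets, since dropping lanes
+// introduces no new bits; only a widening cast may need the masking step at
+// the end of this function.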
+static SDValue getSVEPredicateBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) { SDLoc DL(Op); - EVT OutVT = Op.getValueType(); - SDValue InOp = Op.getOperand(1); - EVT InVT = InOp.getValueType(); + EVT InVT = Op.getValueType(); + + assert(InVT.getVectorElementType() == MVT::i1 && + VT.getVectorElementType() == MVT::i1 && + "Expected a predicate-to-predicate bitcast"); + assert(VT.isScalableVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT) && + InVT.isScalableVector() && + DAG.getTargetLoweringInfo().isTypeLegal(InVT) && + "Only expect to cast between legal scalable predicate types!"); // Return the operand if the cast isn't changing type, - // i.e. <n x 16 x i1> -> <n x 16 x i1> - if (InVT == OutVT) - return InOp; + // e.g. <n x 16 x i1> -> <n x 16 x i1> + if (InVT == VT) + return Op; - SDValue Reinterpret = - DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, OutVT, InOp); + SDValue Reinterpret = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, VT, Op); - // If the argument converted to an svbool is a ptrue or a comparison, the - // lanes introduced by the widening are zero by construction. - switch (InOp.getOpcode()) { - case AArch64ISD::SETCC_MERGE_ZERO: + // We only have to zero the lanes if new lanes are being defined, e.g. when + // casting from <vscale x 2 x i1> to <vscale x 16 x i1>. If this is not the + // case (e.g. when casting from <vscale x 16 x i1> -> <vscale x 2 x i1>) then + // we can return here. + if (InVT.bitsGT(VT)) return Reinterpret; - case ISD::INTRINSIC_WO_CHAIN: - switch (InOp.getConstantOperandVal(0)) { - case Intrinsic::aarch64_sve_ptrue: - case Intrinsic::aarch64_sve_cmpeq_wide: - case Intrinsic::aarch64_sve_cmpne_wide: - case Intrinsic::aarch64_sve_cmpge_wide: - case Intrinsic::aarch64_sve_cmpgt_wide: - case Intrinsic::aarch64_sve_cmplt_wide: - case Intrinsic::aarch64_sve_cmple_wide: - case Intrinsic::aarch64_sve_cmphs_wide: - case Intrinsic::aarch64_sve_cmphi_wide: - case Intrinsic::aarch64_sve_cmplo_wide: - case Intrinsic::aarch64_sve_cmpls_wide: - return Reinterpret; - } - } - // Splat vectors of one will generate ptrue instructions - if (ISD::isConstantSplatVectorAllOnes(InOp.getNode())) + // Check if the other lanes are already known to be zeroed by + // construction. + if (isZeroingInactiveLanes(Op)) return Reinterpret; - // Otherwise, zero the newly introduced lanes. - SDValue Mask = getPTrue(DAG, DL, InVT, AArch64SVEPredPattern::all); - SDValue MaskReinterpret = - DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, OutVT, Mask); - return DAG.getNode(ISD::AND, DL, OutVT, Reinterpret, MaskReinterpret); + // Zero the newly introduced lanes. 
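+  // A constant-1 splat of the narrow type sets exactly the lanes that type
+  // defines, so reinterpreting it to the wide type yields a mask for the
+  // lanes inherited from the original predicate.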
+ SDValue Mask = DAG.getConstant(1, DL, InVT); + Mask = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, VT, Mask); + return DAG.getNode(ISD::AND, DL, VT, Reinterpret, Mask); } SDValue AArch64TargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, @@ -4546,10 +4606,9 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::aarch64_sve_dupq_lane: return LowerDUPQLane(Op, DAG); case Intrinsic::aarch64_sve_convert_from_svbool: - return DAG.getNode(AArch64ISD::REINTERPRET_CAST, dl, Op.getValueType(), - Op.getOperand(1)); + return getSVEPredicateBitCast(Op.getValueType(), Op.getOperand(1), DAG); case Intrinsic::aarch64_sve_convert_to_svbool: - return lowerConvertToSVBool(Op, DAG); + return getSVEPredicateBitCast(MVT::nxv16i1, Op.getOperand(1), DAG); case Intrinsic::aarch64_sve_fneg: return DAG.getNode(AArch64ISD::FNEG_MERGE_PASSTHRU, dl, Op.getValueType(), Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); @@ -6393,9 +6452,8 @@ static bool checkZExtBool(SDValue Arg, const SelectionDAG &DAG) { if (SizeInBits < 8) return false; - APInt LowBits(SizeInBits, 0xFF); APInt RequiredZero(SizeInBits, 0xFE); - KnownBits Bits = DAG.computeKnownBits(Arg, LowBits, 4); + KnownBits Bits = DAG.computeKnownBits(Arg, 4); bool ZExtBool = (Bits.Zero & RequiredZero) == RequiredZero; return ZExtBool; } @@ -14814,16 +14872,6 @@ static SDValue performANDCombine(SDNode *N, if (!DAG.getTargetLoweringInfo().isTypeLegal(VT)) return SDValue(); - // Although NEON has no EORV instruction, when only the least significant bit - // is required the operation is synonymous with ADDV. - if (LHS.getOpcode() == ISD::VECREDUCE_XOR && isOneConstant(RHS) && - LHS.getOperand(0).getValueType().isFixedLengthVector() && - LHS.hasOneUse()) { - SDLoc DL(N); - SDValue ADDV = DAG.getNode(ISD::VECREDUCE_ADD, DL, VT, LHS.getOperand(0)); - return DAG.getNode(ISD::AND, DL, VT, ADDV, RHS); - } - if (VT.isScalableVector()) return performSVEAndCombine(N, DCI); @@ -16126,12 +16174,24 @@ static SDValue getPTest(SelectionDAG &DAG, EVT VT, SDValue Pg, SDValue Op, assert(Op.getValueType().isScalableVector() && TLI.isTypeLegal(Op.getValueType()) && "Expected legal scalable vector type!"); + assert(Op.getValueType() == Pg.getValueType() && + "Expected same type for PTEST operands"); // Ensure target specific opcodes are using legal type. EVT OutVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); SDValue TVal = DAG.getConstant(1, DL, OutVT); SDValue FVal = DAG.getConstant(0, DL, OutVT); + // Ensure operands have type nxv16i1. + if (Op.getValueType() != MVT::nxv16i1) { + if ((Cond == AArch64CC::ANY_ACTIVE || Cond == AArch64CC::NONE_ACTIVE) && + isZeroingInactiveLanes(Op)) + Pg = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, MVT::nxv16i1, Pg); + else + Pg = getSVEPredicateBitCast(MVT::nxv16i1, Pg, DAG); + Op = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, MVT::nxv16i1, Op); + } + // Set condition code (CC) flags. SDValue Test = DAG.getNode(AArch64ISD::PTEST, DL, MVT::Other, Pg, Op); @@ -18026,6 +18086,54 @@ static SDValue performCSELCombine(SDNode *N, return performCONDCombine(N, DCI, DAG, 2, 3); } +// Try to re-use an already extended operand of a vector SetCC feeding an +// extended select. Doing so avoids requiring another full extension of the +// SET_CC result when lowering the select.
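+// For example, (vselect (setcc v, splat(C)), (sext v), w) can compare the
+// existing (sext v) against splat(sext C) instead, so the i1 setcc result no
+// longer needs to be widened separately to the select's element type.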
+static SDValue tryToWidenSetCCOperands(SDNode *Op, SelectionDAG &DAG) { + EVT Op0MVT = Op->getOperand(0).getValueType(); + if (!Op0MVT.isVector() || Op->use_empty()) + return SDValue(); + + // Make sure that all uses of Op are VSELECTs with matching result types, + // where the result type has a larger element type than the SetCC operand. + SDNode *FirstUse = *Op->use_begin(); + if (FirstUse->getOpcode() != ISD::VSELECT) + return SDValue(); + EVT UseMVT = FirstUse->getValueType(0); + if (UseMVT.getScalarSizeInBits() <= Op0MVT.getScalarSizeInBits()) + return SDValue(); + if (any_of(Op->uses(), [&UseMVT](const SDNode *N) { + return N->getOpcode() != ISD::VSELECT || N->getValueType(0) != UseMVT; + })) + return SDValue(); + + APInt V; + if (!ISD::isConstantSplatVector(Op->getOperand(1).getNode(), V)) + return SDValue(); + + SDLoc DL(Op); + SDValue Op0ExtV; + SDValue Op1ExtV; + ISD::CondCode CC = cast<CondCodeSDNode>(Op->getOperand(2))->get(); + // Check if the first operand of the SET_CC is already extended. If it is, + // split the SET_CC and re-use the extended version of the operand. + SDNode *Op0SExt = DAG.getNodeIfExists(ISD::SIGN_EXTEND, DAG.getVTList(UseMVT), + Op->getOperand(0)); + SDNode *Op0ZExt = DAG.getNodeIfExists(ISD::ZERO_EXTEND, DAG.getVTList(UseMVT), + Op->getOperand(0)); + if (Op0SExt && (isSignedIntSetCC(CC) || isIntEqualitySetCC(CC))) { + Op0ExtV = SDValue(Op0SExt, 0); + Op1ExtV = DAG.getNode(ISD::SIGN_EXTEND, DL, UseMVT, Op->getOperand(1)); + } else if (Op0ZExt && (isUnsignedIntSetCC(CC) || isIntEqualitySetCC(CC))) { + Op0ExtV = SDValue(Op0ZExt, 0); + Op1ExtV = DAG.getNode(ISD::ZERO_EXTEND, DL, UseMVT, Op->getOperand(1)); + } else + return SDValue(); + + return DAG.getNode(ISD::SETCC, DL, UseMVT.changeVectorElementType(MVT::i1), + Op0ExtV, Op1ExtV, Op->getOperand(2)); +} + static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) { assert(N->getOpcode() == ISD::SETCC && "Unexpected opcode!"); SDValue LHS = N->getOperand(0); @@ -18034,6 +18142,9 @@ static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) { SDLoc DL(N); EVT VT = N->getValueType(0); + if (SDValue V = tryToWidenSetCCOperands(N, DAG)) + return V; + // setcc (csel 0, 1, cond, X), 1, ne ==> csel 0, 1, !cond, X if (Cond == ISD::SETNE && isOneConstant(RHS) && LHS->getOpcode() == AArch64ISD::CSEL && @@ -21045,7 +21156,7 @@ SDValue AArch64TargetLowering::LowerPredReductionToSVE(SDValue ReduceOp, default: return SDValue(); case ISD::VECREDUCE_OR: - if (isAllActivePredicate(DAG, Pg)) + if (isAllActivePredicate(DAG, Pg) && OpVT == MVT::nxv16i1) // The predicate can be 'Op' because // vecreduce_or(Op & <all true>) <=> vecreduce_or(Op). return getPTest(DAG, VT, Op, Op, AArch64CC::ANY_ACTIVE); @@ -21058,6 +21169,11 @@ SDValue AArch64TargetLowering::LowerPredReductionToSVE(SDValue ReduceOp, case ISD::VECREDUCE_XOR: { SDValue ID = DAG.getTargetConstant(Intrinsic::aarch64_sve_cntp, DL, MVT::i64); + if (OpVT == MVT::nxv1i1) { + // Emulate a CNTP on .Q using .D and a different governing predicate.
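+      // A valid nxv1i1 predicate can only have bits set at .Q granule
+      // positions, so viewing both operands as nxv2i1 introduces no new
+      // active lanes and leaves the count unchanged.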
+ Pg = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, MVT::nxv2i1, Pg); + Op = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, MVT::nxv2i1, Op); + } SDValue Cntp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i64, ID, Pg, Op); return DAG.getAnyExtOrTrunc(Cntp, DL, VT); @@ -21464,22 +21580,17 @@ SDValue AArch64TargetLowering::getSVESafeBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); EVT InVT = Op.getValueType(); - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - (void)TLI; - assert(VT.isScalableVector() && TLI.isTypeLegal(VT) && - InVT.isScalableVector() && TLI.isTypeLegal(InVT) && + assert(VT.isScalableVector() && isTypeLegal(VT) && + InVT.isScalableVector() && isTypeLegal(InVT) && "Only expect to cast between legal scalable vector types!"); - assert((VT.getVectorElementType() == MVT::i1) == - (InVT.getVectorElementType() == MVT::i1) && - "Cannot cast between data and predicate scalable vector types!"); + assert(VT.getVectorElementType() != MVT::i1 && + InVT.getVectorElementType() != MVT::i1 && + "For predicate bitcasts, use getSVEPredicateBitCast"); if (InVT == VT) return Op; - if (VT.getVectorElementType() == MVT::i1) - return DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, VT, Op); - EVT PackedVT = getPackedSVEVectorVT(VT.getVectorElementType()); EVT PackedInVT = getPackedSVEVectorVT(InVT.getVectorElementType()); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 06ea918ea32e..e02b5e56fd2e 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -571,6 +571,9 @@ public: MachineInstr &MI, MachineBasicBlock *BB) const; MachineBasicBlock *EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const; + MachineBasicBlock *EmitAddVectorToTile(unsigned Opc, unsigned BaseReg, + MachineInstr &MI, + MachineBasicBlock *BB) const; MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, @@ -1148,6 +1151,7 @@ private: // These can make "bitcasting" a multiphase process. REINTERPRET_CAST is used // to transition between unpacked and packed types of the same element type, // with BITCAST used otherwise. + // This function does not handle predicate bitcasts. SDValue getSVESafeBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) const; bool isConstantUnsignedBitfieldExtractLegal(unsigned Opc, LLT Ty1, diff --git a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td index c477a44b13b2..6839e73796a6 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td +++ b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td @@ -29,21 +29,21 @@ def : Pat<(atomic_fence (timm), (timm)), (DMB (i32 0xb))>; // An atomic load operation that does not need either acquire or release // semantics. -class relaxed_load<PatFrag base> +class relaxed_load<PatFrags base> : PatFrag<(ops node:$ptr), (base node:$ptr)> { let IsAtomic = 1; let IsAtomicOrderingAcquireOrStronger = 0; } // An atomic load operation that actually needs acquire semantics. -class acquiring_load<PatFrag base> +class acquiring_load<PatFrags base> : PatFrag<(ops node:$ptr), (base node:$ptr)> { let IsAtomic = 1; let IsAtomicOrderingAcquire = 1; } // An atomic load operation that needs sequential consistency.
-class seq_cst_load<PatFrag base> +class seq_cst_load<PatFrags base> : PatFrag<(ops node:$ptr), (base node:$ptr)> { let IsAtomic = 1; let IsAtomicOrderingSequentiallyConsistent = 1; @@ -63,34 +63,34 @@ let Predicates = [HasLDAPR] in { } // 8-bit loads -def : Pat<(seq_cst_load<atomic_load_8> GPR64sp:$ptr), (LDARB GPR64sp:$ptr)>; -def : Pat<(acquiring_load<atomic_load_8> GPR64sp:$ptr), (LDARB GPR64sp:$ptr)>; -def : Pat<(relaxed_load<atomic_load_8> (ro_Windexed8 GPR64sp:$Rn, GPR32:$Rm, +def : Pat<(seq_cst_load<atomic_load_az_8> GPR64sp:$ptr), (LDARB GPR64sp:$ptr)>; +def : Pat<(acquiring_load<atomic_load_az_8> GPR64sp:$ptr), (LDARB GPR64sp:$ptr)>; +def : Pat<(relaxed_load<atomic_load_az_8> (ro_Windexed8 GPR64sp:$Rn, GPR32:$Rm, ro_Wextend8:$offset)), (LDRBBroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend8:$offset)>; -def : Pat<(relaxed_load<atomic_load_8> (ro_Xindexed8 GPR64sp:$Rn, GPR64:$Rm, - ro_Xextend8:$offset)), +def : Pat<(relaxed_load<atomic_load_az_8> (ro_Xindexed8 GPR64sp:$Rn, GPR64:$Rm, + ro_Xextend8:$offset)), (LDRBBroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend8:$offset)>; -def : Pat<(relaxed_load<atomic_load_8> (am_indexed8 GPR64sp:$Rn, - uimm12s1:$offset)), +def : Pat<(relaxed_load<atomic_load_az_8> (am_indexed8 GPR64sp:$Rn, + uimm12s1:$offset)), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>; -def : Pat<(relaxed_load<atomic_load_8> +def : Pat<(relaxed_load<atomic_load_az_8> (am_unscaled8 GPR64sp:$Rn, simm9:$offset)), (LDURBBi GPR64sp:$Rn, simm9:$offset)>; // 16-bit loads -def : Pat<(seq_cst_load<atomic_load_16> GPR64sp:$ptr), (LDARH GPR64sp:$ptr)>; -def : Pat<(acquiring_load<atomic_load_16> GPR64sp:$ptr), (LDARH GPR64sp:$ptr)>; -def : Pat<(relaxed_load<atomic_load_16> (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm, +def : Pat<(seq_cst_load<atomic_load_az_16> GPR64sp:$ptr), (LDARH GPR64sp:$ptr)>; +def : Pat<(acquiring_load<atomic_load_az_16> GPR64sp:$ptr), (LDARH GPR64sp:$ptr)>; +def : Pat<(relaxed_load<atomic_load_az_16> (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend)), (LDRHHroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend)>; -def : Pat<(relaxed_load<atomic_load_16> (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm, - ro_Xextend16:$extend)), +def : Pat<(relaxed_load<atomic_load_az_16> (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm, + ro_Xextend16:$extend)), (LDRHHroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend)>; -def : Pat<(relaxed_load<atomic_load_16> (am_indexed16 GPR64sp:$Rn, - uimm12s2:$offset)), +def : Pat<(relaxed_load<atomic_load_az_16> (am_indexed16 GPR64sp:$Rn, + uimm12s2:$offset)), (LDRHHui GPR64sp:$Rn, uimm12s2:$offset)>; -def : Pat<(relaxed_load<atomic_load_16> +def : Pat<(relaxed_load<atomic_load_az_16> (am_unscaled16 GPR64sp:$Rn, simm9:$offset)), (LDURHHi GPR64sp:$Rn, simm9:$offset)>; diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td index 78bc1b8c6f02..02fa36a1df4b 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -1505,7 +1505,7 @@ class CRmSystemI<Operand crmtype, bits<3> opc, string asm, class SystemNoOperands<bits<3> op2, string asm, list<dag> pattern = []> : SimpleSystemI<0, (ins), asm, "", pattern>, - Sched<[]> { + Sched<[WriteHint]> { bits<4> CRm; let CRm = 0b0011; let Inst{31-12} = 0b11010101000000110010; diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 3802a45ad6c1..d444223e4494 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -4356,10 +4356,12 @@ defm 
FCVTZU : SIMDTwoVectorFPToInt<1, 1, 0b11011, "fcvtzu", any_fp_to_uint>; // AArch64's FCVT instructions saturate when out of range. multiclass SIMDTwoVectorFPToIntSatPats<SDNode to_int_sat, string INST> { + let Predicates = [HasFullFP16] in { def : Pat<(v4i16 (to_int_sat v4f16:$Rn, i16)), (!cast<Instruction>(INST # v4f16) v4f16:$Rn)>; def : Pat<(v8i16 (to_int_sat v8f16:$Rn, i16)), (!cast<Instruction>(INST # v8f16) v8f16:$Rn)>; + } def : Pat<(v2i32 (to_int_sat v2f32:$Rn, i32)), (!cast<Instruction>(INST # v2f32) v2f32:$Rn)>; def : Pat<(v4i32 (to_int_sat v4f32:$Rn, i32)), diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index 68ff1b78e84b..c66f9cfd9c22 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -778,7 +778,7 @@ let Predicates = [HasSVEorSME] in { defm BRKB_PPmP : sve_int_break_m<0b101, "brkb", int_aarch64_sve_brkb>; defm BRKBS_PPzP : sve_int_break_z<0b110, "brkbs", null_frag>; - def PTEST_PP : sve_int_ptest<0b010000, "ptest">; + def PTEST_PP : sve_int_ptest<0b010000, "ptest", AArch64ptest>; defm PFALSE : sve_int_pfalse<0b000000, "pfalse">; defm PFIRST : sve_int_pfirst<0b00000, "pfirst", int_aarch64_sve_pfirst>; defm PNEXT : sve_int_pnext<0b00110, "pnext", int_aarch64_sve_pnext>; @@ -1531,6 +1531,14 @@ let Predicates = [HasSVEorSME] in { def : Pat<(nxv8i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 8))), (PUNPKHI_PP PPR:$Ps)>; + def : Pat<(nxv1i1 (extract_subvector (nxv4i1 PPR:$Ps), (i64 0))), + (PUNPKLO_PP (PUNPKLO_PP PPR:$Ps))>; + def : Pat<(nxv1i1 (extract_subvector (nxv4i1 PPR:$Ps), (i64 1))), + (PUNPKHI_PP (PUNPKLO_PP PPR:$Ps))>; + def : Pat<(nxv1i1 (extract_subvector (nxv4i1 PPR:$Ps), (i64 2))), + (PUNPKLO_PP (PUNPKHI_PP PPR:$Ps))>; + def : Pat<(nxv1i1 (extract_subvector (nxv4i1 PPR:$Ps), (i64 3))), + (PUNPKHI_PP (PUNPKHI_PP PPR:$Ps))>; def : Pat<(nxv2i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 0))), (PUNPKLO_PP (PUNPKLO_PP PPR:$Ps))>; def : Pat<(nxv2i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 2))), @@ -1539,7 +1547,6 @@ let Predicates = [HasSVEorSME] in { (PUNPKLO_PP (PUNPKHI_PP PPR:$Ps))>; def : Pat<(nxv2i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 6))), (PUNPKHI_PP (PUNPKHI_PP PPR:$Ps))>; - def : Pat<(nxv4i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 0))), (PUNPKLO_PP (PUNPKLO_PP PPR:$Ps))>; def : Pat<(nxv4i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 4))), @@ -1549,6 +1556,23 @@ let Predicates = [HasSVEorSME] in { def : Pat<(nxv4i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 12))), (PUNPKHI_PP (PUNPKHI_PP PPR:$Ps))>; + + def : Pat<(nxv1i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 0))), + (PUNPKLO_PP (PUNPKLO_PP (PUNPKLO_PP PPR:$Ps)))>; + def : Pat<(nxv1i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 1))), + (PUNPKHI_PP (PUNPKLO_PP (PUNPKLO_PP PPR:$Ps)))>; + def : Pat<(nxv1i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 2))), + (PUNPKLO_PP (PUNPKHI_PP (PUNPKLO_PP PPR:$Ps)))>; + def : Pat<(nxv1i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 3))), + (PUNPKHI_PP (PUNPKHI_PP (PUNPKLO_PP PPR:$Ps)))>; + def : Pat<(nxv1i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 4))), + (PUNPKLO_PP (PUNPKLO_PP (PUNPKHI_PP PPR:$Ps)))>; + def : Pat<(nxv1i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 5))), + (PUNPKHI_PP (PUNPKLO_PP (PUNPKHI_PP PPR:$Ps)))>; + def : Pat<(nxv1i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 6))), + (PUNPKLO_PP (PUNPKHI_PP (PUNPKHI_PP PPR:$Ps)))>; + def : Pat<(nxv1i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 7))), + (PUNPKHI_PP (PUNPKHI_PP (PUNPKHI_PP PPR:$Ps)))>; 
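+  // In the nxv1i1 extracts above and below, the bits of the index select the
+  // punpklo/punpkhi chain directly: the most significant bit picks the
+  // innermost unpack and the least significant bit the outermost.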
def : Pat<(nxv2i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 0))), (PUNPKLO_PP (PUNPKLO_PP (PUNPKLO_PP PPR:$Ps)))>; def : Pat<(nxv2i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 2))), @@ -1566,6 +1590,39 @@ let Predicates = [HasSVEorSME] in { def : Pat<(nxv2i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 14))), (PUNPKHI_PP (PUNPKHI_PP (PUNPKHI_PP PPR:$Ps)))>; + def : Pat<(nxv1i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 0))), + (PUNPKLO_PP (PUNPKLO_PP (PUNPKLO_PP (PUNPKLO_PP PPR:$Ps))))>; + def : Pat<(nxv1i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 1))), + (PUNPKHI_PP (PUNPKLO_PP (PUNPKLO_PP (PUNPKLO_PP PPR:$Ps))))>; + def : Pat<(nxv1i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 2))), + (PUNPKLO_PP (PUNPKHI_PP (PUNPKLO_PP (PUNPKLO_PP PPR:$Ps))))>; + def : Pat<(nxv1i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 3))), + (PUNPKHI_PP (PUNPKHI_PP (PUNPKLO_PP (PUNPKLO_PP PPR:$Ps))))>; + def : Pat<(nxv1i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 4))), + (PUNPKLO_PP (PUNPKLO_PP (PUNPKHI_PP (PUNPKLO_PP PPR:$Ps))))>; + def : Pat<(nxv1i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 5))), + (PUNPKHI_PP (PUNPKLO_PP (PUNPKHI_PP (PUNPKLO_PP PPR:$Ps))))>; + def : Pat<(nxv1i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 6))), + (PUNPKLO_PP (PUNPKHI_PP (PUNPKHI_PP (PUNPKLO_PP PPR:$Ps))))>; + def : Pat<(nxv1i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 7))), + (PUNPKHI_PP (PUNPKHI_PP (PUNPKHI_PP (PUNPKLO_PP PPR:$Ps))))>; + def : Pat<(nxv1i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 8))), + (PUNPKLO_PP (PUNPKLO_PP (PUNPKLO_PP (PUNPKHI_PP PPR:$Ps))))>; + def : Pat<(nxv1i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 9))), + (PUNPKHI_PP (PUNPKLO_PP (PUNPKLO_PP (PUNPKHI_PP PPR:$Ps))))>; + def : Pat<(nxv1i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 10))), + (PUNPKLO_PP (PUNPKHI_PP (PUNPKLO_PP (PUNPKHI_PP PPR:$Ps))))>; + def : Pat<(nxv1i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 11))), + (PUNPKHI_PP (PUNPKHI_PP (PUNPKLO_PP (PUNPKHI_PP PPR:$Ps))))>; + def : Pat<(nxv1i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 12))), + (PUNPKLO_PP (PUNPKLO_PP (PUNPKHI_PP (PUNPKHI_PP PPR:$Ps))))>; + def : Pat<(nxv1i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 13))), + (PUNPKHI_PP (PUNPKLO_PP (PUNPKHI_PP (PUNPKHI_PP PPR:$Ps))))>; + def : Pat<(nxv1i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 14))), + (PUNPKLO_PP (PUNPKHI_PP (PUNPKHI_PP (PUNPKHI_PP PPR:$Ps))))>; + def : Pat<(nxv1i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 15))), + (PUNPKHI_PP (PUNPKHI_PP (PUNPKHI_PP (PUNPKHI_PP PPR:$Ps))))>; + // Extract subvectors from FP SVE vectors def : Pat<(nxv2f16 (extract_subvector (nxv4f16 ZPR:$Zs), (i64 0))), (UUNPKLO_ZZ_D ZPR:$Zs)>; @@ -2074,15 +2131,6 @@ let Predicates = [HasSVEorSME] in { def STR_ZZZZXI : Pseudo<(outs), (ins ZZZZ_b:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; } - def : Pat<(AArch64ptest (nxv16i1 PPR:$pg), (nxv16i1 PPR:$src)), - (PTEST_PP PPR:$pg, PPR:$src)>; - def : Pat<(AArch64ptest (nxv8i1 PPR:$pg), (nxv8i1 PPR:$src)), - (PTEST_PP PPR:$pg, PPR:$src)>; - def : Pat<(AArch64ptest (nxv4i1 PPR:$pg), (nxv4i1 PPR:$src)), - (PTEST_PP PPR:$pg, PPR:$src)>; - def : Pat<(AArch64ptest (nxv2i1 PPR:$pg), (nxv2i1 PPR:$src)), - (PTEST_PP PPR:$pg, PPR:$src)>; - let AddedComplexity = 1 in { class LD1RPat<ValueType vt, SDPatternOperator operator, Instruction load, Instruction ptrue, ValueType index_vt, ComplexPattern CP, Operand immtype> : @@ -2347,6 +2395,9 @@ let Predicates = [HasSVEorSME] in { (AND_PPzPP (PTRUE_S 31), PPR:$Ps1, PPR:$Ps2)>; def : Pat<(nxv2i1 (and PPR:$Ps1, PPR:$Ps2)), (AND_PPzPP (PTRUE_D 31), PPR:$Ps1, PPR:$Ps2)>; 
+ // Emulate .Q operation using a PTRUE_D when the other lanes don't matter. + def : Pat<(nxv1i1 (and PPR:$Ps1, PPR:$Ps2)), + (AND_PPzPP (PTRUE_D 31), PPR:$Ps1, PPR:$Ps2)>; // Add more complex addressing modes here as required multiclass pred_load<ValueType Ty, ValueType PredTy, SDPatternOperator Load, diff --git a/llvm/lib/Target/AArch64/AArch64SchedA53.td b/llvm/lib/Target/AArch64/AArch64SchedA53.td index d18a05fda191..e378b043d37e 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedA53.td +++ b/llvm/lib/Target/AArch64/AArch64SchedA53.td @@ -28,7 +28,8 @@ def CortexA53Model : SchedMachineModel { list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F, PAUnsupported.F, - SMEUnsupported.F); + SMEUnsupported.F, + [HasMTE]); } diff --git a/llvm/lib/Target/AArch64/AArch64SchedA55.td b/llvm/lib/Target/AArch64/AArch64SchedA55.td index c6b112d0d2f1..141cc6b79c8b 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedA55.td +++ b/llvm/lib/Target/AArch64/AArch64SchedA55.td @@ -29,7 +29,7 @@ def CortexA55Model : SchedMachineModel { let PostRAScheduler = 1; // Enable PostRA scheduler pass. let CompleteModel = 0; // Covers instructions applicable to Cortex-A55. - list<Predicate> UnsupportedFeatures = [HasSVE]; + list<Predicate> UnsupportedFeatures = [HasSVE, HasMTE]; // FIXME: Remove when all errors have been fixed. let FullInstRWOverlapCheck = 0; diff --git a/llvm/lib/Target/AArch64/AArch64SchedA57.td b/llvm/lib/Target/AArch64/AArch64SchedA57.td index a860aa907fd1..8ce229374000 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedA57.td +++ b/llvm/lib/Target/AArch64/AArch64SchedA57.td @@ -33,7 +33,8 @@ def CortexA57Model : SchedMachineModel { list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F, PAUnsupported.F, - SMEUnsupported.F); + SMEUnsupported.F, + [HasMTE]); } //===----------------------------------------------------------------------===// @@ -459,9 +460,9 @@ def : InstRW<[A57Write_5cyc_2V], (instregex "^(FACGE|FACGT|FCMEQ|FCMGE|FCMGT|FCM // ASIMD FP convert, long and narrow def : InstRW<[A57Write_8cyc_3V], (instregex "^FCVT(L|N|XN)v")>; // ASIMD FP convert, other, D-form -def : InstRW<[A57Write_5cyc_1V], (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v2f32|v1i32|v2i32|v1i64)")>; +def : InstRW<[A57Write_5cyc_1V], (instregex "^[FSU]CVT([AMNPZ][SU])?(_Int)?(v2f32|v1i32|v2i32|v1i64)")>; // ASIMD FP convert, other, Q-form -def : InstRW<[A57Write_5cyc_2V], (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v4f32|v2f64|v4i32|v2i64)")>; +def : InstRW<[A57Write_5cyc_2V], (instregex "^[FSU]CVT([AMNPZ][SU])?(_Int)?(v4f32|v2f64|v4i32|v2i64)")>; // ASIMD FP divide, D-form, F32 def : InstRW<[A57Write_17cyc_1W], (instregex "FDIVv2f32")>; diff --git a/llvm/lib/Target/AArch64/AArch64SchedA64FX.td b/llvm/lib/Target/AArch64/AArch64SchedA64FX.td index 6b053f1969b4..4c65b6727d93 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedA64FX.td +++ b/llvm/lib/Target/AArch64/AArch64SchedA64FX.td @@ -18,11 +18,11 @@ def A64FXModel : SchedMachineModel { // Determined via a mix of micro-arch details and experimentation. let LoopMicroOpBufferSize = 128; let PostRAScheduler = 1; // Using PostRA sched. 
- let CompleteModel = 1; + let CompleteModel = 0; list<Predicate> UnsupportedFeatures = [HasSVE2, HasSVE2AES, HasSVE2SM4, HasSVE2SHA3, HasSVE2BitPerm, HasPAuth, - HasSVE2orSME]; + HasSVE2orSME, HasMTE, HasMatMulInt8, HasBF16]; let FullInstRWOverlapCheck = 0; } diff --git a/llvm/lib/Target/AArch64/AArch64SchedAmpere1.td b/llvm/lib/Target/AArch64/AArch64SchedAmpere1.td index 32f7299fbf87..b8d5a70d7ec6 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedAmpere1.td +++ b/llvm/lib/Target/AArch64/AArch64SchedAmpere1.td @@ -25,7 +25,9 @@ def Ampere1Model : SchedMachineModel { let CompleteModel = 1; list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F, - SMEUnsupported.F); + SMEUnsupported.F, + PAUnsupported.F, + [HasMTE]); } let SchedModel = Ampere1Model in { diff --git a/llvm/lib/Target/AArch64/AArch64SchedCyclone.td b/llvm/lib/Target/AArch64/AArch64SchedCyclone.td index 9fbb46919427..e2d916954060 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedCyclone.td +++ b/llvm/lib/Target/AArch64/AArch64SchedCyclone.td @@ -20,7 +20,8 @@ def CycloneModel : SchedMachineModel { list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F, PAUnsupported.F, - SMEUnsupported.F); + SMEUnsupported.F, + [HasMTE]); } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td b/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td index d66efb82fccc..f2863f5a8e3b 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td +++ b/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td @@ -26,7 +26,8 @@ def ExynosM3Model : SchedMachineModel { list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F, PAUnsupported.F, - SMEUnsupported.F); + SMEUnsupported.F, + [HasMTE]); } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td b/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td index 94e70793e855..ab1e680f9e99 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td +++ b/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td @@ -26,7 +26,8 @@ def ExynosM4Model : SchedMachineModel { list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F, PAUnsupported.F, - SMEUnsupported.F); + SMEUnsupported.F, + [HasMTE]); } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td b/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td index 1db5f5322a64..ae0b2b3eaeb6 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td +++ b/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td @@ -26,7 +26,8 @@ def ExynosM5Model : SchedMachineModel { list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F, PAUnsupported.F, - SMEUnsupported.F); + SMEUnsupported.F, + [HasMTE]); } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AArch64/AArch64SchedFalkor.td b/llvm/lib/Target/AArch64/AArch64SchedFalkor.td index 7c9b0afdd169..a765cd1cdfe3 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedFalkor.td +++ b/llvm/lib/Target/AArch64/AArch64SchedFalkor.td @@ -25,7 +25,8 @@ def FalkorModel : SchedMachineModel { list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F, PAUnsupported.F, - SMEUnsupported.F); + SMEUnsupported.F, + [HasMTE]); // FIXME: Remove when all errors have been fixed. 
let FullInstRWOverlapCheck = 0; } diff --git a/llvm/lib/Target/AArch64/AArch64SchedKryo.td b/llvm/lib/Target/AArch64/AArch64SchedKryo.td index cc568a2f2f17..3551066ee7c3 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedKryo.td +++ b/llvm/lib/Target/AArch64/AArch64SchedKryo.td @@ -29,7 +29,8 @@ def KryoModel : SchedMachineModel { list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F, PAUnsupported.F, - SMEUnsupported.F); + SMEUnsupported.F, + [HasMTE]); // FIXME: Remove when all errors have been fixed. let FullInstRWOverlapCheck = 0; } diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td new file mode 100644 index 000000000000..eb5b971d66e5 --- /dev/null +++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td @@ -0,0 +1,2279 @@ +//=- AArch64SchedNeoverseN2.td - NeoverseN2 Scheduling Defs --*- tablegen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the scheduling model for the Arm Neoverse N2 processors. +// +//===----------------------------------------------------------------------===// + +def NeoverseN2Model : SchedMachineModel { + let IssueWidth = 10; // Micro-ops dispatched at a time. + let MicroOpBufferSize = 160; // Entries in micro-op re-order buffer. + let LoadLatency = 4; // Optimistic load latency. + let MispredictPenalty = 10; // Extra cycles for mispredicted branch. + let LoopMicroOpBufferSize = 16; // NOTE: Copied from Cortex-A57. + let CompleteModel = 1; + + list<Predicate> UnsupportedFeatures = SMEUnsupported.F; +} + +//===----------------------------------------------------------------------===// +// Define each kind of processor resource and number available on Neoverse N2. +// Instructions are first fetched and then decoded into internal macro-ops +// (MOPs). From there, the MOPs proceed through register renaming and dispatch +// stages. A MOP can be split into two micro-ops further down the pipeline +// after the decode stage. Once dispatched, micro-ops wait for their operands +// and issue out-of-order to one of thirteen issue pipelines. Each issue +// pipeline can accept one micro-op per cycle. + +let SchedModel = NeoverseN2Model in { + +// Define the (13) issue ports. +def N2UnitB : ProcResource<2>; // Branch 0/1 +def N2UnitS : ProcResource<2>; // Integer single Cycle 0/1 +def N2UnitM0 : ProcResource<1>; // Integer multicycle 0 +def N2UnitM1 : ProcResource<1>; // Integer multicycle 1 +def N2UnitL01 : ProcResource<2>; // Load/Store 0/1 +def N2UnitL2 : ProcResource<1>; // Load 2 +def N2UnitD : ProcResource<2>; // Store data 0/1 +def N2UnitV0 : ProcResource<1>; // FP/ASIMD 0 +def N2UnitV1 : ProcResource<1>; // FP/ASIMD 1 + +def N2UnitV : ProcResGroup<[N2UnitV0, N2UnitV1]>; // FP/ASIMD 0/1 +def N2UnitM : ProcResGroup<[N2UnitM0, N2UnitM1]>; // Integer single/multicycle 0/1 +def N2UnitL : ProcResGroup<[N2UnitL01, N2UnitL2]>; // Load/Store 0/1 and Load 2 +def N2UnitI : ProcResGroup<[N2UnitS, N2UnitM0, N2UnitM1]>; // Integer single cycle 0/1 and single/multicycle 0/1 + +// Define commonly used read types. + +// No forwarding is provided for these types. 
+def : ReadAdvance<ReadI, 0>; +def : ReadAdvance<ReadISReg, 0>; +def : ReadAdvance<ReadIEReg, 0>; +def : ReadAdvance<ReadIM, 0>; +def : ReadAdvance<ReadIMA, 0>; +def : ReadAdvance<ReadID, 0>; +def : ReadAdvance<ReadExtrHi, 0>; +def : ReadAdvance<ReadAdrBase, 0>; +def : ReadAdvance<ReadST, 0>; +def : ReadAdvance<ReadVLD, 0>; + +def : WriteRes<WriteAtomic, []> { let Unsupported = 1; } +def : WriteRes<WriteBarrier, []> { let Latency = 1; } +def : WriteRes<WriteHint, []> { let Latency = 1; } +def : WriteRes<WriteLDHi, []> { let Latency = 4; } + +//===----------------------------------------------------------------------===// +// Define customized scheduler read/write types specific to the Neoverse N2. + +//===----------------------------------------------------------------------===// +// Define generic 1 micro-op types + +def N2Write_1cyc_1B : SchedWriteRes<[N2UnitB]> { let Latency = 1; } +def N2Write_1cyc_1I : SchedWriteRes<[N2UnitI]> { let Latency = 1; } +def N2Write_1cyc_1M : SchedWriteRes<[N2UnitM]> { let Latency = 1; } +def N2Write_1cyc_1M0 : SchedWriteRes<[N2UnitM0]> { let Latency = 1; } +def N2Write_1cyc_1L01 : SchedWriteRes<[N2UnitL01]> { let Latency = 1; } +def N2Write_2cyc_1M : SchedWriteRes<[N2UnitM]> { let Latency = 2; } +def N2Write_3cyc_1M : SchedWriteRes<[N2UnitM]> { let Latency = 3; } +def N2Write_2cyc_1M0 : SchedWriteRes<[N2UnitM0]> { let Latency = 2; + let ResourceCycles = [2]; } +def N2Write_3cyc_1M0 : SchedWriteRes<[N2UnitM0]> { let Latency = 3; + let ResourceCycles = [3]; } +def N2Write_5cyc_1M0 : SchedWriteRes<[N2UnitM0]> { let Latency = 5; + let ResourceCycles = [5]; } +def N2Write_12cyc_1M0 : SchedWriteRes<[N2UnitM0]> { let Latency = 12; + let ResourceCycles = [12]; } +def N2Write_20cyc_1M0 : SchedWriteRes<[N2UnitM0]> { let Latency = 20; + let ResourceCycles = [20]; } +def N2Write_4cyc_1L : SchedWriteRes<[N2UnitL]> { let Latency = 4; } +def N2Write_6cyc_1L : SchedWriteRes<[N2UnitL]> { let Latency = 6; } +def N2Write_2cyc_1V : SchedWriteRes<[N2UnitV]> { let Latency = 2; } +def N2Write_3cyc_1V : SchedWriteRes<[N2UnitV]> { let Latency = 3; } +def N2Write_4cyc_1V : SchedWriteRes<[N2UnitV]> { let Latency = 4; } +def N2Write_5cyc_1V : SchedWriteRes<[N2UnitV]> { let Latency = 5; } +def N2Write_12cyc_1V : SchedWriteRes<[N2UnitV]> { let Latency = 12; } +def N2Write_2cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 2; } +def N2Write_3cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 3; } +def N2Write_4cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 4; } +def N2Write_7cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 7; + let ResourceCycles = [7]; } +def N2Write_9cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 9; } +def N2Write_10cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 10; } +def N2Write_12cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 12; } +def N2Write_13cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 13; } +def N2Write_15cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 15; } +def N2Write_16cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 16; } +def N2Write_20cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 20; } +def N2Write_2cyc_1V1 : SchedWriteRes<[N2UnitV1]> { let Latency = 2; } +def N2Write_3cyc_1V1 : SchedWriteRes<[N2UnitV1]> { let Latency = 3; } +def N2Write_4cyc_1V1 : SchedWriteRes<[N2UnitV1]> { let Latency = 4; } +def N2Write_6cyc_1V1 : SchedWriteRes<[N2UnitV1]> { let Latency = 6; } +def N2Write_10cyc_1V1 : SchedWriteRes<[N2UnitV1]> { let Latency = 10; } +def N2Write_6cyc_1L01 : SchedWriteRes<[N2UnitL01]> { let Latency = 6; } 
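+// Where ResourceCycles is specified above it matches the latency, modelling
+// operations such as the 12- and 20-cycle divides that block M0 for their
+// whole duration rather than pipelining; defs without it occupy their unit
+// for a single cycle only.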
+ +//===----------------------------------------------------------------------===// +// Define generic 2 micro-op types + +def N2Write_1cyc_1B_1S : SchedWriteRes<[N2UnitB, N2UnitS]> { + let Latency = 1; + let NumMicroOps = 2; +} + +def N2Write_6cyc_1M0_1B : SchedWriteRes<[N2UnitM0, N2UnitB]> { + let Latency = 6; + let NumMicroOps = 2; +} + +def N2Write_9cyc_1M0_1L : SchedWriteRes<[N2UnitM0, N2UnitL]> { + let Latency = 9; + let NumMicroOps = 2; +} + +def N2Write_3cyc_1I_1M : SchedWriteRes<[N2UnitI, N2UnitM]> { + let Latency = 3; + let NumMicroOps = 2; +} + +def N2Write_4cyc_1I_1L : SchedWriteRes<[N2UnitI, N2UnitL]> { + let Latency = 4; + let NumMicroOps = 2; +} + +def N2Write_5cyc_1I_1L : SchedWriteRes<[N2UnitI, N2UnitL]> { + let Latency = 5; + let NumMicroOps = 2; +} + +def N2Write_6cyc_1I_1L : SchedWriteRes<[N2UnitI, N2UnitL]> { + let Latency = 6; + let NumMicroOps = 2; +} + +def N2Write_7cyc_1I_1L : SchedWriteRes<[N2UnitI, N2UnitL]> { + let Latency = 7; + let NumMicroOps = 2; +} + +def N2Write_1cyc_1L01_1D : SchedWriteRes<[N2UnitL01, N2UnitD]> { + let Latency = 1; + let NumMicroOps = 2; +} + +def N2Write_5cyc_1M0_1V : SchedWriteRes<[N2UnitM0, N2UnitV]> { + let Latency = 5; + let NumMicroOps = 2; +} + +def N2Write_2cyc_1L01_1V : SchedWriteRes<[N2UnitL01, N2UnitV]> { + let Latency = 2; + let NumMicroOps = 2; +} + +def N2Write_4cyc_1V1_1V : SchedWriteRes<[N2UnitV1, N2UnitV]> { + let Latency = 4; + let NumMicroOps = 2; +} + +def N2Write_4cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> { + let Latency = 4; + let NumMicroOps = 2; +} + +def N2Write_10cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> { + let Latency = 10; + let NumMicroOps = 2; + let ResourceCycles = [5, 5]; +} + +def N2Write_13cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> { + let Latency = 13; + let NumMicroOps = 2; + let ResourceCycles = [6, 7]; +} + +def N2Write_15cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> { + let Latency = 15; + let NumMicroOps = 2; + let ResourceCycles = [7, 8]; +} + +def N2Write_16cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> { + let Latency = 16; + let NumMicroOps = 2; + let ResourceCycles = [8, 8]; +} + +def N2Write_4cyc_2V : SchedWriteRes<[N2UnitV, N2UnitV]> { + let Latency = 4; + let NumMicroOps = 2; +} + +def N2Write_6cyc_2V : SchedWriteRes<[N2UnitV, N2UnitV]> { + let Latency = 6; + let NumMicroOps = 2; +} + +def N2Write_6cyc_2L : SchedWriteRes<[N2UnitL, N2UnitL]> { + let Latency = 6; + let NumMicroOps = 2; +} + +def N2Write_8cyc_1L_1V : SchedWriteRes<[N2UnitL, N2UnitV]> { + let Latency = 8; + let NumMicroOps = 2; +} + +def N2Write_4cyc_1L01_1V : SchedWriteRes<[N2UnitL01, N2UnitV]> { + let Latency = 4; + let NumMicroOps = 2; +} + +def N2Write_3cyc_1M0_1M : SchedWriteRes<[N2UnitM0, N2UnitM]> { + let Latency = 3; + let NumMicroOps = 2; +} + +def N2Write_2cyc_1M0_1M : SchedWriteRes<[N2UnitM0, N2UnitM]> { + let Latency = 2; + let NumMicroOps = 2; +} + +def N2Write_6cyc_2V1 : SchedWriteRes<[N2UnitV1, N2UnitV1]> { + let Latency = 6; + let NumMicroOps = 2; +} + +def N2Write_4cyc_1V0_1M : SchedWriteRes<[N2UnitV0, N2UnitM]> { + let Latency = 4; + let NumMicroOps = 2; +} + +def N2Write_5cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> { + let Latency = 5; + let NumMicroOps = 2; +} + +def N2Write_5cyc_1V1_1M0 : SchedWriteRes<[N2UnitV1, N2UnitM0]> { + let Latency = 5; + let NumMicroOps = 2; +} + +def N2Write_7cyc_1M0_1V0 : SchedWriteRes<[N2UnitM0, N2UnitV0]> { + let Latency = 7; + let NumMicroOps = 2; +} + +def N2Write_2cyc_1V0_1M : SchedWriteRes<[N2UnitV0, N2UnitM]> { + let Latency = 2; + let NumMicroOps = 2; +} + 
+def N2Write_6cyc_1V_1V1 : SchedWriteRes<[N2UnitV, N2UnitV1]> { + let Latency = 6; + let NumMicroOps = 2; +} + +def N2Write_6cyc_1L_1M : SchedWriteRes<[N2UnitL, N2UnitM]> { + let Latency = 6; + let NumMicroOps = 2; +} + +def N2Write_6cyc_1L_1S : SchedWriteRes<[N2UnitL, N2UnitS]> { + let Latency = 6; + let NumMicroOps = 2; +} + +def N2Write_9cyc_1L_1V : SchedWriteRes<[N2UnitL, N2UnitV]> { + let Latency = 9; + let NumMicroOps = 2; +} + +def N2Write_4cyc_2V1 : SchedWriteRes<[N2UnitV1, N2UnitV1]> { + let Latency = 4; + let NumMicroOps = 2; +} + +//===----------------------------------------------------------------------===// +// Define generic 3 micro-op types + +def N2Write_1cyc_1L01_1D_1I : SchedWriteRes<[N2UnitL01, N2UnitD, N2UnitI]> { + let Latency = 1; + let NumMicroOps = 3; +} + +def N2Write_2cyc_1L01_1V_1I : SchedWriteRes<[N2UnitL01, N2UnitV, N2UnitI]> { + let Latency = 2; + let NumMicroOps = 3; +} + +def N2Write_2cyc_1L01_2V : SchedWriteRes<[N2UnitL01, N2UnitV, N2UnitV]> { + let Latency = 2; + let NumMicroOps = 3; +} + +def N2Write_7cyc_1M_1M0_1V : SchedWriteRes<[N2UnitM, N2UnitM0, N2UnitV]> { + let Latency = 7; + let NumMicroOps = 3; +} + +def N2Write_8cyc_1M0_1V1_1V : SchedWriteRes<[N2UnitM0, N2UnitV1, N2UnitV]> { + let Latency = 8; + let NumMicroOps = 3; +} + +def N2Write_10cyc_1V_1L_1S : SchedWriteRes<[N2UnitV, N2UnitL, N2UnitS]> { + let Latency = 10; + let NumMicroOps = 3; +} + +def N2Write_2cyc_1L01_1S_1V : SchedWriteRes<[N2UnitL01, N2UnitS, N2UnitV]> { + let Latency = 2; + let NumMicroOps = 3; +} + +def N2Write_4cyc_1L01_1S_1V : SchedWriteRes<[N2UnitL01, N2UnitS, N2UnitV]> { + let Latency = 4; + let NumMicroOps = 3; +} + +def N2Write_6cyc_3L : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL]> { + let Latency = 6; + let NumMicroOps = 3; +} + +def N2Write_8cyc_1L_2V : SchedWriteRes<[N2UnitL, N2UnitV, N2UnitV]> { + let Latency = 8; + let NumMicroOps = 3; +} + +//===----------------------------------------------------------------------===// +// Define generic 4 micro-op types + +def N2Write_2cyc_1L01_2V_1I : SchedWriteRes<[N2UnitL01, N2UnitV, N2UnitV, + N2UnitI]> { + let Latency = 2; + let NumMicroOps = 4; +} + +def N2Write_6cyc_4V0 : SchedWriteRes<[N2UnitV0, N2UnitV0, N2UnitV0, N2UnitV0]> { + let Latency = 6; + let NumMicroOps = 4; +} + +def N2Write_4cyc_4V : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV, N2UnitV]> { + let Latency = 4; + let NumMicroOps = 4; +} + +def N2Write_6cyc_4V : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV, N2UnitV]> { + let Latency = 6; + let NumMicroOps = 4; +} + +def N2Write_8cyc_2L_2V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitV, N2UnitV]> { + let Latency = 8; + let NumMicroOps = 4; +} + +def N2Write_9cyc_2L_2V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitV, N2UnitV]> { + let Latency = 9; + let NumMicroOps = 4; +} + +def N2Write_2cyc_2L01_2V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitV, + N2UnitV]> { + let Latency = 2; + let NumMicroOps = 4; +} + +def N2Write_4cyc_2L01_2V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitV, + N2UnitV]> { + let Latency = 4; + let NumMicroOps = 4; +} + +def N2Write_5cyc_2L01_2V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitV, + N2UnitV]> { + let Latency = 5; + let NumMicroOps = 4; +} + +def N2Write_8cyc_2M0_2V0 : SchedWriteRes<[N2UnitM0, N2UnitM0, N2UnitV0, + N2UnitV0]> { + let Latency = 8; + let NumMicroOps = 4; +} + +def N2Write_11cyc_2V_2V1 : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV1, + N2UnitV1]> { + let Latency = 11; + let NumMicroOps = 4; +} + +def N2Write_9cyc_2V_2V1 : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV1, + N2UnitV1]> { + let Latency =
9; + let NumMicroOps = 4; +} + +def N2Write_8cyc_2V_2V1 : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV1, + N2UnitV1]> { + let Latency = 8; + let NumMicroOps = 4; +} + +def N2Write_10cyc_2L_2V1 : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitV1, + N2UnitV1]> { + let Latency = 10; + let NumMicroOps = 4; +} + +def N2Write_10cyc_2L_2V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitV, N2UnitV]> { + let Latency = 10; + let NumMicroOps = 4; +} + +def N2Write_4cyc_2M0_2M : SchedWriteRes<[N2UnitM0, N2UnitM0, N2UnitM, + N2UnitM]> { + let Latency = 4; + let NumMicroOps = 4; +} + +def N2Write_6cyc_2I_2L : SchedWriteRes<[N2UnitI, N2UnitI, N2UnitL, N2UnitL]> { + let Latency = 6; + let NumMicroOps = 4; +} + +def N2Write_7cyc_4L : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL, N2UnitL]> { + let Latency = 7; + let NumMicroOps = 4; +} + +//===----------------------------------------------------------------------===// +// Define generic 5 micro-op types + +def N2Write_2cyc_1L01_2V_2I : SchedWriteRes<[N2UnitL01, N2UnitV, N2UnitV, + N2UnitI, N2UnitI]> { + let Latency = 2; + let NumMicroOps = 5; +} + +def N2Write_8cyc_2L_3V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitV, N2UnitV, + N2UnitV]> { + let Latency = 8; + let NumMicroOps = 5; +} + +//===----------------------------------------------------------------------===// +// Define generic 6 micro-op types + +def N2Write_8cyc_3L_3V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL, + N2UnitV, N2UnitV, N2UnitV]> { + let Latency = 8; + let NumMicroOps = 6; +} + +def N2Write_2cyc_3L01_3V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01, + N2UnitV, N2UnitV, N2UnitV]> { + let Latency = 2; + let NumMicroOps = 6; +} + +def N2Write_6cyc_3L01_3V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01, + N2UnitV, N2UnitV, N2UnitV]> { + let Latency = 6; + let NumMicroOps = 6; +} + +def N2Write_4cyc_3L01_3V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01, + N2UnitV, N2UnitV, N2UnitV]> { + let Latency = 4; + let NumMicroOps = 6; +} + +def N2Write_10cyc_2L_2V_2S : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitV, N2UnitV, + N2UnitS, N2UnitS]> { + let Latency = 10; + let NumMicroOps = 6; +} + +//===----------------------------------------------------------------------===// +// Define generic 7 micro-op types + +def N2Write_8cyc_3L_4V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL, + N2UnitV, N2UnitV, N2UnitV, N2UnitV]> { + let Latency = 8; + let NumMicroOps = 7; +} + +//===----------------------------------------------------------------------===// +// Define generic 8 micro-op types + +def N2Write_6cyc_8V : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV, N2UnitV, + N2UnitV, N2UnitV, N2UnitV, N2UnitV]> { + let Latency = 6; + let NumMicroOps = 8; +} + +def N2Write_2cyc_4L01_4V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01, + N2UnitL01, N2UnitV, N2UnitV, N2UnitV, + N2UnitV]> { + let Latency = 2; + let NumMicroOps = 8; +} + +def N2Write_5cyc_4L01_4V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01, + N2UnitL01, N2UnitV, N2UnitV, N2UnitV, + N2UnitV]> { + let Latency = 5; + let NumMicroOps = 8; +} + +def N2Write_8cyc_4L_4V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL, N2UnitL, + N2UnitV, N2UnitV, N2UnitV, N2UnitV]> { + let Latency = 8; + let NumMicroOps = 8; +} + +def N2Write_9cyc_4L_4V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL, N2UnitL, + N2UnitV, N2UnitV, N2UnitV, N2UnitV]> { + let Latency = 9; + let NumMicroOps = 8; +} + +//===----------------------------------------------------------------------===// +// Define generic 10 micro-op types + +def N2Write_7cyc_5L01_5V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01, + N2UnitL01,
N2UnitL01, N2UnitV, + N2UnitV, N2UnitV, N2UnitV, N2UnitV]> { + let Latency = 7; + let NumMicroOps = 10; +} + +//===----------------------------------------------------------------------===// +// Define generic 12 micro-op types + +def N2Write_7cyc_6L01_6V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01, + N2UnitL01, N2UnitL01, N2UnitL01, + N2UnitV, N2UnitV, N2UnitV, N2UnitV, + N2UnitV, N2UnitV]> { + let Latency = 7; + let NumMicroOps = 12; +} + +//===----------------------------------------------------------------------===// +// Define generic 15 micro-op types + +def N2Write_7cyc_5L01_5S_5V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01, + N2UnitL01, N2UnitL01, N2UnitS, + N2UnitS, N2UnitS, N2UnitS, + N2UnitS, N2UnitV, N2UnitV, + N2UnitV, N2UnitV, N2UnitV]> { + let Latency = 7; + let NumMicroOps = 15; +} + +//===----------------------------------------------------------------------===// +// Define generic 18 micro-op types + +def N2Write_11cyc_9L01_9V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01, + N2UnitL01, N2UnitL01, N2UnitL01, + N2UnitL01, N2UnitL01, N2UnitL01, + N2UnitV, N2UnitV, N2UnitV, + N2UnitV, N2UnitV, N2UnitV, + N2UnitV, N2UnitV, N2UnitV]> { + let Latency = 11; + let NumMicroOps = 18; +} + +//===----------------------------------------------------------------------===// +// Define generic 27 micro-op types + +def N2Write_11cyc_9L01_9S_9V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01, + N2UnitL01, N2UnitL01, N2UnitL01, + N2UnitL01, N2UnitL01, N2UnitL01, + N2UnitS, N2UnitS, N2UnitS, + N2UnitS, N2UnitS, N2UnitS, + N2UnitS, N2UnitS, N2UnitS, + N2UnitV, N2UnitV, N2UnitV, + N2UnitV, N2UnitV, N2UnitV, + N2UnitV, N2UnitV, N2UnitV]> { + let Latency = 11; + let NumMicroOps = 27; +} + +// Miscellaneous +// ----------------------------------------------------------------------------- + +def : InstRW<[WriteI], (instrs COPY)>; + +// Branch Instructions +// ----------------------------------------------------------------------------- + +// Branch, immed +// Compare and branch +def : SchedAlias<WriteBr, N2Write_1cyc_1B>; + +// Branch, register +def : SchedAlias<WriteBrReg, N2Write_1cyc_1B>; + +// Branch and link, immed +// Branch and link, register +def : InstRW<[N2Write_1cyc_1B_1S], (instrs BL, BLR)>; + +// Arithmetic and Logical Instructions +// ----------------------------------------------------------------------------- + +// ALU, basic +// ALU, basic, flagset +def : SchedAlias<WriteI, N2Write_1cyc_1I>; + +// ALU, extend and shift +def : SchedAlias<WriteISReg, N2Write_2cyc_1M>; +def : SchedAlias<WriteIEReg, N2Write_2cyc_1M>; + +// Arithmetic, immediate to logical address tag +def : InstRW<[N2Write_2cyc_1M], (instrs ADDG, SUBG)>; + +// Convert floating-point condition flags +// Flag manipulation instructions +def : WriteRes<WriteSys, []> { let Latency = 1; } + +// Insert Random Tags +def : InstRW<[N2Write_2cyc_1M], (instrs IRG, IRGstack)>; + +// Insert Tag Mask +// Subtract Pointer +// Subtract Pointer, flagset +def : InstRW<[N2Write_1cyc_1I], (instrs GMI, SUBP, SUBPS)>; + +// Move and shift instructions +// ----------------------------------------------------------------------------- + +def : SchedAlias<WriteImm, N2Write_1cyc_1I>; + +// Divide and Multiply Instructions +// ----------------------------------------------------------------------------- + +// SDIV, UDIV +def : SchedAlias<WriteID32, N2Write_12cyc_1M0>; +def : SchedAlias<WriteID64, N2Write_20cyc_1M0>; + +def : WriteRes<WriteIM32, [N2UnitM]> { let Latency = 2; } +def : WriteRes<WriteIM64, [N2UnitM]> { let Latency = 2; } 
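
Every record above follows the same shape: the name N2Write_<latency>cyc_<units> encodes the total latency and the per-pipeline micro-ops, and the body restates both fields for the scheduler. A minimal sketch of the convention, using a hypothetical record name that is not part of this patch:

// Hypothetical illustration only; not a record added by this commit.
// "3cyc" matches Latency; "1I_1M" matches the two units listed.
def N2Write_3cyc_1I_1M_Sketch : SchedWriteRes<[N2UnitI, N2UnitM]> {
  let Latency = 3;      // cycles until the result is available
  let NumMicroOps = 2;  // by this file's convention, one per listed unit
}
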
+
+// Multiply high
+def : InstRW<[N2Write_3cyc_1M], (instrs SMULHrr, UMULHrr)>;
+
+// Pointer Authentication Instructions (v8.3 PAC)
+// -----------------------------------------------------------------------------
+
+// Authenticate data address
+// Authenticate instruction address
+// Compute pointer authentication code for data address
+// Compute pointer authentication code, using generic key
+// Compute pointer authentication code for instruction address
+def : InstRW<[N2Write_5cyc_1M0], (instregex "^AUT", "^PAC")>;
+
+// Branch and link, register, with pointer authentication
+// Branch, register, with pointer authentication
+// Branch, return, with pointer authentication
+def : InstRW<[N2Write_6cyc_1M0_1B], (instrs BLRAA, BLRAAZ, BLRAB, BLRABZ, BRAA,
+                                            BRAAZ, BRAB, BRABZ, RETAA, RETAB,
+                                            ERETAA, ERETAB)>;
+
+// Load register, with pointer authentication
+def : InstRW<[N2Write_9cyc_1M0_1L], (instregex "^LDRA[AB](indexed|writeback)")>;
+
+// Strip pointer authentication code
+def : InstRW<[N2Write_2cyc_1M0], (instrs XPACD, XPACI, XPACLRI)>;
+
+// Miscellaneous data-processing instructions
+// -----------------------------------------------------------------------------
+
+// Bitfield extract, one reg
+// Bitfield extract, two regs
+// NOTE: We don't model the difference between EXTR where both operands are the
+// same (one reg).
+def : SchedAlias<WriteExtr, N2Write_3cyc_1I_1M>;
+def : InstRW<[N2Write_3cyc_1I_1M], (instrs EXTRWrri, EXTRXrri)>;
+
+// Bitfield move, basic
+def : SchedAlias<WriteIS, N2Write_1cyc_1I>;
+
+// Bitfield move, insert
+def : InstRW<[N2Write_2cyc_1M], (instregex "^BFM[WX]ri$")>;
+
+// Load instructions
+// -----------------------------------------------------------------------------
+
+def : SchedAlias<WriteLD, N2Write_4cyc_1L>;
+def : SchedAlias<WriteLDIdx, N2Write_4cyc_1I_1L>;
+
+// Load pair, signed immed offset, signed words
+def : InstRW<[N2Write_5cyc_1M0, WriteLDHi], (instrs LDPSWi)>;
+// Load pair, immed post-index or immed pre-index, signed words
+def : InstRW<[N2Write_5cyc_1M0, WriteLDHi, WriteAdr],
+             (instregex "^LDPSW(post|pre)$")>;
+
+// Store instructions
+// -----------------------------------------------------------------------------
+
+def : SchedAlias<WriteST, N2Write_1cyc_1L01_1D>;
+def : SchedAlias<WriteSTIdx, N2Write_1cyc_1L01_1D_1I>;
+def : SchedAlias<WriteSTP, N2Write_1cyc_1L01_1D>;
+def : SchedAlias<WriteAdr, N2Write_1cyc_1I>; // copied from A57.
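
Two binding idioms alternate through the rest of the file. SchedAlias retargets a generic SchedWrite class, so every instruction mapped to that class picks up the Neoverse N2 numbers, while InstRW overrides individual opcodes (listed directly or matched by regex) and takes precedence over the class mapping. Both lines below are copied from the hunk above; only the juxtaposition is editorial:

def : SchedAlias<WriteLD, N2Write_4cyc_1L>;                   // all plain loads
def : InstRW<[N2Write_5cyc_1M0, WriteLDHi], (instrs LDPSWi)>; // LDPSW override
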
+
+// Tag load instructions
+// -----------------------------------------------------------------------------
+
+// Load allocation tag
+// Load multiple allocation tags
+def : InstRW<[N2Write_4cyc_1L], (instrs LDG, LDGM)>;
+
+// Tag store instructions
+// -----------------------------------------------------------------------------
+
+// Store allocation tags to one or two granules, post-index
+// Store allocation tags to one or two granules, pre-index
+// Store allocation tag to one or two granules, zeroing, post-index
+// Store Allocation Tag to one or two granules, zeroing, pre-index
+// Store allocation tag and reg pair to memory, post-Index
+// Store allocation tag and reg pair to memory, pre-Index
+def : InstRW<[N2Write_1cyc_1L01_1D_1I], (instrs STGPreIndex, STGPostIndex,
+                                                ST2GPreIndex, ST2GPostIndex,
+                                                STZGPreIndex, STZGPostIndex,
+                                                STZ2GPreIndex, STZ2GPostIndex,
+                                                STGPpre, STGPpost)>;
+
+// Store allocation tags to one or two granules, signed offset
+// Store allocation tag to two granules, zeroing, signed offset
+// Store allocation tag and reg pair to memory, signed offset
+// Store multiple allocation tags
+def : InstRW<[N2Write_1cyc_1L01_1D], (instrs STGOffset, ST2GOffset, STZGOffset,
+                                             STZ2GOffset, STGPi, STGM, STZGM)>;
+
+// FP data processing instructions
+// -----------------------------------------------------------------------------
+
+// FP absolute value
+// FP arithmetic
+// FP min/max
+// FP negate
+// FP select
+def : SchedAlias<WriteF, N2Write_2cyc_1V>;
+
+// FP compare
+def : SchedAlias<WriteFCmp, N2Write_2cyc_1V0>;
+
+// FP divide, square root
+def : SchedAlias<WriteFDiv, N2Write_7cyc_1V0>;
+
+// FP divide, H-form
+def : InstRW<[N2Write_7cyc_1V0], (instrs FDIVHrr)>;
+// FP divide, S-form
+def : InstRW<[N2Write_10cyc_1V0], (instrs FDIVSrr)>;
+// FP divide, D-form
+def : InstRW<[N2Write_15cyc_1V0], (instrs FDIVDrr)>;
+
+// FP square root, H-form
+def : InstRW<[N2Write_7cyc_1V0], (instrs FSQRTHr)>;
+// FP square root, S-form
+def : InstRW<[N2Write_9cyc_1V0], (instrs FSQRTSr)>;
+// FP square root, D-form
+def : InstRW<[N2Write_16cyc_1V0], (instrs FSQRTDr)>;
+
+// FP multiply
+def : WriteRes<WriteFMul, [N2UnitV]> { let Latency = 3; }
+
+// FP multiply accumulate
+def : InstRW<[N2Write_4cyc_1V], (instregex "^FN?M(ADD|SUB)[HSD]rrr$")>;
+
+// FP round to integral
+def : InstRW<[N2Write_3cyc_1V0], (instregex "^FRINT[AIMNPXZ][HSD]r$",
+                                            "^FRINT(32|64)[XZ][SD]r$")>;
+
+// FP miscellaneous instructions
+// -----------------------------------------------------------------------------
+
+// FP convert, from gen to vec reg
+def : InstRW<[N2Write_3cyc_1M0], (instregex "^[SU]CVTF[SU][WX][HSD]ri$")>;
+
+// FP convert, from vec to gen reg
+def : InstRW<[N2Write_3cyc_1V], (instregex "^FCVT[AMNPZ][SU][SU][WX][HSD]r$")>;
+
+// FP convert, Javascript from vec to gen reg
+// FP convert, from vec to vec reg
+def : SchedAlias<WriteFCvt, N2Write_3cyc_1V0>;
+
+// FP move, immed
+// FP move, register
+def : SchedAlias<WriteFImm, N2Write_2cyc_1V>;
+
+// FP transfer, from gen to low half of vec reg
+def : InstRW<[N2Write_3cyc_1M0], (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr,
+                                         FMOVHWr, FMOVHXr, FMOVSWr, FMOVDXr)>;
+
+// FP transfer, from gen to high half of vec reg
+def : InstRW<[N2Write_5cyc_1M0_1V], (instrs FMOVXDHighr)>;
+
+// FP transfer, from vec to gen reg
+def : SchedAlias<WriteFCopy, N2Write_2cyc_1V>;
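
A third idiom shows up in the FP block above: a one-off latency can be attached to a generic write class inline with WriteRes instead of going through SchedAlias. The first line is from the hunk; the commented alternative is an editorial sketch of what appears to be the equivalent spelling:

def : WriteRes<WriteFMul, [N2UnitV]> { let Latency = 3; }
// Presumably equivalent, via the named record used elsewhere in this file:
// def : SchedAlias<WriteFMul, N2Write_3cyc_1V>;
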
+
+// FP load instructions
+// -----------------------------------------------------------------------------
+
+// Load vector reg, literal, S/D/Q forms
+// Load vector reg, unscaled immed
+def : InstRW<[N2Write_6cyc_1L], (instregex "^LDR[SDQ]l$",
+                                           "^LDUR[BHSDQ]i$")>;
+
+// Load vector reg, immed post-index
+def : InstRW<[N2Write_6cyc_1I_1L, WriteI], (instregex "^LDR[BHSDQ]post$")>;
+// Load vector reg, immed pre-index
+def : InstRW<[N2Write_6cyc_1I_1L, WriteAdr], (instregex "^LDR[BHSDQ]pre$")>;
+
+// Load vector reg, unsigned immed
+def : InstRW<[N2Write_6cyc_1L], (instregex "^LDR[BHSDQ]ui$")>;
+
+// Load vector reg, register offset, basic
+// Load vector reg, register offset, scale, S/D-form
+// Load vector reg, register offset, extend
+// Load vector reg, register offset, extend, scale, S/D-form
+def : InstRW<[N2Write_6cyc_1L, ReadAdrBase], (instregex "^LDR[BSD]ro[WX]$")>;
+
+// Load vector reg, register offset, scale, H/Q-form
+// Load vector reg, register offset, extend, scale, H/Q-form
+def : InstRW<[N2Write_7cyc_1I_1L, ReadAdrBase], (instregex "^LDR[HQ]ro[WX]$")>;
+
+// Load vector pair, immed offset, S/D-form
+def : InstRW<[N2Write_6cyc_1L, WriteLDHi], (instregex "^LDN?P[SD]i$")>;
+
+// Load vector pair, immed offset, Q-form
+def : InstRW<[N2Write_6cyc_2L, WriteLDHi], (instrs LDPQi, LDNPQi)>;
+
+// Load vector pair, immed post-index, S/D-form
+// Load vector pair, immed pre-index, S/D-form
+def : InstRW<[N2Write_6cyc_1I_1L, WriteLDHi, WriteAdr],
+             (instregex "^LDP[SD](pre|post)$")>;
+
+// Load vector pair, immed post-index, Q-form
+// Load vector pair, immed pre-index, Q-form
+def : InstRW<[N2Write_6cyc_2I_2L, WriteLDHi, WriteAdr], (instrs LDPQpost,
+                                                                LDPQpre)>;
+
+// FP store instructions
+// -----------------------------------------------------------------------------
+
+// Store vector reg, unscaled immed, B/H/S/D-form
+// Store vector reg, unscaled immed, Q-form
+def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STUR[BHSDQ]i$")>;
+
+// Store vector reg, immed post-index, B/H/S/D-form
+// Store vector reg, immed post-index, Q-form
+// Store vector reg, immed pre-index, B/H/S/D-form
+// Store vector reg, immed pre-index, Q-form
+def : InstRW<[WriteAdr, N2Write_2cyc_1L01_1V_1I, ReadAdrBase],
+             (instregex "^STR[BHSDQ](pre|post)$")>;
+
+// Store vector reg, unsigned immed, B/H/S/D-form
+// Store vector reg, unsigned immed, Q-form
+def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STR[BHSDQ]ui$")>;
+
+// Store vector reg, register offset, basic, B/H/S/D-form
+// Store vector reg, register offset, basic, Q-form
+// Store vector reg, register offset, scale, S/D-form
+// Store vector reg, register offset, extend, B/H/S/D-form
+// Store vector reg, register offset, extend, Q-form
+// Store vector reg, register offset, extend, scale, S/D-form
+def : InstRW<[N2Write_2cyc_1L01_1V, ReadAdrBase],
+             (instregex "^STR[BSD]ro[WX]$")>;
+
+// Store vector reg, register offset, scale, H-form
+// Store vector reg, register offset, scale, Q-form
+// Store vector reg, register offset, extend, scale, H-form
+// Store vector reg, register offset, extend, scale, Q-form
+def : InstRW<[N2Write_2cyc_1L01_1V, ReadAdrBase],
+             (instregex "^STR[HQ]ro[WX]$")>;
+
+// Store vector pair, immed offset, S-form
+// Store vector pair, immed offset, D-form
+def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STN?P[SD]i$")>;
+
+// Store vector pair, immed offset, Q-form
+def : InstRW<[N2Write_2cyc_1L01_2V], (instrs STPQi, STNPQi)>;
+
+// Store vector pair, immed post-index, S-form
+// Store vector pair, immed post-index, D-form
+// Store vector pair, immed pre-index, S-form
+// Store vector pair, immed pre-index, D-form
+def : InstRW<[WriteAdr, N2Write_2cyc_1L01_1V_1I],
+             (instregex "^STP[SD](pre|post)$")>;
+
+// Store vector pair, immed post-index, Q-form
+def : InstRW<[N2Write_2cyc_1L01_2V_1I], (instrs STPQpost)>;
+
+// Store vector pair, immed pre-index, Q-form
+def : InstRW<[N2Write_2cyc_1L01_2V_2I], (instrs STPQpre)>;
+
+// ASIMD integer instructions
+// -----------------------------------------------------------------------------
+
+// ASIMD absolute diff
+// ASIMD absolute diff long
+// ASIMD arith, basic
+// ASIMD arith, complex
+// ASIMD arith, pair-wise
+// ASIMD compare
+// ASIMD logical
+// ASIMD max/min, basic and pair-wise
+def : SchedAlias<WriteVd, N2Write_2cyc_1V>;
+def : SchedAlias<WriteVq, N2Write_2cyc_1V>;
+
+// ASIMD absolute diff accum
+// ASIMD absolute diff accum long
+def : InstRW<[N2Write_4cyc_1V1],
+             (instregex "^SABAv", "^UABAv", "^SABALv", "^UABALv")>;
+
+// ASIMD arith, reduce, 4H/4S
+def : InstRW<[N2Write_2cyc_1V1], (instregex "^(ADDV|[SU]ADDLV)v4(i16|i32)v$")>;
+
+// ASIMD arith, reduce, 8B/8H
+def : InstRW<[N2Write_4cyc_1V1_1V],
+             (instregex "^(ADDV|[SU]ADDLV)v8(i8|i16)v$")>;
+
+// ASIMD arith, reduce, 16B
+def : InstRW<[N2Write_4cyc_1V1], (instrs ADDVv16i8v, SADDLVv16i8v,
+                                         UADDLVv16i8v)>;
+
+// ASIMD dot product
+// ASIMD dot product using signed and unsigned integers
+def : InstRW<[N2Write_3cyc_1V],
+             (instregex "^([SU]|SU|US)DOT(lane)?(v8|v16)i8$")>;
+
+// ASIMD matrix multiply-accumulate
+def : InstRW<[N2Write_3cyc_1V], (instrs SMMLA, UMMLA, USMMLA)>;
+
+// ASIMD max/min, reduce, 4H/4S
+def : InstRW<[N2Write_2cyc_1V1], (instregex "^[SU](MAX|MIN)Vv4i16v$",
+                                            "^[SU](MAX|MIN)Vv4i32v$")>;
+
+// ASIMD max/min, reduce, 8B/8H
+def : InstRW<[N2Write_4cyc_1V1_1V], (instregex "^[SU](MAX|MIN)Vv8i8v$",
+                                               "^[SU](MAX|MIN)Vv8i16v$")>;
+
+// ASIMD max/min, reduce, 16B
+def : InstRW<[N2Write_4cyc_2V1], (instregex "[SU](MAX|MIN)Vv16i8v$")>;
+
+// ASIMD multiply
+def : InstRW<[N2Write_4cyc_1V0], (instregex "^MULv", "^SQ(R)?DMULHv")>;
+
+// ASIMD multiply accumulate
+def : InstRW<[N2Write_4cyc_1V0], (instregex "^MLAv", "^MLSv")>;
+
+// ASIMD multiply accumulate high
+def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQRDMLAHv", "^SQRDMLSHv")>;
+
+// ASIMD multiply accumulate long
+def : InstRW<[N2Write_4cyc_1V0], (instregex "^[SU]MLALv", "^[SU]MLSLv")>;
+
+// ASIMD multiply accumulate saturating long
+def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQDMLALv", "^SQDMLSLv")>;
+
+// ASIMD multiply/multiply long (8x8) polynomial, D-form
+// ASIMD multiply/multiply long (8x8) polynomial, Q-form
+def : InstRW<[N2Write_3cyc_1V0], (instregex "^PMULL?(v8i8|v16i8)$")>;
+
+// ASIMD multiply long
+def : InstRW<[N2Write_3cyc_1V], (instregex "^[SU]MULLv", "^SQDMULLv")>;
+
+// ASIMD pairwise add and accumulate long
+def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]ADALPv")>;
+
+// ASIMD shift accumulate
+def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]SRAv", "^[SU]RSRAv")>;
+
+// ASIMD shift by immed, basic
+def : InstRW<[N2Write_2cyc_1V1], (instregex "^SHLv", "^SHLLv", "^SHRNv",
+                                            "^SSHLLv", "^SSHRv", "^USHLLv",
+                                            "^USHRv")>;
+
+// ASIMD shift by immed and insert, basic
+def : InstRW<[N2Write_2cyc_1V1], (instregex "^SLIv", "^SRIv")>;
+
+// ASIMD shift by immed, complex
+def : InstRW<[N2Write_4cyc_1V1],
+             (instregex "^RSHRNv", "^SQRSHRNv", "^SQRSHRUNv",
+                        "^(SQSHLU?|UQSHL)[bhsd]$",
+                        "^(SQSHLU?|UQSHL)(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)_shift$",
+                        "^SQSHRNv", "^SQSHRUNv", "^SRSHRv", "^UQRSHRNv",
+                        "^UQSHRNv", "^URSHRv")>;
+
+// ASIMD shift by register, basic
+def : InstRW<[N2Write_2cyc_1V1], (instregex "^[SU]SHLv")>;
+
+// ASIMD shift by register, complex
+def : InstRW<[N2Write_4cyc_1V1],
+             (instregex "^[SU]RSHLv", "^[SU]QRSHLv",
+                        "^[SU]QSHL(v1i8|v1i16|v1i32|v1i64|v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)$")>;
+
+// ASIMD floating-point instructions
+// -----------------------------------------------------------------------------
+
+// ASIMD FP absolute value/difference
+// ASIMD FP arith, normal
+// ASIMD FP compare
+// ASIMD FP complex add
+// ASIMD FP max/min, normal
+// ASIMD FP max/min, pairwise
+// ASIMD FP negate
+// Handled by SchedAlias<WriteV[dq], ...>
+
+// ASIMD FP complex multiply add
+def : InstRW<[N2Write_4cyc_1V], (instregex "^FCMLAv")>;
+
+// ASIMD FP convert, long (F16 to F32)
+def : InstRW<[N2Write_4cyc_2V0], (instregex "^FCVTL(v4|v8)i16")>;
+
+// ASIMD FP convert, long (F32 to F64)
+def : InstRW<[N2Write_3cyc_1V0], (instregex "^FCVTL(v2|v4)i32")>;
+
+// ASIMD FP convert, narrow (F32 to F16)
+def : InstRW<[N2Write_4cyc_2V0], (instregex "^FCVTN(v4|v8)i16")>;
+
+// ASIMD FP convert, narrow (F64 to F32)
+def : InstRW<[N2Write_3cyc_1V0], (instregex "^FCVTN(v2|v4)i32",
+                                            "^FCVTXN(v2|v4)f32")>;
+
+// ASIMD FP convert, other, D-form F32 and Q-form F64
+def : InstRW<[N2Write_3cyc_1V0], (instregex "^[FSU]CVT[AMNPZ][SU]v2f(32|64)$",
+                                            "^[SU]CVTFv2f(32|64)$")>;
+
+// ASIMD FP convert, other, D-form F16 and Q-form F32
+def : InstRW<[N2Write_4cyc_2V0], (instregex "^[FSU]CVT[AMNPZ][SU]v4f(16|32)$",
+                                            "^[SU]CVTFv4f(16|32)$")>;
+
+// ASIMD FP convert, other, Q-form F16
+def : InstRW<[N2Write_6cyc_4V0], (instregex "^[FSU]CVT[AMNPZ][SU]v8f16$",
+                                            "^[SU]CVTFv8f16$")>;
+
+// ASIMD FP divide, D-form, F16
+def : InstRW<[N2Write_7cyc_1V0], (instrs FDIVv4f16)>;
+
+// ASIMD FP divide, D-form, F32
+def : InstRW<[N2Write_10cyc_2V0], (instrs FDIVv2f32)>;
+
+// ASIMD FP divide, Q-form, F16
+def : InstRW<[N2Write_13cyc_2V0], (instrs FDIVv8f16)>;
+
+// ASIMD FP divide, Q-form, F32
+def : InstRW<[N2Write_10cyc_2V0], (instrs FDIVv4f32)>;
+
+// ASIMD FP divide, Q-form, F64
+def : InstRW<[N2Write_15cyc_2V0], (instrs FDIVv2f64)>;
+
+// ASIMD FP max/min, reduce, F32 and D-form F16
+def : InstRW<[N2Write_4cyc_1V], (instregex "^(FMAX|FMIN)(NM)?Vv4(i16|i32)v$")>;
+
+// ASIMD FP max/min, reduce, Q-form F16
+def : InstRW<[N2Write_6cyc_2V], (instregex "^(FMAX|FMIN)(NM)?Vv8i16v$")>;
+
+// ASIMD FP multiply
+def : InstRW<[N2Write_3cyc_1V], (instregex "^FMULv", "^FMULXv")>;
+
+// ASIMD FP multiply accumulate
+def : InstRW<[N2Write_4cyc_1V], (instregex "^FMLAv", "^FMLSv")>;
+
+// ASIMD FP multiply accumulate long
+def : InstRW<[N2Write_5cyc_1V], (instregex "^FMLALv", "^FMLSLv")>;
+
+// ASIMD FP round, D-form F32 and Q-form F64
+def : InstRW<[N2Write_3cyc_1V0],
+             (instregex "^FRINT[AIMNPXZ]v2f(32|64)$",
+                        "^FRINT(32|64)[XZ]v2f(32|64)$")>;
+
+// ASIMD FP round, D-form F16 and Q-form F32
+def : InstRW<[N2Write_4cyc_2V0],
+             (instregex "^FRINT[AIMNPXZ]v4f(16|32)$",
+                        "^FRINT(32|64)[XZ]v4f32$")>;
+
+// ASIMD FP round, Q-form F16
+def : InstRW<[N2Write_6cyc_4V0], (instregex "^FRINT[AIMNPXZ]v8f16$")>;
+
+// ASIMD FP square root, D-form, F16
+def : InstRW<[N2Write_7cyc_1V0], (instrs FSQRTv4f16)>;
+
+// ASIMD FP square root, D-form, F32
+def : InstRW<[N2Write_10cyc_2V0], (instrs FSQRTv2f32)>;
+
+// ASIMD FP square root, Q-form, F16
+def : InstRW<[N2Write_13cyc_2V0], (instrs FSQRTv8f16)>;
+
+// ASIMD FP square root, Q-form, F32
+def : InstRW<[N2Write_10cyc_2V0], (instrs FSQRTv4f32)>;
+
+// ASIMD FP square root, Q-form, F64
+def : InstRW<[N2Write_16cyc_2V0], (instrs FSQRTv2f64)>;
+
+// ASIMD BFloat16 (BF16) instructions
+// -----------------------------------------------------------------------------
+
+// ASIMD convert, F32 to BF16
+def : InstRW<[N2Write_4cyc_1V0], (instrs BFCVTN, BFCVTN2)>;
+
+// ASIMD dot product
+def : InstRW<[N2Write_4cyc_1V], (instrs BFDOTv4bf16, BFDOTv8bf16)>;
+
+// ASIMD matrix multiply accumulate
+def : InstRW<[N2Write_5cyc_1V], (instrs BFMMLA)>;
+
+// ASIMD multiply accumulate long
+def : InstRW<[N2Write_4cyc_1V], (instrs BFMLALB, BFMLALBIdx, BFMLALT,
+                                        BFMLALTIdx)>;
+
+// Scalar convert, F32 to BF16
+def : InstRW<[N2Write_3cyc_1V0], (instrs BFCVT)>;
+
+// ASIMD miscellaneous instructions
+// -----------------------------------------------------------------------------
+
+// ASIMD bit reverse
+// ASIMD bitwise insert
+// ASIMD count
+// ASIMD duplicate, element
+// ASIMD extract
+// ASIMD extract narrow
+// ASIMD insert, element to element
+// ASIMD move, FP immed
+// ASIMD move, integer immed
+// ASIMD reverse
+// ASIMD table lookup, 1 or 2 table regs
+// ASIMD table lookup extension, 1 table reg
+// ASIMD transfer, element to gen reg
+// ASIMD transpose
+// ASIMD unzip/zip
+// Handled by SchedAlias<WriteV[dq], ...>
+
+// ASIMD duplicate, gen reg
+def : InstRW<[N2Write_3cyc_1M0], (instregex "^DUPv.+gpr")>;
+
+// ASIMD extract narrow, saturating
+def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]QXTNv", "^SQXTUNv")>;
+
+// ASIMD reciprocal and square root estimate, D-form U32
+def : InstRW<[N2Write_3cyc_1V0], (instrs URECPEv2i32, URSQRTEv2i32)>;
+
+// ASIMD reciprocal and square root estimate, Q-form U32
+def : InstRW<[N2Write_4cyc_2V0], (instrs URECPEv4i32, URSQRTEv4i32)>;
+
+// ASIMD reciprocal and square root estimate, D-form F32 and scalar forms
+def : InstRW<[N2Write_3cyc_1V0], (instrs FRECPEv1f16, FRECPEv1i32,
+                                         FRECPEv1i64, FRECPEv2f32,
+                                         FRSQRTEv1f16, FRSQRTEv1i32,
+                                         FRSQRTEv1i64, FRSQRTEv2f32)>;
+
+// ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32
+def : InstRW<[N2Write_4cyc_2V0], (instrs FRECPEv4f16, FRECPEv4f32,
+                                         FRSQRTEv4f16, FRSQRTEv4f32)>;
+
+// ASIMD reciprocal and square root estimate, Q-form F16
+def : InstRW<[N2Write_6cyc_4V0], (instrs FRECPEv8f16, FRSQRTEv8f16)>;
+
+// ASIMD reciprocal exponent
+def : InstRW<[N2Write_3cyc_1V0], (instregex "^FRECPXv")>;
+
+// ASIMD reciprocal step
+def : InstRW<[N2Write_4cyc_1V], (instregex "^FRECPSv", "^FRSQRTSv")>;
+
+// ASIMD table lookup, 3 table regs
+def : InstRW<[N2Write_4cyc_2V], (instrs TBLv8i8Three, TBLv16i8Three)>;
+
+// ASIMD table lookup, 4 table regs
+def : InstRW<[N2Write_4cyc_4V], (instrs TBLv8i8Four, TBLv16i8Four)>;
+
+// ASIMD table lookup extension, 2 table reg
+def : InstRW<[N2Write_4cyc_2V], (instrs TBXv8i8Two, TBXv16i8Two)>;
+
+// ASIMD table lookup extension, 3 table reg
+def : InstRW<[N2Write_6cyc_4V], (instrs TBXv8i8Three, TBXv16i8Three)>;
+
+// ASIMD table lookup extension, 4 table reg
+def : InstRW<[N2Write_6cyc_8V], (instrs TBXv8i8Four, TBXv16i8Four)>;
+
+// ASIMD transfer, gen reg to element
+def : InstRW<[N2Write_5cyc_1M0_1V], (instregex "^INSv")>;
+
+// ASIMD load instructions
+// -----------------------------------------------------------------------------
+
+// ASIMD load, 1 element, multiple, 1 reg, D-form
+def : InstRW<[N2Write_6cyc_1L], (instregex "^LD1Onev(8b|4h|2s|1d)$")>;
+def : InstRW<[N2Write_6cyc_1L, WriteAdr],
+             (instregex "^LD1Onev(8b|4h|2s|1d)_POST$")>;
+
+// ASIMD load, 1 element, multiple, 1 reg, Q-form
+def : InstRW<[N2Write_6cyc_1L], (instregex "^LD1Onev(16b|8h|4s|2d)$")>;
+def : InstRW<[N2Write_6cyc_1L, WriteAdr],
+             (instregex "^LD1Onev(16b|8h|4s|2d)_POST$")>;
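
The _POST variants above differ from their base forms only by the extra WriteAdr entry, which models the post-index address writeback as a separate register write. The pair below is copied from the hunk; reading them side by side shows the convention used for every ASIMD load that follows:

def : InstRW<[N2Write_6cyc_1L], (instregex "^LD1Onev(8b|4h|2s|1d)$")>;
def : InstRW<[N2Write_6cyc_1L, WriteAdr],
             (instregex "^LD1Onev(8b|4h|2s|1d)_POST$")>;
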
+
+// ASIMD load, 1 element, multiple, 2 reg, D-form
+def : InstRW<[N2Write_6cyc_2L], (instregex "^LD1Twov(8b|4h|2s|1d)$")>;
+def : InstRW<[N2Write_6cyc_2L, WriteAdr],
+             (instregex "^LD1Twov(8b|4h|2s|1d)_POST$")>;
+
+// ASIMD load, 1 element, multiple, 2 reg, Q-form
+def : InstRW<[N2Write_6cyc_2L], (instregex "^LD1Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[N2Write_6cyc_2L, WriteAdr],
+             (instregex "^LD1Twov(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 1 element, multiple, 3 reg, D-form
+def : InstRW<[N2Write_6cyc_3L], (instregex "^LD1Threev(8b|4h|2s|1d)$")>;
+def : InstRW<[N2Write_6cyc_3L, WriteAdr],
+             (instregex "^LD1Threev(8b|4h|2s|1d)_POST$")>;
+
+// ASIMD load, 1 element, multiple, 3 reg, Q-form
+def : InstRW<[N2Write_6cyc_3L], (instregex "^LD1Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[N2Write_6cyc_3L, WriteAdr],
+             (instregex "^LD1Threev(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 1 element, multiple, 4 reg, D-form
+def : InstRW<[N2Write_7cyc_4L], (instregex "^LD1Fourv(8b|4h|2s|1d)$")>;
+def : InstRW<[N2Write_7cyc_4L, WriteAdr],
+             (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>;
+
+// ASIMD load, 1 element, multiple, 4 reg, Q-form
+def : InstRW<[N2Write_7cyc_4L], (instregex "^LD1Fourv(16b|8h|4s|2d)$")>;
+def : InstRW<[N2Write_7cyc_4L, WriteAdr],
+             (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 1 element, one lane, B/H/S
+// ASIMD load, 1 element, one lane, D
+def : InstRW<[N2Write_8cyc_1L_1V], (instregex "LD1i(8|16|32|64)$")>;
+def : InstRW<[N2Write_8cyc_1L_1V, WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>;
+
+// ASIMD load, 1 element, all lanes, D-form, B/H/S
+// ASIMD load, 1 element, all lanes, D-form, D
+def : InstRW<[N2Write_8cyc_1L_1V], (instregex "LD1Rv(8b|4h|2s|1d)$")>;
+def : InstRW<[N2Write_8cyc_1L_1V, WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d)_POST$")>;
+
+// ASIMD load, 1 element, all lanes, Q-form
+def : InstRW<[N2Write_8cyc_1L_1V], (instregex "LD1Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[N2Write_8cyc_1L_1V, WriteAdr], (instregex "LD1Rv(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 2 element, multiple, D-form, B/H/S
+def : InstRW<[N2Write_8cyc_1L_2V], (instregex "LD2Twov(8b|4h|2s)$")>;
+def : InstRW<[N2Write_8cyc_1L_2V, WriteAdr], (instregex "LD2Twov(8b|4h|2s)_POST$")>;
+
+// ASIMD load, 2 element, multiple, Q-form, B/H/S
+// ASIMD load, 2 element, multiple, Q-form, D
+def : InstRW<[N2Write_8cyc_2L_2V], (instregex "LD2Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[N2Write_8cyc_2L_2V, WriteAdr], (instregex "LD2Twov(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 2 element, one lane, B/H
+// ASIMD load, 2 element, one lane, S
+// ASIMD load, 2 element, one lane, D
+def : InstRW<[N2Write_8cyc_1L_2V], (instregex "LD2i(8|16|32|64)$")>;
+def : InstRW<[N2Write_8cyc_1L_2V, WriteAdr], (instregex "LD2i(8|16|32|64)_POST$")>;
+
+// ASIMD load, 2 element, all lanes, D-form, B/H/S
+// ASIMD load, 2 element, all lanes, D-form, D
+def : InstRW<[N2Write_8cyc_1L_2V], (instregex "LD2Rv(8b|4h|2s|1d)$")>;
+def : InstRW<[N2Write_8cyc_1L_2V, WriteAdr], (instregex "LD2Rv(8b|4h|2s|1d)_POST$")>;
+
+// ASIMD load, 2 element, all lanes, Q-form
+def : InstRW<[N2Write_8cyc_1L_2V], (instregex "LD2Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[N2Write_8cyc_1L_2V, WriteAdr], (instregex "LD2Rv(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 3 element, multiple, D-form, B/H/S
+def : InstRW<[N2Write_8cyc_2L_3V], (instregex "LD3Threev(8b|4h|2s)$")>;
+def : InstRW<[N2Write_8cyc_2L_3V, WriteAdr], (instregex "LD3Threev(8b|4h|2s)_POST$")>;
+
+// ASIMD load, 3 element, multiple, Q-form, B/H/S
+def : InstRW<[N2Write_8cyc_3L_3V], (instregex "LD3Threev(16b|8h|4s)$")>;
+def : InstRW<[N2Write_8cyc_3L_3V, WriteAdr], (instregex "LD3Threev(16b|8h|4s)_POST$")>;
+
+// ASIMD load, 3 element, multiple, Q-form, D
+def : InstRW<[N2Write_8cyc_3L_3V], (instregex "LD3Threev(2d)$")>;
+def : InstRW<[N2Write_8cyc_3L_3V, WriteAdr], (instregex "LD3Threev(2d)_POST$")>;
+
+// ASIMD load, 3 element, one lane, B/H
+// ASIMD load, 3 element, one lane, S
+// ASIMD load, 3 element, one lane, D
+def : InstRW<[N2Write_8cyc_2L_3V], (instregex "LD3i(8|16|32|64)$")>;
+def : InstRW<[N2Write_8cyc_2L_3V, WriteAdr], (instregex "LD3i(8|16|32|64)_POST$")>;
+
+// ASIMD load, 3 element, all lanes, D-form, B/H/S
+// ASIMD load, 3 element, all lanes, D-form, D
+def : InstRW<[N2Write_8cyc_2L_3V], (instregex "LD3Rv(8b|4h|2s|1d)$")>;
+def : InstRW<[N2Write_8cyc_2L_3V, WriteAdr], (instregex "LD3Rv(8b|4h|2s|1d)_POST$")>;
+
+// ASIMD load, 3 element, all lanes, Q-form, B/H/S
+// ASIMD load, 3 element, all lanes, Q-form, D
+def : InstRW<[N2Write_8cyc_3L_3V], (instregex "LD3Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[N2Write_8cyc_3L_3V, WriteAdr], (instregex "LD3Rv(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 4 element, multiple, D-form, B/H/S
+def : InstRW<[N2Write_8cyc_3L_4V], (instregex "LD4Fourv(8b|4h|2s)$")>;
+def : InstRW<[N2Write_8cyc_3L_4V, WriteAdr], (instregex "LD4Fourv(8b|4h|2s)_POST$")>;
+
+// ASIMD load, 4 element, multiple, Q-form, B/H/S
+// ASIMD load, 4 element, multiple, Q-form, D
+def : InstRW<[N2Write_9cyc_4L_4V], (instregex "LD4Fourv(16b|8h|4s|2d)$")>;
+def : InstRW<[N2Write_9cyc_4L_4V, WriteAdr], (instregex "LD4Fourv(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 4 element, one lane, B/H
+// ASIMD load, 4 element, one lane, S
+// ASIMD load, 4 element, one lane, D
+def : InstRW<[N2Write_8cyc_3L_4V], (instregex "LD4i(8|16|32|64)$")>;
+def : InstRW<[N2Write_8cyc_3L_4V, WriteAdr], (instregex "LD4i(8|16|32|64)_POST$")>;
+
+// ASIMD load, 4 element, all lanes, D-form, B/H/S
+// ASIMD load, 4 element, all lanes, D-form, D
+def : InstRW<[N2Write_8cyc_3L_4V], (instregex "LD4Rv(8b|4h|2s|1d)$")>;
+def : InstRW<[N2Write_8cyc_3L_4V, WriteAdr], (instregex "LD4Rv(8b|4h|2s|1d)_POST$")>;
+
+// ASIMD load, 4 element, all lanes, Q-form, B/H/S
+// ASIMD load, 4 element, all lanes, Q-form, D
+def : InstRW<[N2Write_8cyc_4L_4V], (instregex "LD4Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[N2Write_8cyc_4L_4V, WriteAdr], (instregex "LD4Rv(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store instructions
+// -----------------------------------------------------------------------------
+
+// ASIMD store, 1 element, multiple, 1 reg, D-form
+def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "ST1Onev(8b|4h|2s|1d)$")>;
+def : InstRW<[N2Write_2cyc_1L01_1V, WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d)_POST$")>;
+
+// ASIMD store, 1 element, multiple, 1 reg, Q-form
+def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "ST1Onev(16b|8h|4s|2d)$")>;
+def : InstRW<[N2Write_2cyc_1L01_1V, WriteAdr], (instregex "ST1Onev(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 1 element, multiple, 2 reg, D-form
+def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "ST1Twov(8b|4h|2s|1d)$")>;
+def : InstRW<[N2Write_2cyc_1L01_1V, WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d)_POST$")>;
+
+// ASIMD store, 1 element, multiple, 2 reg, Q-form
+def : InstRW<[N2Write_2cyc_2L01_2V], (instregex "ST1Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[N2Write_2cyc_2L01_2V, WriteAdr], (instregex "ST1Twov(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 1 element, multiple, 3 reg, D-form
+def : InstRW<[N2Write_2cyc_2L01_2V], (instregex "ST1Threev(8b|4h|2s|1d)$")>;
+def : InstRW<[N2Write_2cyc_2L01_2V, WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d)_POST$")>;
+
+// ASIMD store, 1 element, multiple, 3 reg, Q-form
+def : InstRW<[N2Write_2cyc_3L01_3V], (instregex "ST1Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[N2Write_2cyc_3L01_3V, WriteAdr], (instregex "ST1Threev(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 1 element, multiple, 4 reg, D-form
+def : InstRW<[N2Write_2cyc_2L01_2V], (instregex "ST1Fourv(8b|4h|2s|1d)$")>;
+def : InstRW<[N2Write_2cyc_2L01_2V, WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d)_POST$")>;
+
+// ASIMD store, 1 element, multiple, 4 reg, Q-form
+def : InstRW<[N2Write_2cyc_4L01_4V], (instregex "ST1Fourv(16b|8h|4s|2d)$")>;
+def : InstRW<[N2Write_2cyc_4L01_4V, WriteAdr], (instregex "ST1Fourv(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 1 element, one lane, B/H/S
+// ASIMD store, 1 element, one lane, D
+def : InstRW<[N2Write_4cyc_1L01_1V], (instregex "ST1i(8|16|32|64)$")>;
+def : InstRW<[N2Write_4cyc_1L01_1V, WriteAdr], (instregex "ST1i(8|16|32|64)_POST$")>;
+
+// ASIMD store, 2 element, multiple, D-form, B/H/S
+def : InstRW<[N2Write_4cyc_1L01_1V], (instregex "ST2Twov(8b|4h|2s)$")>;
+def : InstRW<[N2Write_4cyc_1L01_1V, WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>;
+
+// ASIMD store, 2 element, multiple, Q-form, B/H/S
+// ASIMD store, 2 element, multiple, Q-form, D
+def : InstRW<[N2Write_4cyc_2L01_2V], (instregex "ST2Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[N2Write_4cyc_2L01_2V, WriteAdr], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 2 element, one lane, B/H/S
+// ASIMD store, 2 element, one lane, D
+def : InstRW<[N2Write_4cyc_1L01_1V], (instregex "ST2i(8|16|32|64)$")>;
+def : InstRW<[N2Write_4cyc_1L01_1V, WriteAdr], (instregex "ST2i(8|16|32|64)_POST$")>;
+
+// ASIMD store, 3 element, multiple, D-form, B/H/S
+def : InstRW<[N2Write_5cyc_2L01_2V], (instregex "ST3Threev(8b|4h|2s)$")>;
+def : InstRW<[N2Write_5cyc_2L01_2V, WriteAdr], (instregex "ST3Threev(8b|4h|2s)_POST$")>;
+
+// ASIMD store, 3 element, multiple, Q-form, B/H/S
+// ASIMD store, 3 element, multiple, Q-form, D
+def : InstRW<[N2Write_6cyc_3L01_3V], (instregex "ST3Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[N2Write_6cyc_3L01_3V, WriteAdr], (instregex "ST3Threev(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 3 element, one lane, B/H
+// ASIMD store, 3 element, one lane, S
+// ASIMD store, 3 element, one lane, D
+def : InstRW<[N2Write_6cyc_3L01_3V], (instregex "ST3i(8|16|32|64)$")>;
+def : InstRW<[N2Write_6cyc_3L01_3V, WriteAdr], (instregex "ST3i(8|16|32|64)_POST$")>;
+
+// ASIMD store, 4 element, multiple, D-form, B/H/S
+def : InstRW<[N2Write_6cyc_3L01_3V], (instregex "ST4Fourv(8b|4h|2s)$")>;
+def : InstRW<[N2Write_6cyc_3L01_3V, WriteAdr], (instregex "ST4Fourv(8b|4h|2s)_POST$")>;
+
+// ASIMD store, 4 element, multiple, Q-form, B/H/S
+def : InstRW<[N2Write_7cyc_6L01_6V], (instregex "ST4Fourv(16b|8h|4s)$")>;
+def : InstRW<[N2Write_7cyc_6L01_6V, WriteAdr], (instregex "ST4Fourv(16b|8h|4s)_POST$")>;
+
+// ASIMD store, 4 element, multiple, Q-form, D
+def : InstRW<[N2Write_5cyc_4L01_4V], (instregex "ST4Fourv(2d)$")>;
+def : InstRW<[N2Write_5cyc_4L01_4V, WriteAdr], (instregex "ST4Fourv(2d)_POST$")>;
+
+// ASIMD store, 4 element, one lane, B/H/S
+def : InstRW<[N2Write_6cyc_3L01_3V], (instregex "ST4i(8|16|32)$")>;
+def : InstRW<[N2Write_6cyc_3L01_3V, WriteAdr], (instregex "ST4i(8|16|32)_POST$")>;
+
+// ASIMD store, 4 element, one lane, D
+def : InstRW<[N2Write_4cyc_3L01_3V], (instregex "ST4i(64)$")>;
+def : InstRW<[N2Write_4cyc_3L01_3V, WriteAdr], (instregex "ST4i(64)_POST$")>;
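
One pattern worth noting in the ASIMD store mappings above: each form pairs N2UnitL01 and N2UnitV micro-ops in equal numbers, presumably one store-address and one store-data micro-op per register written, with latency growing with the interleaving depth rather than the register count alone. The contrast below is abridged from the hunk; the interpretation is editorial:

def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "ST1Onev(8b|4h|2s|1d)$")>;
def : InstRW<[N2Write_2cyc_4L01_4V], (instregex "ST1Fourv(16b|8h|4s|2d)$")>;
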
+
+// Cryptography extensions
+// -----------------------------------------------------------------------------
+
+// Crypto AES ops
+def : InstRW<[N2Write_2cyc_1V], (instregex "^AES[DE]rr$", "^AESI?MCrr")>;
+
+// Crypto polynomial (64x64) multiply long
+def : InstRW<[N2Write_2cyc_1V0], (instrs PMULLv1i64, PMULLv2i64)>;
+
+// Crypto SHA1 hash acceleration op
+// Crypto SHA1 schedule acceleration ops
+def : InstRW<[N2Write_2cyc_1V0], (instregex "^SHA1(H|SU0|SU1)")>;
+
+// Crypto SHA1 hash acceleration ops
+// Crypto SHA256 hash acceleration ops
+def : InstRW<[N2Write_4cyc_1V0], (instregex "^SHA1[CMP]", "^SHA256H2?")>;
+
+// Crypto SHA256 schedule acceleration ops
+def : InstRW<[N2Write_2cyc_1V0], (instregex "^SHA256SU[01]")>;
+
+// Crypto SHA512 hash acceleration ops
+def : InstRW<[N2Write_2cyc_1V0], (instregex "^SHA512(H|H2|SU0|SU1)")>;
+
+// Crypto SHA3 ops
+def : InstRW<[N2Write_2cyc_1V0], (instrs BCAX, EOR3, RAX1, XAR)>;
+
+// Crypto SM3 ops
+def : InstRW<[N2Write_2cyc_1V0], (instregex "^SM3PARTW[12]$", "^SM3SS1$",
+                                            "^SM3TT[12][AB]$")>;
+
+// Crypto SM4 ops
+def : InstRW<[N2Write_4cyc_1V0], (instrs SM4E, SM4ENCKEY)>;
+
+// CRC
+// -----------------------------------------------------------------------------
+
+def : InstRW<[N2Write_2cyc_1M0], (instregex "^CRC32")>;
+
+// SVE Predicate instructions
+// -----------------------------------------------------------------------------
+
+// Loop control, based on predicate
+def : InstRW<[N2Write_2cyc_1M], (instrs BRKA_PPmP, BRKA_PPzP,
+                                        BRKB_PPmP, BRKB_PPzP)>;
+
+// Loop control, based on predicate and flag setting
+def : InstRW<[N2Write_3cyc_1M], (instrs BRKAS_PPzP, BRKBS_PPzP)>;
+
+// Loop control, propagating
+def : InstRW<[N2Write_2cyc_1M0], (instrs BRKN_PPzP, BRKPA_PPzPP, BRKPB_PPzPP)>;
+
+// Loop control, propagating and flag setting
+def : InstRW<[N2Write_3cyc_1M0_1M], (instrs BRKNS_PPzP, BRKPAS_PPzPP,
+                                            BRKPBS_PPzPP)>;
+
+// Loop control, based on GPR
+def : InstRW<[N2Write_3cyc_1M],
+             (instregex "^WHILE(GE|GT|HI|HS|LE|LO|LS|LT)_P(WW|XX)_[BHSD]$")>;
+
+def : InstRW<[N2Write_3cyc_1M], (instregex "^WHILE(RW|WR)_PXX_[BHSD]$")>;
+
+// Loop terminate
+def : InstRW<[N2Write_1cyc_1M], (instregex "^CTERM(EQ|NE)_(WW|XX)$")>;
+
+// Predicate counting scalar
+def : InstRW<[N2Write_2cyc_1M], (instrs ADDPL_XXI, ADDVL_XXI, RDVLI_XI)>;
+def : InstRW<[N2Write_2cyc_1M],
+             (instregex "^(CNT|DEC|INC|SQDEC|SQINC|UQDEC|UQINC)[BHWD]_XPiI$",
+                        "^SQ(DEC|INC)[BHWD]_XPiWdI$",
+                        "^(UQDEC|UQINC)[BHWD]_WPiI$")>;
+
+// Predicate counting scalar, active predicate
+def : InstRW<[N2Write_2cyc_1M],
+             (instregex "^CNTP_XPP_[BHSD]$",
+                        "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_XP_[BHSD]$",
+                        "^(UQDEC|UQINC)P_WP_[BHSD]$",
+                        "^(SQDEC|SQINC|UQDEC|UQINC)P_XPWd_[BHSD]$")>;
+
+// Predicate counting vector, active predicate
+def : InstRW<[N2Write_7cyc_1M_1M0_1V],
+             (instregex "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_ZP_[HSD]$")>;
+
+// Predicate logical
+def : InstRW<[N2Write_1cyc_1M0],
+             (instregex "^(AND|BIC|EOR|NAND|NOR|ORN|ORR)_PPzPP$")>;
+
+// Predicate logical, flag setting
+def : InstRW<[N2Write_2cyc_1M0_1M],
+             (instregex "^(ANDS|BICS|EORS|NANDS|NORS|ORNS|ORRS)_PPzPP$")>;
+
+// Predicate reverse
+def : InstRW<[N2Write_2cyc_1M], (instregex "^REV_PP_[BHSD]$")>;
+
+// Predicate select
+def : InstRW<[N2Write_1cyc_1M0], (instrs SEL_PPPP)>;
+
+// Predicate set
+def : InstRW<[N2Write_2cyc_1M], (instregex "^PFALSE$", "^PTRUE_[BHSD]$")>;
+
+// Predicate set/initialize, set flags
+def : InstRW<[N2Write_3cyc_1M], (instregex "^PTRUES_[BHSD]$")>;
+
+// Predicate find first/next
+def : InstRW<[N2Write_3cyc_1M], (instregex "^PFIRST_B$", "^PNEXT_[BHSD]$")>;
+
+// Predicate test
+def : InstRW<[N2Write_1cyc_1M], (instrs PTEST_PP)>;
+
+// Predicate transpose
+def : InstRW<[N2Write_2cyc_1M], (instregex "^TRN[12]_PPP_[BHSDQ]$")>;
+
+// Predicate unpack and widen
+def : InstRW<[N2Write_2cyc_1M], (instrs PUNPKHI_PP, PUNPKLO_PP)>;
+
+// Predicate zip/unzip
+def : InstRW<[N2Write_2cyc_1M], (instregex "^(ZIP|UZP)[12]_PPP_[BHSDQ]$")>;
+
+// SVE integer instructions
+// -----------------------------------------------------------------------------
+
+// Arithmetic, absolute diff
+def : InstRW<[N2Write_2cyc_1V], (instregex "^[SU]ABD_ZPmZ_[BHSD]$")>;
+
+// Arithmetic, absolute diff accum
+def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]ABA_ZZZ_[BHSD]$")>;
+
+// Arithmetic, absolute diff accum long
+def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]ABAL[TB]_ZZZ_[HSD]$")>;
+
+// Arithmetic, absolute diff long
+def : InstRW<[N2Write_2cyc_1V], (instregex "^[SU]ABDL[TB]_ZZZ_[HSD]$")>;
+
+// Arithmetic, basic
+def : InstRW<[N2Write_2cyc_1V],
+             (instregex "^(ABS|ADD|CNOT|NEG|SUB|SUBR)_ZPmZ_[BHSD]$",
+                        "^(ADD|SUB)_ZZZ_[BHSD]$",
+                        "^(ADD|SUB|SUBR)_ZI_[BHSD]$",
+                        "^ADR_[SU]XTW_ZZZ_D_[0123]$",
+                        "^ADR_LSL_ZZZ_[SD]_[0123]$",
+                        "^[SU](ADD|SUB)[LW][BT]_ZZZ_[HSD]$",
+                        "^SADDLBT_ZZZ_[HSD]$",
+                        "^[SU]H(ADD|SUB|SUBR)_ZPmZ_[BHSD]$",
+                        "^SSUBL(BT|TB)_ZZZ_[HSD]$")>;
+
+// Arithmetic, complex
+def : InstRW<[N2Write_2cyc_1V],
+             (instregex "^R?(ADD|SUB)HN[BT]_ZZZ_[BHS]$",
+                        "^SQ(ABS|ADD|NEG|SUB|SUBR)_ZPmZ_[BHSD]$",
+                        "^[SU]Q(ADD|SUB)_ZZZ_[BHSD]$",
+                        "^[SU]Q(ADD|SUB)_ZI_[BHSD]$",
+                        "^(SRH|SUQ|UQ|USQ|URH)ADD_ZPmZ_[BHSD]$",
+                        "^(UQSUB|UQSUBR)_ZPmZ_[BHSD]$")>;
+
+// Arithmetic, large integer
+def : InstRW<[N2Write_2cyc_1V], (instregex "^(AD|SB)CL[BT]_ZZZ_[SD]$")>;
+
+// Arithmetic, pairwise add
+def : InstRW<[N2Write_2cyc_1V], (instregex "^ADDP_ZPmZ_[BHSD]$")>;
+
+// Arithmetic, pairwise add and accum long
+def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]ADALP_ZPmZ_[HSD]$")>;
+
+// Arithmetic, shift
+def : InstRW<[N2Write_2cyc_1V1],
+             (instregex "^(ASR|LSL|LSR)_WIDE_ZPmZ_[BHS]$",
+                        "^(ASR|LSL|LSR)_WIDE_ZZZ_[BHS]$",
+                        "^(ASR|LSL|LSR)_ZPmI_[BHSD]$",
+                        "^(ASR|LSL|LSR)_ZPmZ_[BHSD]$",
+                        "^(ASR|LSL|LSR)_ZZI_[BHSD]$",
+                        "^(ASRR|LSLR|LSRR)_ZPmZ_[BHSD]$")>;
+
+// Arithmetic, shift and accumulate
+def : InstRW<[N2Write_4cyc_1V1],
+             (instregex "^(SRSRA|SSRA|URSRA|USRA)_ZZI_[BHSD]$")>;
+
+// Arithmetic, shift by immediate
+// Arithmetic, shift by immediate and insert
+def : InstRW<[N2Write_2cyc_1V1],
+             (instregex "^(SHRNB|SHRNT|SSHLLB|SSHLLT|USHLLB|USHLLT|SLI|SRI)_ZZI_[BHSD]$")>;
+
+// Arithmetic, shift complex
+def : InstRW<[N2Write_4cyc_1V1],
+             (instregex "^(SQ)?RSHRU?N[BT]_ZZI_[BHS]$",
+                        "^(SQRSHL|SQRSHLR|SQSHL|SQSHLR|UQRSHL|UQRSHLR|UQSHL|UQSHLR)_ZPmZ_[BHSD]$",
+                        "^(SQSHL|SQSHLU|UQSHL)_ZPmI_[BHSD]$",
+                        "^SQSHRU?N[BT]_ZZI_[BHS]$",
+                        "^UQR?SHRN[BT]_ZZI_[BHS]$")>;
+
+// Arithmetic, shift right for divide
+def : InstRW<[N2Write_4cyc_1V1], (instregex "^ASRD_ZPmI_[BHSD]$")>;
+
+// Arithmetic, shift rounding
+def : InstRW<[N2Write_4cyc_1V1],
+             (instregex "^(SRSHL|SRSHLR|URSHL|URSHLR)_ZPmZ_[BHSD]$",
+                        "^[SU]RSHR_ZPmI_[BHSD]$")>;
+
+// Bit manipulation
+def : InstRW<[N2Write_6cyc_2V1],
+             (instregex "^(BDEP|BEXT|BGRP)_ZZZ_[BHSD]$")>;
+
+// Bitwise select
+def : InstRW<[N2Write_2cyc_1V], (instregex "^(BSL|BSL1N|BSL2N|NBSL)_ZZZZ$")>;
+
+// Count/reverse bits
+def : InstRW<[N2Write_2cyc_1V], (instregex "^(CLS|CLZ|CNT|RBIT)_ZPmZ_[BHSD]$")>;
+
+// Broadcast logical bitmask immediate to vector
+def : InstRW<[N2Write_2cyc_1V], (instrs DUPM_ZI)>;
+
+// Compare and set flags
+def : InstRW<[N2Write_4cyc_1V0_1M],
+             (instregex "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_PPzZ[IZ]_[BHSD]$",
+                        "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_WIDE_PPzZZ_[BHS]$")>;
+
+// Complex add
+def : InstRW<[N2Write_2cyc_1V], (instregex "^(SQ)?CADD_ZZI_[BHSD]$")>;
+
+// Complex dot product 8-bit element
+def : InstRW<[N2Write_3cyc_1V], (instrs CDOT_ZZZ_S, CDOT_ZZZI_S)>;
+
+// Complex dot product 16-bit element
+def : InstRW<[N2Write_4cyc_1V0], (instrs CDOT_ZZZ_D, CDOT_ZZZI_D)>;
+
+// Complex multiply-add B, H, S element size
+def : InstRW<[N2Write_4cyc_1V0], (instregex "^CMLA_ZZZ_[BHS]$",
+                                            "^CMLA_ZZZI_[HS]$")>;
+
+// Complex multiply-add D element size
+def : InstRW<[N2Write_5cyc_2V0], (instrs CMLA_ZZZ_D)>;
+
+// Conditional extract operations, scalar form
+def : InstRW<[N2Write_8cyc_1M0_1V1_1V], (instregex "^CLAST[AB]_RPZ_[BHSD]$")>;
+
+// Conditional extract operations, SIMD&FP scalar and vector forms
+def : InstRW<[N2Write_3cyc_1V1], (instregex "^CLAST[AB]_[VZ]PZ_[BHSD]$",
+                                            "^COMPACT_ZPZ_[SD]$",
+                                            "^SPLICE_ZPZZ?_[BHSD]$")>;
+
+// Convert to floating point, 64b to float or convert to double
+def : InstRW<[N2Write_3cyc_1V0], (instregex "^[SU]CVTF_ZPmZ_Dto[SD]$")>;
+
+// Convert to floating point, 64b to half
+def : InstRW<[N2Write_3cyc_1V0], (instregex "^[SU]CVTF_ZPmZ_DtoH$")>;
+
+// Convert to floating point, 32b to single or half
+def : InstRW<[N2Write_4cyc_2V0], (instregex "^[SU]CVTF_ZPmZ_Sto[HS]$")>;
+
+// Convert to floating point, 32b to double
+def : InstRW<[N2Write_3cyc_1V0], (instregex "^[SU]CVTF_ZPmZ_StoD$")>;
+
+// Convert to floating point, 16b to half
+def : InstRW<[N2Write_6cyc_4V0], (instregex "^[SU]CVTF_ZPmZ_HtoH$")>;
+
+// Copy, scalar
+def : InstRW<[N2Write_5cyc_1M0_1V], (instregex "^CPY_ZPmR_[BHSD]$")>;
+
+// Copy, scalar SIMD&FP or imm
+def : InstRW<[N2Write_2cyc_1V], (instregex "^CPY_ZPm[IV]_[BHSD]$",
+                                           "^CPY_ZPzI_[BHSD]$")>;
+
+// Divides, 32 bit
+def : InstRW<[N2Write_12cyc_1V0], (instregex "^[SU]DIVR?_ZPmZ_S$")>;
+
+// Divides, 64 bit
+def : InstRW<[N2Write_20cyc_1V0], (instregex "^[SU]DIVR?_ZPmZ_D$")>;
+
+// Dot product, 8 bit
+def : InstRW<[N2Write_3cyc_1V], (instregex "^[SU]DOT_ZZZI?_S$")>;
+
+// Dot product, 8 bit, using signed and unsigned integers
+def : InstRW<[N2Write_3cyc_1V], (instrs SUDOT_ZZZI, USDOT_ZZZI, USDOT_ZZZ)>;
+
+// Dot product, 16 bit
+def : InstRW<[N2Write_4cyc_1V0], (instregex "^[SU]DOT_ZZZI?_D$")>;
+
+// Duplicate, immediate and indexed form
+def : InstRW<[N2Write_2cyc_1V], (instregex "^DUP_ZI_[BHSD]$",
+                                           "^DUP_ZZI_[BHSDQ]$")>;
+
+// Duplicate, scalar form
+def : InstRW<[N2Write_3cyc_1M0], (instregex "^DUP_ZR_[BHSD]$")>;
+
+// Extend, sign or zero
+def : InstRW<[N2Write_2cyc_1V1], (instregex "^[SU]XTB_ZPmZ_[HSD]$",
+                                            "^[SU]XTH_ZPmZ_[SD]$",
+                                            "^[SU]XTW_ZPmZ_[D]$")>;
+
+// Extract
+def : InstRW<[N2Write_2cyc_1V], (instrs EXT_ZZI, EXT_ZZI_B)>;
+
+// Extract narrow saturating
+def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]QXTN[BT]_ZZ_[BHS]$",
+                                            "^SQXTUN[BT]_ZZ_[BHS]$")>;
+
+// Extract/insert operation, SIMD and FP scalar form
+def : InstRW<[N2Write_3cyc_1V1], (instregex "^LAST[AB]_VPZ_[BHSD]$",
+                                            "^INSR_ZV_[BHSD]$")>;
+
+// Extract/insert operation, scalar
+def : InstRW<[N2Write_5cyc_1V1_1M0], (instregex "^LAST[AB]_RPZ_[BHSD]$",
+                                                "^INSR_ZR_[BHSD]$")>;
+
+// Histogram operations
+def : InstRW<[N2Write_2cyc_1V], (instregex "^HISTCNT_ZPzZZ_[SD]$",
+                                           "^HISTSEG_ZZZ$")>;
+
+// Horizontal operations, B, H, S form, immediate operands only
+def : InstRW<[N2Write_4cyc_1V0], (instregex "^INDEX_II_[BHS]$")>;
+
+// Horizontal operations, B, H, S form, scalar and immediate operands /
+// scalar operands only
+def : InstRW<[N2Write_7cyc_1M0_1V0], (instregex "^INDEX_(IR|RI|RR)_[BHS]$")>;
+
+// Horizontal operations, D form, immediate operands only
+def : InstRW<[N2Write_5cyc_2V0], (instrs INDEX_II_D)>;
+
+// Horizontal operations, D form, scalar and immediate operands /
+// scalar operands only
+def : InstRW<[N2Write_8cyc_2M0_2V0], (instregex "^INDEX_(IR|RI|RR)_D$")>;
+
+// Logical
+def : InstRW<[N2Write_2cyc_1V],
+             (instregex "^(AND|EOR|ORR)_ZI$",
+                        "^(AND|BIC|EOR|EOR(BT|TB)?|ORR)_ZZZ$",
+                        "^EOR(BT|TB)_ZZZ_[BHSD]$",
+                        "^(AND|BIC|EOR|NOT|ORR)_ZPmZ_[BHSD]$")>;
+
+// Max/min, basic and pairwise
+def : InstRW<[N2Write_2cyc_1V], (instregex "^[SU](MAX|MIN)_ZI_[BHSD]$",
+                                           "^[SU](MAX|MIN)P?_ZPmZ_[BHSD]$")>;
+
+// Matching operations
+def : InstRW<[N2Write_2cyc_1V0_1M], (instregex "^N?MATCH_PPzZZ_[BH]$")>;
+
+// Matrix multiply-accumulate
+def : InstRW<[N2Write_3cyc_1V], (instrs SMMLA_ZZZ, UMMLA_ZZZ, USMMLA_ZZZ)>;
+
+// Move prefix
+def : InstRW<[N2Write_2cyc_1V], (instregex "^MOVPRFX_ZP[mz]Z_[BHSD]$",
+                                           "^MOVPRFX_ZZ$")>;
+
+// Multiply, B, H, S element size
+def : InstRW<[N2Write_4cyc_1V0], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_[BHS]$",
+                                            "^[SU]MULH_(ZPmZ|ZZZ)_[BHS]$")>;
+
+// Multiply, D element size
+def : InstRW<[N2Write_5cyc_2V0], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_D$",
+                                            "^[SU]MULH_(ZPmZ|ZZZ)_D$")>;
+
+// Multiply long
+def : InstRW<[N2Write_4cyc_1V0], (instregex "^[SU]MULL[BT]_ZZZI_[SD]$",
+                                            "^[SU]MULL[BT]_ZZZ_[HSD]$")>;
+
+// Multiply accumulate, B, H, S element size
+def : InstRW<[N2Write_4cyc_1V0], (instregex "^ML[AS]_ZZZI_[BHS]$",
+                                            "^(ML[AS]|MAD|MSB)_ZPmZZ_[BHS]$")>;
+
+// Multiply accumulate, D element size
+def : InstRW<[N2Write_5cyc_2V0], (instregex "^ML[AS]_ZZZI_D$",
+                                            "^(ML[AS]|MAD|MSB)_ZPmZZ_D$")>;
+
+// Multiply accumulate long
+def : InstRW<[N2Write_4cyc_1V0], (instregex "^[SU]ML[AS]L[BT]_ZZZ_[HSD]$",
+                                            "^[SU]ML[AS]L[BT]_ZZZI_[SD]$")>;
+
+// Multiply accumulate saturating doubling long regular
+def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQDML[AS](LB|LT|LBT)_ZZZ_[HSD]$",
+                                            "^SQDML[AS](LB|LT)_ZZZI_[SD]$")>;
+
+// Multiply saturating doubling high, B, H, S element size
+def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQDMULH_ZZZ_[BHS]$",
+                                            "^SQDMULH_ZZZI_[HS]$")>;
+
+// Multiply saturating doubling high, D element size
+def : InstRW<[N2Write_5cyc_2V0], (instrs SQDMULH_ZZZ_D, SQDMULH_ZZZI_D)>;
+
+// Multiply saturating doubling long
+def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQDMULL[BT]_ZZZ_[HSD]$",
+                                            "^SQDMULL[BT]_ZZZI_[SD]$")>;
+
+// Multiply saturating rounding doubling regular/complex accumulate, B, H, S
+// element size
+def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQRDML[AS]H_ZZZ_[BHS]$",
+                                            "^SQRDCMLAH_ZZZ_[BHS]$",
+                                            "^SQRDML[AS]H_ZZZI_[HS]$",
+                                            "^SQRDCMLAH_ZZZI_[HS]$")>;
+
+// Multiply saturating rounding doubling regular/complex accumulate, D element
+// size
+def : InstRW<[N2Write_5cyc_2V0], (instregex "^SQRDML[AS]H_ZZZI?_D$",
+                                            "^SQRDCMLAH_ZZZ_D$")>;
+
+// Multiply saturating rounding doubling regular/complex, B, H, S element size
+def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQRDMULH_ZZZ_[BHS]$",
+                                            "^SQRDMULH_ZZZI_[HS]$")>;
+
+// Multiply saturating rounding doubling regular/complex, D element size
+def : InstRW<[N2Write_5cyc_2V0], (instregex "^SQRDMULH_ZZZI?_D$")>;
+
+// Multiply/multiply long, (8x8) polynomial
+def : InstRW<[N2Write_2cyc_1V0], (instregex "^PMUL_ZZZ_B$",
+                                            "^PMULL[BT]_ZZZ_[HDQ]$")>;
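
A consistent split runs through the SVE multiply mappings above: B/H/S element sizes are modelled as one micro-op on a single V0 pipe at 4 cycles, while the D element size takes two V0 micro-ops and one extra cycle, which halves throughput. Both lines are abridged from the hunk; the throughput reading is editorial:

def : InstRW<[N2Write_4cyc_1V0], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_[BHS]$")>;
def : InstRW<[N2Write_5cyc_2V0], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_D$")>;
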
+
+// Predicate counting vector
+def : InstRW<[N2Write_2cyc_1V0],
+             (instregex "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)[HWD]_ZPiI$")>;
+
+// Reciprocal estimate
+def : InstRW<[N2Write_4cyc_2V0], (instrs URECPE_ZPmZ_S, URSQRTE_ZPmZ_S)>;
+
+// Reduction, arithmetic, B form
+def : InstRW<[N2Write_11cyc_2V_2V1], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_B")>;
+
+// Reduction, arithmetic, H form
+def : InstRW<[N2Write_9cyc_2V_2V1], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_H")>;
+
+// Reduction, arithmetic, S form
+def : InstRW<[N2Write_8cyc_2V_2V1], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_S")>;
+
+// Reduction, arithmetic, D form
+def : InstRW<[N2Write_8cyc_2V_2V1], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_D")>;
+
+// Reduction, logical
+def : InstRW<[N2Write_6cyc_1V_1V1], (instregex "^(ANDV|EORV|ORV)_VPZ_[BHSD]$")>;
+
+// Reverse, vector
+def : InstRW<[N2Write_2cyc_1V], (instregex "^REV_ZZ_[BHSD]$",
+                                           "^REVB_ZPmZ_[HSD]$",
+                                           "^REVH_ZPmZ_[SD]$",
+                                           "^REVW_ZPmZ_D$")>;
+
+// Select, vector form
+def : InstRW<[N2Write_2cyc_1V], (instregex "^SEL_ZPZZ_[BHSD]$")>;
+
+// Table lookup
+def : InstRW<[N2Write_2cyc_1V], (instregex "^TBL_ZZZZ?_[BHSD]$")>;
+
+// Table lookup extension
+def : InstRW<[N2Write_2cyc_1V], (instregex "^TBX_ZZZ_[BHSD]$")>;
+
+// Transpose, vector form
+def : InstRW<[N2Write_2cyc_1V], (instregex "^TRN[12]_ZZZ_[BHSDQ]$")>;
+
+// Unpack and extend
+def : InstRW<[N2Write_2cyc_1V], (instregex "^[SU]UNPK(HI|LO)_ZZ_[HSD]$")>;
+
+// Zip/unzip
+def : InstRW<[N2Write_2cyc_1V], (instregex "^(UZP|ZIP)[12]_ZZZ_[BHSDQ]$")>;
+
+// SVE floating-point instructions
+// -----------------------------------------------------------------------------
+
+// Floating point absolute value/difference
+def : InstRW<[N2Write_2cyc_1V], (instregex "^FAB[SD]_ZPmZ_[HSD]$")>;
+
+// Floating point arithmetic
+def : InstRW<[N2Write_2cyc_1V], (instregex "^F(ADD|SUB)_(ZPm[IZ]|ZZZ)_[HSD]$",
+                                           "^FADDP_ZPmZZ_[HSD]$",
+                                           "^FNEG_ZPmZ_[HSD]$",
+                                           "^FSUBR_ZPm[IZ]_[HSD]$")>;
+
+// Floating point associative add, F16
+def : InstRW<[N2Write_10cyc_1V1], (instrs FADDA_VPZ_H)>;
+
+// Floating point associative add, F32
+def : InstRW<[N2Write_6cyc_1V1], (instrs FADDA_VPZ_S)>;
+
+// Floating point associative add, F64
+def : InstRW<[N2Write_4cyc_1V], (instrs FADDA_VPZ_D)>;
+
+// Floating point compare
+def : InstRW<[N2Write_2cyc_1V0], (instregex "^FACG[ET]_PPzZZ_[HSD]$",
+                                            "^FCM(EQ|GE|GT|NE)_PPzZ[0Z]_[HSD]$",
+                                            "^FCM(LE|LT)_PPzZ0_[HSD]$",
+                                            "^FCMUO_PPzZZ_[HSD]$")>;
+
+// Floating point complex add
+def : InstRW<[N2Write_3cyc_1V], (instregex "^FCADD_ZPmZ_[HSD]$")>;
+
+// Floating point complex multiply add
+def : InstRW<[N2Write_5cyc_1V], (instregex "^FCMLA_ZPmZZ_[HSD]$",
+                                           "^FCMLA_ZZZI_[HS]$")>;
+
+// Floating point convert, long or narrow (F16 to F32 or F32 to F16)
+def : InstRW<[N2Write_4cyc_2V0], (instregex "^FCVT_ZPmZ_(HtoS|StoH)$",
+                                            "^FCVTLT_ZPmZ_HtoS$",
+                                            "^FCVTNT_ZPmZ_StoH$")>;
+
+// Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32
+// or F64 to F16)
+def : InstRW<[N2Write_3cyc_1V0], (instregex "^FCVT_ZPmZ_(HtoD|StoD|DtoS|DtoH)$",
+                                            "^FCVTLT_ZPmZ_StoD$",
+                                            "^FCVTNT_ZPmZ_DtoS$")>;
+
+// Floating point convert, round to odd
+def : InstRW<[N2Write_3cyc_1V0], (instrs FCVTX_ZPmZ_DtoS, FCVTXNT_ZPmZ_DtoS)>;
+
+// Floating point base2 log, F16
+def : InstRW<[N2Write_6cyc_4V0], (instrs FLOGB_ZPmZ_H)>;
+
+// Floating point base2 log, F32
+def : InstRW<[N2Write_4cyc_2V0], (instrs FLOGB_ZPmZ_S)>;
+
+// Floating point base2 log, F64
+def : InstRW<[N2Write_3cyc_1V0], (instrs FLOGB_ZPmZ_D)>;
+
+// Floating point convert to integer, F16
+def : InstRW<[N2Write_6cyc_4V0], (instregex "^FCVTZ[SU]_ZPmZ_HtoH$")>;
+
+// Floating point convert to integer, F32
+def : InstRW<[N2Write_4cyc_2V0], (instregex "^FCVTZ[SU]_ZPmZ_(HtoS|StoS)$")>;
+
+// Floating point convert to integer, F64
+def : InstRW<[N2Write_3cyc_1V0],
+             (instregex "^FCVTZ[SU]_ZPmZ_(HtoD|StoD|DtoS|DtoD)$")>;
+
+// Floating point copy
+def : InstRW<[N2Write_2cyc_1V], (instregex "^FCPY_ZPmI_[HSD]$",
+                                           "^FDUP_ZI_[HSD]$")>;
+
+// Floating point divide, F16
+def : InstRW<[N2Write_13cyc_1V0], (instregex "^FDIVR?_ZPmZ_H$")>;
+
+// Floating point divide, F32
+def : InstRW<[N2Write_10cyc_1V0], (instregex "^FDIVR?_ZPmZ_S$")>;
+
+// Floating point divide, F64
+def : InstRW<[N2Write_15cyc_1V0], (instregex "^FDIVR?_ZPmZ_D$")>;
+
+// Floating point min/max pairwise
+def : InstRW<[N2Write_2cyc_1V], (instregex "^F(MAX|MIN)(NM)?P_ZPmZZ_[HSD]$")>;
+
+// Floating point min/max
+def : InstRW<[N2Write_2cyc_1V], (instregex "^F(MAX|MIN)(NM)?_ZPm[IZ]_[HSD]$")>;
+
+// Floating point multiply
+def : InstRW<[N2Write_3cyc_1V], (instregex "^(FSCALE|FMULX)_ZPmZ_[HSD]$",
+                                           "^FMUL_(ZPm[IZ]|ZZZI?)_[HSD]$")>;
+
+// Floating point multiply accumulate
+def : InstRW<[N2Write_4cyc_1V],
+             (instregex "^FML[AS]_(ZPmZZ|ZZZI)_[HSD]$",
+                        "^(FMAD|FNMAD|FNML[AS]|FN?MSB)_ZPmZZ_[HSD]$")>;
+
+// Floating point multiply add/sub accumulate long
+def : InstRW<[N2Write_4cyc_1V], (instregex "^FML[AS]L[BT]_ZZZI?_SHH$")>;
+
+// Floating point reciprocal estimate, F16
+def : InstRW<[N2Write_6cyc_4V0], (instrs FRECPE_ZZ_H, FRECPX_ZPmZ_H,
+                                         FRSQRTE_ZZ_H)>;
+
+// Floating point reciprocal estimate, F32
+def : InstRW<[N2Write_4cyc_2V0], (instrs FRECPE_ZZ_S, FRECPX_ZPmZ_S,
+                                         FRSQRTE_ZZ_S)>;
+
+// Floating point reciprocal estimate, F64
+def : InstRW<[N2Write_3cyc_1V0], (instrs FRECPE_ZZ_D, FRECPX_ZPmZ_D,
+                                         FRSQRTE_ZZ_D)>;
+
+// Floating point reciprocal step
+def : InstRW<[N2Write_4cyc_1V0], (instregex "^F(RECPS|RSQRTS)_ZZZ_[HSD]$")>;
+
+// Floating point reduction, F16
+def : InstRW<[N2Write_6cyc_2V],
+             (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_H$")>;
+
+// Floating point reduction, F32
+def : InstRW<[N2Write_4cyc_1V],
+             (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_S$")>;
+
+// Floating point reduction, F64
+def : InstRW<[N2Write_2cyc_1V],
+             (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_D$")>;
+
+// Floating point round to integral, F16
+def : InstRW<[N2Write_6cyc_4V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_H$")>;
+
+// Floating point round to integral, F32
+def : InstRW<[N2Write_4cyc_2V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_S$")>;
+
+// Floating point round to integral, F64
+def : InstRW<[N2Write_3cyc_1V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_D$")>;
+
+// Floating point square root, F16
+def : InstRW<[N2Write_13cyc_1V0], (instrs FSQRT_ZPmZ_H)>;
+
+// Floating point square root, F32
+def : InstRW<[N2Write_10cyc_1V0], (instrs FSQRT_ZPmZ_S)>;
+
+// Floating point square root, F64
+def : InstRW<[N2Write_16cyc_1V0], (instrs FSQRT_ZPmZ_D)>;
+
+// Floating point trigonometric exponentiation
+def : InstRW<[N2Write_3cyc_1V1], (instregex "^FEXPA_ZZ_[HSD]$")>;
+
+// Floating point trigonometric multiply add
+def : InstRW<[N2Write_4cyc_1V], (instregex "^FTMAD_ZZI_[HSD]$")>;
+
+// Floating point trigonometric, miscellaneous
+def : InstRW<[N2Write_3cyc_1V], (instregex "^FTS(MUL|SEL)_ZZZ_[HSD]$")>;
+
+// SVE BFloat16 (BF16) instructions
+// -----------------------------------------------------------------------------
+
+// Convert, F32 to BF16
+def : InstRW<[N2Write_3cyc_1V0], (instrs BFCVT_ZPmZ, BFCVTNT_ZPmZ)>;
+
+// Dot product
+def : InstRW<[N2Write_4cyc_1V], (instrs BFDOT_ZZI, BFDOT_ZZZ)>;
+
+// Matrix multiply accumulate
+def : InstRW<[N2Write_5cyc_1V], (instrs BFMMLA_ZZZ)>;
+
+// Multiply accumulate long
+def : InstRW<[N2Write_4cyc_1V], (instregex "^BFMLAL[BT]_ZZ[ZI]$")>;
+
+// SVE Load instructions
+// -----------------------------------------------------------------------------
+
+// Load vector
+def : InstRW<[N2Write_6cyc_1L], (instrs LDR_ZXI)>;
+
+// Load predicate
+def : InstRW<[N2Write_6cyc_1L_1M], (instrs LDR_PXI)>;
+
+// Contiguous load, scalar + imm
+def : InstRW<[N2Write_6cyc_1L], (instregex "^LD1[BHWD]_IMM_REAL$",
+                                           "^LD1S?B_[HSD]_IMM_REAL$",
+                                           "^LD1S?H_[SD]_IMM_REAL$",
+                                           "^LD1S?W_D_IMM_REAL$")>;
+// Contiguous load, scalar + scalar
+def : InstRW<[N2Write_6cyc_1L01], (instregex "^LD1[BHWD]$",
+                                             "^LD1S?B_[HSD]$",
+                                             "^LD1S?H_[SD]$",
+                                             "^LD1S?W_D$")>;
+
+// Contiguous load broadcast, scalar + imm
+def : InstRW<[N2Write_6cyc_1L], (instregex "^LD1R[BHWD]_IMM$",
+                                           "^LD1RSW_IMM$",
+                                           "^LD1RS?B_[HSD]_IMM$",
+                                           "^LD1RS?H_[SD]_IMM$",
+                                           "^LD1RS?W_D_IMM$",
+                                           "^LD1RQ_[BHWD]_IMM$")>;
+
+// Contiguous load broadcast, scalar + scalar
+def : InstRW<[N2Write_6cyc_1L], (instregex "^LD1RQ_[BHWD]$")>;
+
+// Non temporal load, scalar + imm
+def : InstRW<[N2Write_6cyc_1L], (instregex "^LDNT1[BHWD]_ZRI$")>;
+
+// Non temporal load, scalar + scalar
+def : InstRW<[N2Write_6cyc_1L_1S], (instregex "^LDNT1[BHWD]_ZRR$")>;
+
+// Non temporal gather load, vector + scalar 32-bit element size
+def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^LDNT1[BHW]_ZZR_S_REAL$",
+                                              "^LDNT1S[BH]_ZZR_S_REAL$")>;
+
+// Non temporal gather load, vector + scalar 64-bit element size
+def : InstRW<[N2Write_10cyc_2L_2V1], (instregex "^LDNT1S?[BHW]_ZZR_D_REAL$")>;
+def : InstRW<[N2Write_10cyc_2L_2V1], (instrs LDNT1D_ZZR_D_REAL)>;
+
+// Contiguous first faulting load, scalar + scalar
+def : InstRW<[N2Write_6cyc_1L_1S], (instregex "^LDFF1[BHWD]_REAL$",
+                                              "^LDFF1S?B_[HSD]_REAL$",
+                                              "^LDFF1S?H_[SD]_REAL$",
+                                              "^LDFF1S?W_D_REAL$")>;
+
+// Contiguous non faulting load, scalar + imm
+def : InstRW<[N2Write_6cyc_1L], (instregex "^LDNF1[BHWD]_IMM_REAL$",
+                                           "^LDNF1S?B_[HSD]_IMM_REAL$",
+                                           "^LDNF1S?H_[SD]_IMM_REAL$",
+                                           "^LDNF1S?W_D_IMM_REAL$")>;
+
+// Contiguous Load two structures to two vectors, scalar + imm
+def : InstRW<[N2Write_8cyc_1L_1V], (instregex "^LD2[BHWD]_IMM$")>;
+
+// Contiguous Load two structures to two vectors, scalar + scalar
+def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^LD2[BHWD]$")>;
+
+// Contiguous Load three structures to three vectors, scalar + imm
+def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^LD3[BHWD]_IMM$")>;
+
+// Contiguous Load three structures to three vectors, scalar + scalar
+def : InstRW<[N2Write_10cyc_1V_1L_1S], (instregex "^LD3[BHWD]$")>;
+
+// Contiguous Load four structures to four vectors, scalar + imm
+def : InstRW<[N2Write_9cyc_2L_2V], (instregex "^LD4[BHWD]_IMM$")>;
+
+// Contiguous Load four structures to four vectors, scalar + scalar
+def : InstRW<[N2Write_10cyc_2L_2V_2S], (instregex "^LD4[BHWD]$")>;
+
+// Gather load, vector + imm, 32-bit element size
+def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^GLD(FF)?1S?[BH]_S_IMM_REAL$",
+                                              "^GLD(FF)?1W_IMM_REAL$")>;
+
+// Gather load, vector + imm, 64-bit element size
+def : InstRW<[N2Write_9cyc_2L_2V], (instregex "^GLD(FF)?1S?[BHW]_D_IMM_REAL$",
+                                              "^GLD(FF)?1D_IMM_REAL$")>;
+
+// Gather load, 64-bit element size
+def : InstRW<[N2Write_9cyc_2L_2V],
+             (instregex "^GLD(FF)?1S?[BHW]_D_[SU]XTW_(SCALED_)?REAL$",
+                        "^GLD(FF)?1S?[BHW]_D_(SCALED_)?REAL$",
+                        "^GLD(FF)?1D_[SU]XTW_(SCALED_)?REAL$",
+                        "^GLD(FF)?1D_(SCALED_)?REAL$")>;
+
+// Gather load, 32-bit scaled offset
+def : InstRW<[N2Write_10cyc_2L_2V],
+             (instregex "^GLD(FF)?1S?[HW]_S_[SU]XTW_SCALED_REAL$",
+                        "^GLD(FF)?1W_[SU]XTW_SCALED_REAL")>;
+
+// Gather load, 32-bit unpacked unscaled offset
+def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^GLD(FF)?1S?[BH]_S_[SU]XTW_REAL$",
+                                              "^GLD(FF)?1W_[SU]XTW_REAL$")>;
+
+// SVE Store instructions
+// -----------------------------------------------------------------------------
+
+// Store from predicate reg
+def : InstRW<[N2Write_1cyc_1L01], (instrs STR_PXI)>;
+
+// Store from vector reg
+def : InstRW<[N2Write_2cyc_1L01_1V], (instrs STR_ZXI)>;
+
+// Contiguous store, scalar + imm
+def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^ST1[BHWD]_IMM$",
+                                                "^ST1B_[HSD]_IMM$",
+                                                "^ST1H_[SD]_IMM$",
+                                                "^ST1W_D_IMM$")>;
+
+// Contiguous store, scalar + scalar
+def : InstRW<[N2Write_2cyc_1L01_1S_1V], (instregex "^ST1H(_[SD])?$")>;
+def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^ST1[BWD]$",
+                                                "^ST1B_[HSD]$",
+                                                "^ST1W_D$")>;
+
+// Contiguous store two structures from two vectors, scalar + imm
+def : InstRW<[N2Write_4cyc_1L01_1V], (instregex "^ST2[BHWD]_IMM$")>;
+
+// Contiguous store two structures from two vectors, scalar + scalar
+def : InstRW<[N2Write_4cyc_1L01_1S_1V], (instrs ST2H)>;
+
+// Contiguous store two structures from two vectors, scalar + scalar
+def : InstRW<[N2Write_4cyc_1L01_1V], (instregex "^ST2[BWD]$")>;
+
+// Contiguous store three structures from three vectors, scalar + imm
+def : InstRW<[N2Write_7cyc_5L01_5V], (instregex "^ST3[BHWD]_IMM$")>;
+
+// Contiguous store three structures from three vectors, scalar + scalar
+def : InstRW<[N2Write_7cyc_5L01_5S_5V], (instrs ST3H)>;
+
+// Contiguous store three structures from three vectors, scalar + scalar
+def : InstRW<[N2Write_7cyc_5L01_5S_5V], (instregex "^ST3[BWD]$")>;
+
+// Contiguous store four structures from four vectors, scalar + imm
+def : InstRW<[N2Write_11cyc_9L01_9V], (instregex "^ST4[BHWD]_IMM$")>;
+
+// Contiguous store four structures from four vectors, scalar + scalar
+def : InstRW<[N2Write_11cyc_9L01_9S_9V], (instrs ST4H)>;
+
+// Contiguous store four structures from four vectors, scalar + scalar
+def : InstRW<[N2Write_11cyc_9L01_9S_9V], (instregex "^ST4[BWD]$")>;
+
+// Non temporal store, scalar + imm
+def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STNT1[BHWD]_ZRI$")>;
+
+// Non temporal store, scalar + scalar
+def : InstRW<[N2Write_2cyc_1L01_1S_1V], (instrs STNT1H_ZRR)>;
+def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STNT1[BWD]_ZRR$")>;
+
+// Scatter non temporal store, vector + scalar 32-bit element size
+def : InstRW<[N2Write_4cyc_2L01_2V], (instregex "^STNT1[BHW]_ZZR_S")>;
+
+// Scatter non temporal store, vector + scalar 64-bit element size
+def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STNT1[BHWD]_ZZR_D")>;
+
+// Scatter store vector + imm 32-bit element size
+def : InstRW<[N2Write_4cyc_2L01_2V], (instregex "^SST1[BH]_S_IMM$",
+                                                "^SST1W_IMM$")>;
+
+// Scatter store vector + imm 64-bit element size
+def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^SST1[BHW]_D_IMM$",
+                                                "^SST1D_IMM$")>;
+
+// Scatter store, 32-bit scaled offset
+def : InstRW<[N2Write_4cyc_2L01_2V],
+             (instregex "^SST1(H_S|W)_[SU]XTW_SCALED$")>;
+
+// Scatter store, 32-bit unpacked unscaled offset
+def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^SST1[BHW]_D_[SU]XTW$",
+                                                "^SST1D_[SU]XTW$")>;
+
+// Scatter store, 32-bit unpacked scaled offset
+def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^SST1[HW]_D_[SU]XTW_SCALED$",
+                                                "^SST1D_[SU]XTW_SCALED$")>;
+
+// Scatter store, 32-bit unscaled offset
+def : InstRW<[N2Write_4cyc_2L01_2V], (instregex "^SST1[BH]_S_[SU]XTW$",
+                                                "^SST1W_[SU]XTW$")>;
+
+// Scatter store, 64-bit scaled offset
+def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^SST1[HW]_D_SCALED$",
+                                                "^SST1D_SCALED$")>;
+
+// Scatter store, 64-bit unscaled offset
+def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^SST1[BHW]_D$",
+                                                "^SST1D$")>;
+
+// SVE Miscellaneous instructions
+// -----------------------------------------------------------------------------
+
+// Read first fault register, unpredicated
+def : InstRW<[N2Write_2cyc_1M0], (instrs RDFFR_P_REAL)>;
+
+// Read first fault register, predicated
+def : InstRW<[N2Write_3cyc_1M0_1M], (instrs RDFFR_PPz_REAL)>;
+
+// Read first fault register and set flags
+def : InstRW<[N2Write_4cyc_2M0_2M], (instrs RDFFRS_PPz)>;
+
+// Set first fault register
+// Write to first fault register
+def : InstRW<[N2Write_2cyc_1M0], (instrs SETFFR, WRFFR)>;
+
+// Prefetch
+def : InstRW<[N2Write_4cyc_1L], (instregex "^PRF[BHWD]")>;
+
+// SVE Cryptographic instructions
+// -----------------------------------------------------------------------------
+
+// Crypto AES ops
+def : InstRW<[N2Write_2cyc_1V], (instregex "^AES[DE]_ZZZ_B$",
+                                           "^AESI?MC_ZZ_B$")>;
+
+// Crypto SHA3 ops
+def : InstRW<[N2Write_2cyc_1V0], (instregex "^(BCAX|EOR3)_ZZZZ$",
+                                            "^RAX1_ZZZ_D$",
+                                            "^XAR_ZZZI_[BHSD]$")>;
+
+// Crypto SM4 ops
+def : InstRW<[N2Write_4cyc_1V0], (instregex "^SM4E(KEY)?_ZZZ_S$")>;
+
+}
diff --git a/llvm/lib/Target/AArch64/AArch64SchedTSV110.td b/llvm/lib/Target/AArch64/AArch64SchedTSV110.td
index 6ecfc97a4273..9c1bf3231a55 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedTSV110.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedTSV110.td
@@ -26,7 +26,8 @@ def TSV110Model : SchedMachineModel {
   list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
                                                     PAUnsupported.F,
-                                                    SMEUnsupported.F);
+                                                    SMEUnsupported.F,
+                                                    [HasMTE]);
 }
 
 // Define each kind of processor resource and number available on the TSV110,
diff --git a/llvm/lib/Target/AArch64/AArch64SchedThunderX.td b/llvm/lib/Target/AArch64/AArch64SchedThunderX.td
index ff34c0ce9a0c..8b380ae0e8f3 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedThunderX.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedThunderX.td
@@ -27,7 +27,8 @@ def ThunderXT8XModel : SchedMachineModel {
   list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
                                                     PAUnsupported.F,
-                                                    SMEUnsupported.F);
+                                                    SMEUnsupported.F,
+                                                    [HasMTE]);
   // FIXME: Remove when all errors have been fixed.
   let FullInstRWOverlapCheck = 0;
 }
diff --git a/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td b/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td
index ffa0a5e7d91a..cdafa33da054 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td
@@ -27,7 +27,8 @@ def ThunderX2T99Model : SchedMachineModel {
   list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
                                                     PAUnsupported.F,
-                                                    SMEUnsupported.F);
+                                                    SMEUnsupported.F,
+                                                    [HasMTE]);
   // FIXME: Remove when all errors have been fixed.
let FullInstRWOverlapCheck = 0; } diff --git a/llvm/lib/Target/AArch64/AArch64SchedThunderX3T110.td b/llvm/lib/Target/AArch64/AArch64SchedThunderX3T110.td index 46a1c217f984..5b1e9b5bcf23 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedThunderX3T110.td +++ b/llvm/lib/Target/AArch64/AArch64SchedThunderX3T110.td @@ -25,7 +25,8 @@ def ThunderX3T110Model : SchedMachineModel { let CompleteModel = 1; list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F, - PAUnsupported.F); + PAUnsupported.F, + [HasMTE]); // FIXME: Remove when all errors have been fixed. let FullInstRWOverlapCheck = 0; } diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 41c7a8c5042f..274a025e82a0 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -796,6 +796,50 @@ static Optional<Instruction *> instCombineSVELast(InstCombiner &IC, return IC.replaceInstUsesWith(II, Extract); } +static Optional<Instruction *> instCombineSVECondLast(InstCombiner &IC, + IntrinsicInst &II) { + // The SIMD&FP variant of CLAST[AB] is significantly faster than the scalar + // integer variant across a variety of micro-architectures. Replace scalar + // integer CLAST[AB] intrinsic with optimal SIMD&FP variant. A simple + // bitcast-to-fp + clast[ab] + bitcast-to-int will cost a cycle or two more + // depending on the micro-architecture, but has been observed as generally + // being faster, particularly when the CLAST[AB] op is a loop-carried + // dependency. + IRBuilder<> Builder(II.getContext()); + Builder.SetInsertPoint(&II); + Value *Pg = II.getArgOperand(0); + Value *Fallback = II.getArgOperand(1); + Value *Vec = II.getArgOperand(2); + Type *Ty = II.getType(); + + if (!Ty->isIntegerTy()) + return None; + + Type *FPTy; + switch (cast<IntegerType>(Ty)->getBitWidth()) { + default: + return None; + case 16: + FPTy = Builder.getHalfTy(); + break; + case 32: + FPTy = Builder.getFloatTy(); + break; + case 64: + FPTy = Builder.getDoubleTy(); + break; + } + + Value *FPFallBack = Builder.CreateBitCast(Fallback, FPTy); + auto *FPVTy = VectorType::get( + FPTy, cast<VectorType>(Vec->getType())->getElementCount()); + Value *FPVec = Builder.CreateBitCast(Vec, FPVTy); + auto *FPII = Builder.CreateIntrinsic(II.getIntrinsicID(), {FPVec->getType()}, + {Pg, FPFallBack, FPVec}); + Value *FPIItoInt = Builder.CreateBitCast(FPII, II.getType()); + return IC.replaceInstUsesWith(II, FPIItoInt); +} + static Optional<Instruction *> instCombineRDFFR(InstCombiner &IC, IntrinsicInst &II) { LLVMContext &Ctx = II.getContext(); @@ -1294,6 +1338,9 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC, case Intrinsic::aarch64_sve_lasta: case Intrinsic::aarch64_sve_lastb: return instCombineSVELast(IC, II); + case Intrinsic::aarch64_sve_clasta_n: + case Intrinsic::aarch64_sve_clastb_n: + return instCombineSVECondLast(IC, II); case Intrinsic::aarch64_sve_cntd: return instCombineSVECntElts(IC, II, 2); case Intrinsic::aarch64_sve_cntw: diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h index d0aacb457a39..59ec91843266 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -334,8 +334,10 @@ public: return 2; } - bool emitGetActiveLaneMask() const { - return ST->hasSVE(); + PredicationStyle emitGetActiveLaneMask() const { + if (ST->hasSVE()) + return 
PredicationStyle::DataAndControlFlow; + return PredicationStyle::None; } bool supportsScalableVectors() const { return ST->hasSVE(); } diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp index 89e1d85a6085..aaef363e9b8d 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp @@ -21,6 +21,7 @@ #include "llvm/Analysis/ObjCARCUtil.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/LowLevelType.h" @@ -354,7 +355,9 @@ bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, "Return value without a vreg"); bool Success = true; - if (!VRegs.empty()) { + if (!FLI.CanLowerReturn) { + insertSRetStores(MIRBuilder, Val->getType(), VRegs, FLI.DemoteRegister); + } else if (!VRegs.empty()) { MachineFunction &MF = MIRBuilder.getMF(); const Function &F = MF.getFunction(); const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); @@ -464,6 +467,18 @@ bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, return Success; } +bool AArch64CallLowering::canLowerReturn(MachineFunction &MF, + CallingConv::ID CallConv, + SmallVectorImpl<BaseArgInfo> &Outs, + bool IsVarArg) const { + SmallVector<CCValAssign, 16> ArgLocs; + const auto &TLI = *getTLI<AArch64TargetLowering>(); + CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, + MF.getFunction().getContext()); + + return checkReturn(CCInfo, Outs, TLI.CCAssignFnForReturn(CallConv)); +} + /// Helper function to compute forwarded registers for musttail calls. Computes /// the forwarded registers, sets MBB liveness, and emits COPY instructions that /// can be used to save + restore registers later. @@ -533,6 +548,12 @@ bool AArch64CallLowering::lowerFormalArguments( SmallVector<ArgInfo, 8> SplitArgs; SmallVector<std::pair<Register, Register>> BoolArgs; + + // Insert the hidden sret parameter if the return value won't fit in the + // return registers. + if (!FLI.CanLowerReturn) + insertSRetIncomingArgument(F, SplitArgs, FLI.DemoteRegister, MRI, DL); + unsigned i = 0; for (auto &Arg : F.args()) { if (DL.getTypeStoreSize(Arg.getType()).isZero()) @@ -1194,7 +1215,7 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, // Finally we can copy the returned value back into its virtual-register. In // symmetry with the arguments, the physical register must be an // implicit-define of the call instruction. 
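The canLowerReturn override and the insertSRetStores / insertSRetIncomingArgument / insertSRetLoads calls added in this file teach AArch64 GlobalISel to demote oversized return values to a hidden sret pointer instead of falling back to SelectionDAG. A minimal C++ illustration of a return that takes this path (illustrative source, not patch code; under AAPCS64, composites larger than 16 bytes are returned indirectly through a pointer passed in x8):

// Any aggregate over 16 bytes cannot travel in the return registers, so
// checkReturn() fails and GlobalISel demotes the return to a hidden
// sret pointer (FLI.CanLowerReturn == false).
struct Big {
  long A, B, C; // 24 bytes, above the 16-byte register-return limit
};

Big makeBig() {        // lowered roughly as: void makeBig(Big *sret /* x8 */)
  return Big{1, 2, 3}; // written through the pointer by insertSRetStores()
}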
- if (!Info.OrigRet.Ty->isVoidTy()) { + if (Info.CanLowerReturn && !Info.OrigRet.Ty->isVoidTy()) { CCAssignFn *RetAssignFn = TLI.CCAssignFnForReturn(Info.CallConv); CallReturnHandler Handler(MIRBuilder, MRI, MIB); bool UsingReturnedArg = @@ -1226,6 +1247,10 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, .addImm(Assigner.StackOffset) .addImm(CalleePopBytes); + if (!Info.CanLowerReturn) { + insertSRetLoads(MIRBuilder, Info.OrigRet.Ty, Info.OrigRet.Regs, + Info.DemoteRegister, Info.DemoteStackIndex); + } return true; } diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.h b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.h index aafb1d19640a..cbdf77f69a63 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.h +++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.h @@ -35,6 +35,10 @@ public: ArrayRef<Register> VRegs, FunctionLoweringInfo &FLI, Register SwiftErrorVReg) const override; + bool canLowerReturn(MachineFunction &MF, CallingConv::ID CallConv, + SmallVectorImpl<BaseArgInfo> &Outs, + bool IsVarArg) const override; + bool fallBackToDAGISel(const MachineFunction &MF) const override; bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F, diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp index 9a65687735fe..eb8d0552173d 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -1710,11 +1710,6 @@ bool AArch64InstructionSelector::selectCompareBranch( MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) { Register CondReg = I.getOperand(0).getReg(); MachineInstr *CCMI = MRI.getVRegDef(CondReg); - if (CCMI->getOpcode() == TargetOpcode::G_TRUNC) { - CondReg = CCMI->getOperand(1).getReg(); - CCMI = MRI.getVRegDef(CondReg); - } - // Try to select the G_BRCOND using whatever is feeding the condition if // possible. unsigned CCMIOpc = CCMI->getOpcode(); @@ -3346,12 +3341,6 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { case TargetOpcode::G_SELECT: { auto &Sel = cast<GSelect>(I); - if (MRI.getType(Sel.getCondReg()) != LLT::scalar(1)) { - LLVM_DEBUG(dbgs() << "G_SELECT cond has type: " << Ty - << ", expected: " << LLT::scalar(1) << '\n'); - return false; - } - const Register CondReg = Sel.getCondReg(); const Register TReg = Sel.getTrueReg(); const Register FReg = Sel.getFalseReg(); @@ -4777,12 +4766,6 @@ static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst, return false; MachineInstr *ValDef = MRI.getVRegDef(Val); unsigned Opcode = ValDef->getOpcode(); - if (Opcode == TargetOpcode::G_TRUNC) { - // Look through a trunc. - Val = ValDef->getOperand(1).getReg(); - ValDef = MRI.getVRegDef(Val); - Opcode = ValDef->getOpcode(); - } if (isa<GAnyCmp>(ValDef)) { CanNegate = true; MustBeFirst = false; @@ -4870,12 +4853,6 @@ MachineInstr *AArch64InstructionSelector::emitConjunctionRec( auto &MRI = *MIB.getMRI(); MachineInstr *ValDef = MRI.getVRegDef(Val); unsigned Opcode = ValDef->getOpcode(); - if (Opcode == TargetOpcode::G_TRUNC) { - // Look through a trunc. - Val = ValDef->getOperand(1).getReg(); - ValDef = MRI.getVRegDef(Val); - Opcode = ValDef->getOpcode(); - } if (auto *Cmp = dyn_cast<GAnyCmp>(ValDef)) { Register LHS = Cmp->getLHSReg(); Register RHS = Cmp->getRHSReg(); @@ -5026,31 +5003,17 @@ bool AArch64InstructionSelector::tryOptSelect(GSelect &I) { // First, check if the condition is defined by a compare. 
MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg()); - while (CondDef) { - // We can only fold if all of the defs have one use. - Register CondDefReg = CondDef->getOperand(0).getReg(); - if (!MRI.hasOneNonDBGUse(CondDefReg)) { - // Unless it's another select. - for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) { - if (CondDef == &UI) - continue; - if (UI.getOpcode() != TargetOpcode::G_SELECT) - return false; - } - } - - // We can skip over G_TRUNC since the condition is 1-bit. - // Truncating/extending can have no impact on the value. - unsigned Opc = CondDef->getOpcode(); - if (Opc != TargetOpcode::COPY && Opc != TargetOpcode::G_TRUNC) - break; - - // Can't see past copies from physregs. - if (Opc == TargetOpcode::COPY && - Register::isPhysicalRegister(CondDef->getOperand(1).getReg())) - return false; - CondDef = MRI.getVRegDef(CondDef->getOperand(1).getReg()); + // We can only fold if all of the defs have one use. + Register CondDefReg = CondDef->getOperand(0).getReg(); + if (!MRI.hasOneNonDBGUse(CondDefReg)) { + // Unless it's another select. + for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) { + if (CondDef == &UI) + continue; + if (UI.getOpcode() != TargetOpcode::G_SELECT) + return false; + } } // Is the condition defined by a compare? diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index 74ec9373ce9e..d3617b87a851 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -42,7 +42,6 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) : ST(&ST) { using namespace TargetOpcode; const LLT p0 = LLT::pointer(0, 64); - const LLT s1 = LLT::scalar(1); const LLT s8 = LLT::scalar(8); const LLT s16 = LLT::scalar(16); const LLT s32 = LLT::scalar(32); @@ -80,7 +79,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) const LLT &MinFPScalar = HasFP16 ? 
s16 : s32; getActionDefinitionsBuilder({G_IMPLICIT_DEF, G_FREEZE}) - .legalFor({p0, s1, s8, s16, s32, s64}) + .legalFor({p0, s8, s16, s32, s64}) .legalFor(PackedVectorAllTypeList) .widenScalarToNextPow2(0) .clampScalar(0, s8, s64) @@ -198,8 +197,9 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) getActionDefinitionsBuilder( {G_SADDE, G_SSUBE, G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO, G_USUBO}) - .legalFor({{s32, s1}, {s64, s1}}) + .legalFor({{s32, s32}, {s64, s32}}) .clampScalar(0, s32, s64) + .clampScalar(1, s32, s64) .widenScalarToNextPow2(0); getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FNEG}) @@ -241,7 +241,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) getActionDefinitionsBuilder(G_INSERT) .legalIf(all(typeInSet(0, {s32, s64, p0}), - typeInSet(1, {s1, s8, s16, s32}), smallerThan(1, 0))) + typeInSet(1, {s8, s16, s32}), smallerThan(1, 0))) .widenScalarToNextPow2(0) .clampScalar(0, s32, s64) .widenScalarToNextPow2(1) @@ -260,8 +260,15 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) .maxScalarIf(typeInSet(1, {s64, p0}), 0, s32) .maxScalarIf(typeInSet(1, {s128}), 0, s64); - getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD}) - .lowerIf(atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Unordered)) + + for (unsigned Op : {G_SEXTLOAD, G_ZEXTLOAD}) { + auto &Actions = getActionDefinitionsBuilder(Op); + + if (Op == G_SEXTLOAD) + Actions.lowerIf(atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Unordered)); + + // Atomics have zero extending behavior. + Actions .legalForTypesWithMemDesc({{s32, p0, s8, 8}, {s32, p0, s16, 8}, {s32, p0, s32, 8}, @@ -278,6 +285,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) .unsupportedIfMemSizeNotPow2() // Lower anything left over into G_*EXT and G_LOAD .lower(); + } auto IsPtrVecPred = [=](const LegalityQuery &Query) { const LLT &ValTy = Query.Types[0]; @@ -425,10 +433,6 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) const LLT &SrcTy = Query.Types[1]; - // Special case for s1. - if (SrcTy == s1) - return true; - // Make sure we fit in a register otherwise. Don't bother checking that // the source type is below 128 bits. We shouldn't be allowing anything // through which is wider than the destination in the first place. 
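The s1 special case deleted just above and the G_BRCOND / G_SELECT rules updated in the next hunk enforce a single invariant: s1 is no longer a legal GlobalISel type on AArch64, and 1-bit conditions are widened to s32 before selection. A standalone sketch of the resulting type contract, using LLVM's LLT helpers (the check itself is not part of the patch):

#include "llvm/CodeGen/LowLevelType.h"
using llvm::LLT;

// After this change only an s32 condition is legal for G_SELECT;
// clampScalar(1, s32, s32) widens an incoming s1 before instruction
// selection ever sees it.
static bool isLegalSelectTypes(LLT ResTy, LLT CondTy) {
  const LLT S32 = LLT::scalar(32);
  const LLT S64 = LLT::scalar(64);
  const LLT P0 = LLT::pointer(0, 64);
  return CondTy == S32 && (ResTy == S32 || ResTy == S64 || ResTy == P0);
}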
@@ -481,13 +485,16 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) .widenScalarToNextPow2(0); // Control-flow - getActionDefinitionsBuilder(G_BRCOND).legalFor({s1, s8, s16, s32}); + getActionDefinitionsBuilder(G_BRCOND) + .legalFor({s32}) + .clampScalar(0, s32, s32); getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0}); getActionDefinitionsBuilder(G_SELECT) - .legalFor({{s32, s1}, {s64, s1}, {p0, s1}}) + .legalFor({{s32, s32}, {s64, s32}, {p0, s32}}) .widenScalarToNextPow2(0) .clampScalar(0, s32, s64) + .clampScalar(1, s32, s32) .minScalarEltSameAsIf(all(isVector(0), isVector(1)), 1, 0) .lowerIf(isVector(0)); @@ -500,7 +507,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0}); getActionDefinitionsBuilder(G_PTRTOINT) - .legalForCartesianProduct({s1, s8, s16, s32, s64}, {p0}) + .legalForCartesianProduct({s8, s16, s32, s64}, {p0}) .legalFor({{v2s64, v2p0}}) .maxScalar(0, s64) .widenScalarToNextPow2(0, /*Min*/ 8); @@ -517,7 +524,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) // FIXME: This is wrong since G_BITCAST is not allowed to change the // number of bits but it's what the previous code described and fixing // it breaks tests. - .legalForCartesianProduct({s1, s8, s16, s32, s64, s128, v16s8, v8s8, v4s8, + .legalForCartesianProduct({s8, s16, s32, s64, s128, v16s8, v8s8, v4s8, v8s16, v4s16, v2s16, v4s32, v2s32, v2s64, v2p0}); diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp index 2901e5c0fe4d..bd0a497fa441 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp @@ -43,11 +43,9 @@ namespace { class AArch64MCCodeEmitter : public MCCodeEmitter { MCContext &Ctx; - const MCInstrInfo &MCII; public: - AArch64MCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx) - : Ctx(ctx), MCII(mcii) {} + AArch64MCCodeEmitter(const MCInstrInfo &, MCContext &ctx) : Ctx(ctx) {} AArch64MCCodeEmitter(const AArch64MCCodeEmitter &) = delete; void operator=(const AArch64MCCodeEmitter &) = delete; ~AArch64MCCodeEmitter() override = default; @@ -193,12 +191,6 @@ public: uint32_t encodeMatrixIndexGPR32(const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const; - -private: - FeatureBitset computeAvailableFeatures(const FeatureBitset &FB) const; - void - verifyInstructionPredicates(const MCInst &MI, - const FeatureBitset &AvailableFeatures) const; }; } // end anonymous namespace @@ -618,9 +610,6 @@ unsigned AArch64MCCodeEmitter::fixMOVZ(const MCInst &MI, unsigned EncodedValue, void AArch64MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const { - verifyInstructionPredicates(MI, - computeAvailableFeatures(STI.getFeatureBits())); - if (MI.getOpcode() == AArch64::TLSDESCCALL) { // This is a directive which applies an R_AARCH64_TLSDESC_CALL to the // following (BLR) instruction. 
It doesn't emit any code itself so it @@ -674,7 +663,6 @@ unsigned AArch64MCCodeEmitter::fixOneOperandFPComparison( return EncodedValue; } -#define ENABLE_INSTR_PREDICATE_VERIFIER #include "AArch64GenMCCodeEmitter.inc" MCCodeEmitter *llvm::createAArch64MCCodeEmitter(const MCInstrInfo &MCII, diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp index 34e3b2cf58e4..f129bfe11e4d 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp @@ -34,6 +34,7 @@ using namespace llvm; #define GET_INSTRINFO_MC_DESC #define GET_INSTRINFO_MC_HELPERS +#define ENABLE_INSTR_PREDICATE_VERIFIER #include "AArch64GenInstrInfo.inc" #define GET_SUBTARGETINFO_MC_DESC diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h index 049c49796dc6..7d1de3e53c0c 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h @@ -33,6 +33,7 @@ class MCSubtargetInfo; class MCTargetOptions; class MCTargetStreamer; class Target; +class FeatureBitset; MCCodeEmitter *createAArch64MCCodeEmitter(const MCInstrInfo &MCII, MCContext &Ctx); diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index 2744e81f99f1..cb36aa26e839 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -227,6 +227,40 @@ class sme_add_vector_to_tile_u64<bit V, string mnemonic> let Inst{2-0} = ZAda; } +class sme_add_vector_to_tile_pseudo<ZPRRegOp zpr_ty> + : Pseudo<(outs), + (ins i64imm:$tile, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn), []>, + Sched<[]> { + // Translated to the actual instructions in AArch64ISelLowering.cpp + let usesCustomInserter = 1; +} + +def ADDHA_MPPZ_PSEUDO_S : sme_add_vector_to_tile_pseudo<ZPR32>; +def ADDVA_MPPZ_PSEUDO_S : sme_add_vector_to_tile_pseudo<ZPR32>; + +def : Pat<(int_aarch64_sme_addha + imm0_3:$tile, (nxv4i1 PPR3bAny:$pn), (nxv4i1 PPR3bAny:$pm), + (nxv4i32 ZPR32:$zn)), + (ADDHA_MPPZ_PSEUDO_S imm0_3:$tile, $pn, $pm, $zn)>; +def : Pat<(int_aarch64_sme_addva + imm0_3:$tile, (nxv4i1 PPR3bAny:$pn), (nxv4i1 PPR3bAny:$pm), + (nxv4i32 ZPR32:$zn)), + (ADDVA_MPPZ_PSEUDO_S imm0_3:$tile, $pn, $pm, $zn)>; + +let Predicates = [HasSMEI64] in { +def ADDHA_MPPZ_PSEUDO_D : sme_add_vector_to_tile_pseudo<ZPR64>; +def ADDVA_MPPZ_PSEUDO_D : sme_add_vector_to_tile_pseudo<ZPR64>; + +def : Pat<(int_aarch64_sme_addha + imm0_7:$tile, (nxv2i1 PPR3bAny:$pn), (nxv2i1 PPR3bAny:$pm), + (nxv2i64 ZPR64:$zn)), + (ADDHA_MPPZ_PSEUDO_D imm0_7:$tile, $pn, $pm, $zn)>; +def : Pat<(int_aarch64_sme_addva + imm0_7:$tile, (nxv2i1 PPR3bAny:$pn), (nxv2i1 PPR3bAny:$pm), + (nxv2i64 ZPR64:$zn)), + (ADDVA_MPPZ_PSEUDO_D imm0_7:$tile, $pn, $pm, $zn)>; +} + //===----------------------------------------------------------------------===// // SME Contiguous Loads //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index 3631536a32b9..7cdd4c4af95e 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -650,11 +650,11 @@ multiclass sve_int_pfalse<bits<6> opc, string asm> { def : Pat<(nxv1i1 immAllZerosV), (!cast<Instruction>(NAME))>; } -class sve_int_ptest<bits<6> opc, string asm> +class 
sve_int_ptest<bits<6> opc, string asm, SDPatternOperator op> : I<(outs), (ins PPRAny:$Pg, PPR8:$Pn), asm, "\t$Pg, $Pn", "", - []>, Sched<[]> { + [(op (nxv16i1 PPRAny:$Pg), (nxv16i1 PPR8:$Pn))]>, Sched<[]> { bits<4> Pg; bits<4> Pn; let Inst{31-24} = 0b00100101; @@ -1691,6 +1691,9 @@ multiclass sve_int_pred_log<bits<4> opc, string asm, SDPatternOperator op, !cast<Instruction>(NAME), PTRUE_S>; def : SVE_2_Op_AllActive_Pat<nxv2i1, op_nopred, nxv2i1, nxv2i1, !cast<Instruction>(NAME), PTRUE_D>; + // Emulate .Q operation using a PTRUE_D when the other lanes don't matter. + def : SVE_2_Op_AllActive_Pat<nxv1i1, op_nopred, nxv1i1, nxv1i1, + !cast<Instruction>(NAME), PTRUE_D>; } // An instance of sve_int_pred_log_and but uses op_nopred's first operand as the @@ -1706,6 +1709,9 @@ multiclass sve_int_pred_log_v2<bits<4> opc, string asm, SDPatternOperator op, (!cast<Instruction>(NAME) $Op1, $Op1, $Op2)>; def : Pat<(nxv2i1 (op_nopred nxv2i1:$Op1, nxv2i1:$Op2)), (!cast<Instruction>(NAME) $Op1, $Op1, $Op2)>; + // Emulate .Q operation using a PTRUE_D when the other lanes don't matter. + def : Pat<(nxv1i1 (op_nopred nxv1i1:$Op1, nxv1i1:$Op2)), + (!cast<Instruction>(NAME) $Op1, $Op1, $Op2)>; } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h index 71303611265c..cf8891cff1b3 100644 --- a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h +++ b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h @@ -343,7 +343,8 @@ struct SysAlias { : Name(N), Encoding(E), FeaturesRequired(F) {} bool haveFeatures(FeatureBitset ActiveFeatures) const { - return (FeaturesRequired & ActiveFeatures) == FeaturesRequired; + return ActiveFeatures[llvm::AArch64::FeatureAll] || + (FeaturesRequired & ActiveFeatures) == FeaturesRequired; } FeatureBitset getRequiredFeatures() const { return FeaturesRequired; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index c4680cbedadf..91dc611fb265 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -317,6 +317,9 @@ extern char &SIFormMemoryClausesID; void initializeSIPostRABundlerPass(PassRegistry&); extern char &SIPostRABundlerID; +void initializeGCNCreateVOPDPass(PassRegistry &); +extern char &GCNCreateVOPDID; + void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry&); extern char &AMDGPUUnifyDivergentExitNodesID; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp index 94d7844e8a32..a8108b1d637b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp @@ -626,13 +626,13 @@ bool AMDGPUCodeGenPrepare::foldBinOpIntoSelect(BinaryOperator &BO) const { Constant *FoldedT = SelOpNo ? ConstantFoldBinaryOpOperands(BO.getOpcode(), CBO, CT, *DL) : ConstantFoldBinaryOpOperands(BO.getOpcode(), CT, CBO, *DL); - if (isa<ConstantExpr>(FoldedT)) + if (!FoldedT || isa<ConstantExpr>(FoldedT)) return false; Constant *FoldedF = SelOpNo ? 
ConstantFoldBinaryOpOperands(BO.getOpcode(), CBO, CF, *DL) : ConstantFoldBinaryOpOperands(BO.getOpcode(), CF, CBO, *DL); - if (isa<ConstantExpr>(FoldedF)) + if (!FoldedF || isa<ConstantExpr>(FoldedF)) return false; IRBuilder<> Builder(&BO); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index b00df27f5fd3..589992c7a7ec 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -1883,20 +1883,24 @@ bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr, return true; } +// Match an immediate (if Imm is true) or an SGPR (if Imm is false) +// offset. If Imm32Only is true, match only 32-bit immediate offsets +// available on CI. bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode, - SDValue &Offset, bool &Imm) const { + SDValue &Offset, bool Imm, + bool Imm32Only) const { ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode); if (!C) { + if (Imm) + return false; if (ByteOffsetNode.getValueType().isScalarInteger() && ByteOffsetNode.getValueType().getSizeInBits() == 32) { Offset = ByteOffsetNode; - Imm = false; return true; } if (ByteOffsetNode.getOpcode() == ISD::ZERO_EXTEND) { if (ByteOffsetNode.getOperand(0).getValueType().getSizeInBits() == 32) { Offset = ByteOffsetNode.getOperand(0); - Imm = false; return true; } } @@ -1908,9 +1912,8 @@ bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode, int64_t ByteOffset = C->getSExtValue(); Optional<int64_t> EncodedOffset = AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset, false); - if (EncodedOffset) { + if (EncodedOffset && Imm && !Imm32Only) { Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32); - Imm = true; return true; } @@ -1919,7 +1922,7 @@ bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode, return false; EncodedOffset = AMDGPU::getSMRDEncodedLiteralOffset32(*Subtarget, ByteOffset); - if (EncodedOffset) { + if (EncodedOffset && Imm32Only) { Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32); return true; } @@ -1927,11 +1930,14 @@ bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode, if (!isUInt<32>(ByteOffset) && !isInt<32>(ByteOffset)) return false; - SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32); - Offset = SDValue( - CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, C32Bit), 0); + if (!Imm) { + SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32); + Offset = SDValue( + CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, C32Bit), 0); + return true; + } - return true; + return false; } SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const { @@ -1959,8 +1965,12 @@ SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const { Ops), 0); } +// Match a base and an immediate (if Imm is true) or an SGPR +// (if Imm is false) offset. If Imm32Only is true, match only 32-bit +// immediate offsets available on CI. 
bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase, - SDValue &Offset, bool &Imm) const { + SDValue &Offset, bool Imm, + bool Imm32Only) const { SDLoc SL(Addr); // A 32-bit (address + offset) should not cause unsigned 32-bit integer @@ -1977,41 +1987,34 @@ bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase, assert(N0 && N1 && isa<ConstantSDNode>(N1)); } if (N0 && N1) { - if (SelectSMRDOffset(N1, Offset, Imm)) { + if (SelectSMRDOffset(N1, Offset, Imm, Imm32Only)) { SBase = Expand32BitAddress(N0); return true; } } + return false; } + if (!Imm) + return false; SBase = Expand32BitAddress(Addr); Offset = CurDAG->getTargetConstant(0, SL, MVT::i32); - Imm = true; return true; } bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const { - bool Imm = false; - return SelectSMRD(Addr, SBase, Offset, Imm) && Imm; + return SelectSMRD(Addr, SBase, Offset, /* Imm */ true); } bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const { - assert(Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS); - - bool Imm = false; - if (!SelectSMRD(Addr, SBase, Offset, Imm)) - return false; - - return !Imm && isa<ConstantSDNode>(Offset); + return SelectSMRD(Addr, SBase, Offset, /* Imm */ true, /* Imm32Only */ true); } bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const { - bool Imm = false; - return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm && - !isa<ConstantSDNode>(Offset); + return SelectSMRD(Addr, SBase, Offset, /* Imm */ false); } bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h index 862be9dc5568..7894b8eb5b67 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h @@ -193,11 +193,11 @@ private: bool SelectScratchSVAddr(SDNode *N, SDValue Addr, SDValue &VAddr, SDValue &SAddr, SDValue &Offset) const; - bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset, - bool &Imm) const; + bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset, bool Imm, + bool Imm32Only) const; SDValue Expand32BitAddress(SDValue Addr) const; - bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset, - bool &Imm) const; + bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset, bool Imm, + bool Imm32Only = false) const; bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const; bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const; bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index ef7929012597..bf520a560404 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -4803,6 +4803,8 @@ AMDGPUTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const { case AtomicRMWInst::Nand: case AtomicRMWInst::FAdd: case AtomicRMWInst::FSub: + case AtomicRMWInst::FMax: + case AtomicRMWInst::FMin: return AtomicExpansionKind::CmpXChg; default: return AtomicExpansionKind::None; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 3f242fdb6d8e..70fae9d784a2 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -1180,7 +1180,7 @@ bool 
AMDGPUInstructionSelector::selectBallot(MachineInstr &I) const { getIConstantVRegValWithLookThrough(I.getOperand(2).getReg(), *MRI); if (Arg) { - const int64_t Value = Arg.getValue().Value.getSExtValue(); + const int64_t Value = Arg.value().Value.getSExtValue(); if (Value == 0) { unsigned Opcode = Is64 ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32; BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg).addImm(0); @@ -3235,7 +3235,7 @@ static Register matchZeroExtendFromS32(MachineRegisterInfo &MRI, Register Reg) { // Match legalized form %zext = G_MERGE_VALUES (s32 %x), (s32 0) const MachineInstr *Def = getDefIgnoringCopies(Reg, MRI); if (Def->getOpcode() != AMDGPU::G_MERGE_VALUES) - return false; + return Register(); if (mi_match(Def->getOperand(2).getReg(), MRI, m_ZeroInt())) { return Def->getOperand(1).getReg(); @@ -3851,27 +3851,36 @@ AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const { getAddrModeInfo(*MI, *MRI, AddrInfo); // FIXME: We should shrink the GEP if the offset is known to be <= 32-bits, - // then we can select all ptr + 32-bit offsets not just immediate offsets. - if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1) + // then we can select all ptr + 32-bit offsets. + if (AddrInfo.empty()) return None; const GEPInfo &GEPInfo = AddrInfo[0]; + Register PtrReg = GEPInfo.SgprParts[0]; + // SGPR offset is unsigned. - if (!GEPInfo.Imm || GEPInfo.Imm < 0 || !isUInt<32>(GEPInfo.Imm)) - return None; + if (AddrInfo[0].SgprParts.size() == 1 && isUInt<32>(GEPInfo.Imm) && + GEPInfo.Imm != 0) { + // If we make it this far we have a load with an 32-bit immediate offset. + // It is OK to select this using a sgpr offset, because we have already + // failed trying to select this load into one of the _IMM variants since + // the _IMM Patterns are considered before the _SGPR patterns. + Register OffsetReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass); + BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), OffsetReg) + .addImm(GEPInfo.Imm); + return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); }, + [=](MachineInstrBuilder &MIB) { MIB.addReg(OffsetReg); }}}; + } - // If we make it this far we have a load with an 32-bit immediate offset. - // It is OK to select this using a sgpr offset, because we have already - // failed trying to select this load into one of the _IMM variants since - // the _IMM Patterns are considered before the _SGPR patterns. 
- Register PtrReg = GEPInfo.SgprParts[0]; - Register OffsetReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass); - BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), OffsetReg) - .addImm(GEPInfo.Imm); - return {{ - [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); }, - [=](MachineInstrBuilder &MIB) { MIB.addReg(OffsetReg); } - }}; + if (AddrInfo[0].SgprParts.size() == 2 && GEPInfo.Imm == 0) { + if (Register OffsetReg = + matchZeroExtendFromS32(*MRI, GEPInfo.SgprParts[1])) { + return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); }, + [=](MachineInstrBuilder &MIB) { MIB.addReg(OffsetReg); }}}; + } + } + + return None; } std::pair<Register, int> @@ -4231,7 +4240,7 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const { }, [=](MachineInstrBuilder &MIB) { // vaddr if (FI) - MIB.addFrameIndex(FI.getValue()); + MIB.addFrameIndex(FI.value()); else MIB.addReg(VAddr); }, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td index 31012915457b..26e6b9a10688 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -542,63 +542,37 @@ def atomic_store_64_#as : PatFrag<(ops node:$ptr, node:$val), } } // End foreach as -// TODO: Add GISelPredicateCode for the ret and noret PatFrags once -// GlobalISelEmitter allows pattern matches where src and dst def count -// mismatch. - -multiclass ret_noret_op { - let PredicateCode = [{ return !(SDValue(N, 0).use_empty()); }], - GISelPredicateCode = [{ return true; }] in { - def "_ret" : PatFrag<(ops node:$ptr, node:$data), - (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)>; - } - - let PredicateCode = [{ return (SDValue(N, 0).use_empty()); }], - GISelPredicateCode = [{ return false; }] in { - def "_noret" : PatFrag<(ops node:$ptr, node:$data), - (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)>; - } +multiclass noret_op { + let HasNoUse = true in + def "_noret" : PatFrag<(ops node:$ptr, node:$data), + (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)>; } -defm int_amdgcn_flat_atomic_fadd : ret_noret_op; -defm int_amdgcn_flat_atomic_fadd_v2bf16 : ret_noret_op; -defm int_amdgcn_flat_atomic_fmin : ret_noret_op; -defm int_amdgcn_flat_atomic_fmax : ret_noret_op; -defm int_amdgcn_global_atomic_fadd : ret_noret_op; -defm int_amdgcn_global_atomic_fadd_v2bf16 : ret_noret_op; -defm int_amdgcn_global_atomic_fmin : ret_noret_op; -defm int_amdgcn_global_atomic_fmax : ret_noret_op; -defm int_amdgcn_ds_fadd_v2bf16 : ret_noret_op; - -multiclass ret_noret_binary_atomic_op<SDNode atomic_op, bit IsInt = 1> { - let PredicateCode = [{ return (SDValue(N, 0).use_empty()); }], - GISelPredicateCode = [{ return false; }] in { - defm "_noret" : binary_atomic_op<atomic_op, IsInt>; - } +defm int_amdgcn_flat_atomic_fadd : noret_op; +defm int_amdgcn_flat_atomic_fadd_v2bf16 : noret_op; +defm int_amdgcn_flat_atomic_fmin : noret_op; +defm int_amdgcn_flat_atomic_fmax : noret_op; +defm int_amdgcn_global_atomic_fadd : noret_op; +defm int_amdgcn_global_atomic_fadd_v2bf16 : noret_op; +defm int_amdgcn_global_atomic_fmin : noret_op; +defm int_amdgcn_global_atomic_fmax : noret_op; +defm int_amdgcn_ds_fadd_v2bf16 : noret_op; - let PredicateCode = [{ return !(SDValue(N, 0).use_empty()); }], - GISelPredicateCode = [{ return true; }] in { - defm "_ret" : binary_atomic_op<atomic_op, IsInt>; - } +multiclass noret_binary_atomic_op<SDNode atomic_op, bit IsInt = 1> { + let HasNoUse = true in + defm "_noret" : binary_atomic_op<atomic_op, 
IsInt>; } -multiclass ret_noret_ternary_atomic_op<SDNode atomic_op> { - let PredicateCode = [{ return (SDValue(N, 0).use_empty()); }], - GISelPredicateCode = [{ return false; }] in { - defm "_noret" : ternary_atomic_op<atomic_op>; - } - - let PredicateCode = [{ return !(SDValue(N, 0).use_empty()); }], - GISelPredicateCode = [{ return true; }] in { - defm "_ret" : ternary_atomic_op<atomic_op>; - } +multiclass noret_ternary_atomic_op<SDNode atomic_op> { + let HasNoUse = true in + defm "_noret" : ternary_atomic_op<atomic_op>; } multiclass binary_atomic_op_all_as<SDNode atomic_op, bit IsInt = 1> { foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in { let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in { defm "_"#as : binary_atomic_op<atomic_op, IsInt>; - defm "_"#as : ret_noret_binary_atomic_op<atomic_op, IsInt>; + defm "_"#as : noret_binary_atomic_op<atomic_op, IsInt>; } } } @@ -640,13 +614,15 @@ def store_align16_local: PatFrag<(ops node:$val, node:$ptr), let AddressSpaces = StoreAddress_local.AddrSpaces in { defm atomic_cmp_swap_local : ternary_atomic_op<atomic_cmp_swap>; -defm atomic_cmp_swap_local : ret_noret_ternary_atomic_op<atomic_cmp_swap>; -defm atomic_cmp_swap_local_m0 : ret_noret_ternary_atomic_op<atomic_cmp_swap_glue>; +defm atomic_cmp_swap_local : noret_ternary_atomic_op<atomic_cmp_swap>; +defm atomic_cmp_swap_local_m0 : noret_ternary_atomic_op<atomic_cmp_swap_glue>; +defm atomic_cmp_swap_local_m0 : ternary_atomic_op<atomic_cmp_swap_glue>; } let AddressSpaces = StoreAddress_region.AddrSpaces in { -defm atomic_cmp_swap_region : ret_noret_ternary_atomic_op<atomic_cmp_swap>; -defm atomic_cmp_swap_region_m0 : ret_noret_ternary_atomic_op<atomic_cmp_swap_glue>; +defm atomic_cmp_swap_region : noret_ternary_atomic_op<atomic_cmp_swap>; +defm atomic_cmp_swap_region_m0 : noret_ternary_atomic_op<atomic_cmp_swap_glue>; +defm atomic_cmp_swap_region_m0 : ternary_atomic_op<atomic_cmp_swap_glue>; } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp index ed6ddbf426fd..38e04dedd9fc 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp @@ -171,6 +171,10 @@ const MCExpr *AMDGPUAsmPrinter::lowerConstant(const Constant *CV) { } void AMDGPUAsmPrinter::emitInstruction(const MachineInstr *MI) { + // FIXME: Enable feature predicate checks once all the test pass. 
+ // AMDGPU_MC::verifyInstructionPredicates(MI->getOpcode(), + // getSubtargetInfo().getFeatureBits()); + if (emitPseudoExpansionLowering(*OutStreamer, MI)) return; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.h index 1b513c456307..745734aac2b4 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.h @@ -131,8 +131,8 @@ public: bool IsAOneAddressSpace = isOneAddressSpace(A); bool IsBOneAddressSpace = isOneAddressSpace(B); - return AIO.getValue() >= BIO.getValue() && - (IsAOneAddressSpace == IsBOneAddressSpace || !IsAOneAddressSpace); + return AIO.value() >= BIO.value() && + (IsAOneAddressSpace == IsBOneAddressSpace || !IsAOneAddressSpace); } }; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index 77816a783630..6bd906439ee8 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -40,9 +40,9 @@ using namespace llvm; #include "AMDGPUGenSubtargetInfo.inc" #undef AMDGPUSubtarget -static cl::opt<bool> DisablePowerSched( - "amdgpu-disable-power-sched", - cl::desc("Disable scheduling to minimize mAI power bursts"), +static cl::opt<bool> EnablePowerSched( + "amdgpu-enable-power-sched", + cl::desc("Enable scheduling to minimize mAI power bursts"), cl::init(false)); static cl::opt<bool> EnableVGPRIndexMode( @@ -916,7 +916,7 @@ struct FillMFMAShadowMutation : ScheduleDAGMutation { void apply(ScheduleDAGInstrs *DAGInstrs) override { const GCNSubtarget &ST = DAGInstrs->MF.getSubtarget<GCNSubtarget>(); - if (!ST.hasMAIInsts() || DisablePowerSched) + if (!ST.hasMAIInsts()) return; DAG = static_cast<ScheduleDAGMI*>(DAGInstrs); const TargetSchedModel *TSchedModel = DAGInstrs->getSchedModel(); @@ -966,7 +966,8 @@ void GCNSubtarget::getPostRAMutations( std::unique_ptr<ScheduleDAGMutation> GCNSubtarget::createFillMFMAShadowMutation(const TargetInstrInfo *TII) const { - return std::make_unique<FillMFMAShadowMutation>(&InstrInfo); + return EnablePowerSched ? std::make_unique<FillMFMAShadowMutation>(&InstrInfo) + : nullptr; } const AMDGPUSubtarget &AMDGPUSubtarget::get(const MachineFunction &MF) { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 1c6b9d35695a..971e44723758 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -22,11 +22,13 @@ #include "AMDGPUTargetTransformInfo.h" #include "GCNIterativeScheduler.h" #include "GCNSchedStrategy.h" +#include "GCNVOPDUtils.h" #include "R600.h" #include "R600TargetMachine.h" #include "SIMachineFunctionInfo.h" #include "SIMachineScheduler.h" #include "TargetInfo/AMDGPUTargetInfo.h" +#include "Utils/AMDGPUBaseInfo.h" #include "llvm/Analysis/CGSCCPassManager.h" #include "llvm/CodeGen/GlobalISel/CSEInfo.h" #include "llvm/CodeGen/GlobalISel/IRTranslator.h" @@ -278,6 +280,12 @@ static cl::opt<bool> cl::desc("Enable s_delay_alu insertion"), cl::init(true), cl::Hidden); +// Enable GFX11+ VOPD +static cl::opt<bool> + EnableVOPD("amdgpu-enable-vopd", + cl::desc("Enable VOPD, dual issue of VALU in wave32"), + cl::init(true), cl::Hidden); + // Option is used in lit tests to prevent deadcoding of patterns inspected. 
static cl::opt<bool> EnableDCEInRA("amdgpu-dce-in-ra", @@ -383,6 +391,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() { initializeSIPreAllocateWWMRegsPass(*PR); initializeSIFormMemoryClausesPass(*PR); initializeSIPostRABundlerPass(*PR); + initializeGCNCreateVOPDPass(*PR); initializeAMDGPUUnifyDivergentExitNodesPass(*PR); initializeAMDGPUAAWrapperPassPass(*PR); initializeAMDGPUExternalAAWrapperPass(*PR); @@ -920,6 +929,8 @@ public: DAG->addMutation(ST.createFillMFMAShadowMutation(DAG->TII)); DAG->addMutation(createIGroupLPDAGMutation()); DAG->addMutation(createSchedBarrierDAGMutation()); + if (isPassEnabled(EnableVOPD, CodeGenOpt::Less)) + DAG->addMutation(createVOPDPairingMutation()); return DAG; } @@ -1399,6 +1410,8 @@ void GCNPassConfig::addPreSched2() { } void GCNPassConfig::addPreEmitPass() { + if (isPassEnabled(EnableVOPD, CodeGenOpt::Less)) + addPass(&GCNCreateVOPDID); addPass(createSIMemoryLegalizerPass()); addPass(createSIInsertWaitcntsPass()); diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index a087323e5de7..04dd3e938a15 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -1412,10 +1412,12 @@ defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_short, i32, "BUFFER_STORE_SHORT">; multiclass BufferAtomicPat<string OpPrefix, ValueType vt, string Inst, bit isIntr = 0> { foreach RtnMode = ["ret", "noret"] in { - defvar Op = !cast<SDPatternOperator>(OpPrefix # "_" # RtnMode + defvar Op = !cast<SDPatternOperator>(OpPrefix + # !if(!eq(RtnMode, "ret"), "", "_noret") # !if(isIntr, "", "_" # vt.Size)); defvar InstSuffix = !if(!eq(RtnMode, "ret"), "_RTN", ""); + let AddedComplexity = !if(!eq(RtnMode, "ret"), 0, 1) in { def : GCNPat< (vt (Op (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset), vt:$vdata_in)), (!cast<MUBUF_Pseudo>(Inst # "_OFFSET" # InstSuffix) getVregSrcForVT<vt>.ret:$vdata_in, @@ -1428,6 +1430,7 @@ multiclass BufferAtomicPat<string OpPrefix, ValueType vt, string Inst, bit isInt (!cast<MUBUF_Pseudo>(Inst # "_ADDR64" # InstSuffix) getVregSrcForVT<vt>.ret:$vdata_in, VReg_64:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset) >; + } // end let AddedComplexity } // end foreach RtnMode } @@ -1439,10 +1442,12 @@ multiclass BufferAtomicIntrPat<string OpPrefix, ValueType vt, string Inst> { multiclass BufferAtomicCmpSwapPat<ValueType vt, ValueType data_vt, string Inst> { foreach RtnMode = ["ret", "noret"] in { - defvar Op = !cast<SDPatternOperator>("AMDGPUatomic_cmp_swap_global_" # RtnMode + defvar Op = !cast<SDPatternOperator>("AMDGPUatomic_cmp_swap_global" + # !if(!eq(RtnMode, "ret"), "", "_noret") # "_" # vt.Size); defvar InstSuffix = !if(!eq(RtnMode, "ret"), "_RTN", ""); + let AddedComplexity = !if(!eq(RtnMode, "ret"), 0, 1) in { defvar OffsetResDag = (!cast<MUBUF_Pseudo>(Inst # "_OFFSET" # InstSuffix) getVregSrcForVT<data_vt>.ret:$vdata_in, SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset); @@ -1465,6 +1470,7 @@ multiclass BufferAtomicCmpSwapPat<ValueType vt, ValueType data_vt, string Inst> !if(!eq(vt, i32), sub0, sub0_sub1)), Addr64ResDag) >; + } // end let AddedComplexity } // end foreach RtnMode } @@ -1495,13 +1501,14 @@ multiclass SIBufferAtomicPat<string OpPrefix, ValueType vt, string Inst, list<string> RtnModes = ["ret", "noret"]> { foreach RtnMode = RtnModes in { - defvar Op = !cast<SDPatternOperator>(!if(!eq(RtnMode, "none"), - OpPrefix, OpPrefix # "_" # RtnMode)); - defvar InstSuffix = !if(!or(!eq(RtnMode, "none"), !eq(RtnMode, 
"ret")), - "_RTN", ""); - defvar CachePolicy = !if(!or(!eq(RtnMode, "none"), !eq(RtnMode, "ret")), + defvar Op = !cast<SDPatternOperator>(OpPrefix + # !if(!eq(RtnMode, "ret"), "", "_noret")); + + defvar InstSuffix = !if(!eq(RtnMode, "ret"), "_RTN", ""); + defvar CachePolicy = !if(!eq(RtnMode, "ret"), (set_glc $cachepolicy), (timm:$cachepolicy)); + let AddedComplexity = !if(!eq(RtnMode, "ret"), 0, 1) in { def : GCNPat< (vt (Op vt:$vdata_in, v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset, timm:$cachepolicy, 0)), @@ -1534,6 +1541,7 @@ multiclass SIBufferAtomicPat<string OpPrefix, ValueType vt, string Inst, (REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1), SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), CachePolicy) >; + } // end let AddedComplexity } // end foreach RtnMode } @@ -1551,7 +1559,7 @@ defm : SIBufferAtomicPat<"SIbuffer_atomic_or", i32, "BUFFER_ATOMIC_OR">; defm : SIBufferAtomicPat<"SIbuffer_atomic_xor", i32, "BUFFER_ATOMIC_XOR">; defm : SIBufferAtomicPat<"SIbuffer_atomic_inc", i32, "BUFFER_ATOMIC_INC">; defm : SIBufferAtomicPat<"SIbuffer_atomic_dec", i32, "BUFFER_ATOMIC_DEC">; -defm : SIBufferAtomicPat<"SIbuffer_atomic_csub", i32, "BUFFER_ATOMIC_CSUB", ["none"]>; +defm : SIBufferAtomicPat<"SIbuffer_atomic_csub", i32, "BUFFER_ATOMIC_CSUB", ["ret"]>; defm : SIBufferAtomicPat<"SIbuffer_atomic_swap", i64, "BUFFER_ATOMIC_SWAP_X2">; defm : SIBufferAtomicPat<"SIbuffer_atomic_add", i64, "BUFFER_ATOMIC_ADD_X2">; defm : SIBufferAtomicPat<"SIbuffer_atomic_sub", i64, "BUFFER_ATOMIC_SUB_X2">; @@ -1643,7 +1651,8 @@ let SubtargetPredicate = isGFX90APlus in { foreach RtnMode = ["ret", "noret"] in { -defvar Op = !cast<SDPatternOperator>(SIbuffer_atomic_cmpswap # "_" # RtnMode); +defvar Op = !cast<SDPatternOperator>(SIbuffer_atomic_cmpswap + # !if(!eq(RtnMode, "ret"), "", "_noret")); defvar InstSuffix = !if(!eq(RtnMode, "ret"), "_RTN", ""); defvar CachePolicy = !if(!eq(RtnMode, "ret"), (set_glc $cachepolicy), (timm:$cachepolicy)); diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td index 27b723875aa4..d8387bf6f1ae 100644 --- a/llvm/lib/Target/AMDGPU/DSInstructions.td +++ b/llvm/lib/Target/AMDGPU/DSInstructions.td @@ -950,10 +950,11 @@ defm : DSWritePat_mc <DS_WRITE_B128, vt, "store_align_less_than_4_local">; } // End AddedComplexity = 100 -class DSAtomicRetPat<DS_Pseudo inst, ValueType vt, PatFrag frag, bit gds=0> : GCNPat < - (frag (DS1Addr1Offset i32:$ptr, i16:$offset), vt:$value), - (inst $ptr, getVregSrcForVT<vt>.ret:$value, offset:$offset, (i1 gds)) ->; +class DSAtomicRetPat<DS_Pseudo inst, ValueType vt, PatFrag frag, int complexity = 0, + bit gds=0> : GCNPat <(frag (DS1Addr1Offset i32:$ptr, i16:$offset), vt:$value), + (inst $ptr, getVregSrcForVT<vt>.ret:$value, offset:$offset, (i1 gds))> { + let AddedComplexity = complexity; +} multiclass DSAtomicRetPat_mc<DS_Pseudo inst, ValueType vt, string frag> { let OtherPredicates = [LDSRequiresM0Init] in { @@ -965,75 +966,88 @@ multiclass DSAtomicRetPat_mc<DS_Pseudo inst, ValueType vt, string frag> { !cast<PatFrag>(frag#"_local_"#vt.Size)>; } - def : DSAtomicRetPat<inst, vt, !cast<PatFrag>(frag#"_region_m0_"#vt.Size), 1>; + def : DSAtomicRetPat<inst, vt, !cast<PatFrag>(frag#"_region_m0_"#vt.Size), + /* complexity */ 0, /* gds */ 1>; } multiclass DSAtomicRetNoRetPat_mc<DS_Pseudo inst, DS_Pseudo noRetInst, ValueType vt, string frag> { let OtherPredicates = [LDSRequiresM0Init] in { def : DSAtomicRetPat<inst, vt, - !cast<PatFrag>(frag#"_local_m0_ret_"#vt.Size)>; + 
!cast<PatFrag>(frag#"_local_m0_"#vt.Size)>; def : DSAtomicRetPat<noRetInst, vt, - !cast<PatFrag>(frag#"_local_m0_noret_"#vt.Size)>; + !cast<PatFrag>(frag#"_local_m0_noret_"#vt.Size), /* complexity */ 1>; } let OtherPredicates = [NotLDSRequiresM0Init] in { def : DSAtomicRetPat<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt, - !cast<PatFrag>(frag#"_local_ret_"#vt.Size)>; + !cast<PatFrag>(frag#"_local_"#vt.Size)>; def : DSAtomicRetPat<!cast<DS_Pseudo>(!cast<string>(noRetInst)#"_gfx9"), vt, - !cast<PatFrag>(frag#"_local_noret_"#vt.Size)>; + !cast<PatFrag>(frag#"_local_noret_"#vt.Size), /* complexity */ 1>; } def : DSAtomicRetPat<inst, vt, - !cast<PatFrag>(frag#"_region_m0_ret_"#vt.Size), 1>; + !cast<PatFrag>(frag#"_region_m0_"#vt.Size), + /* complexity */ 0, /* gds */ 1>; def : DSAtomicRetPat<noRetInst, vt, - !cast<PatFrag>(frag#"_region_m0_noret_"#vt.Size), 1>; + !cast<PatFrag>(frag#"_region_m0_noret_"#vt.Size), + /* complexity */ 1, /* gds */ 1>; } let SubtargetPredicate = isGFX6GFX7GFX8GFX9GFX10 in { // Caution, the order of src and cmp is the *opposite* of the BUFFER_ATOMIC_CMPSWAP opcode. -class DSAtomicCmpXChgSwapped<DS_Pseudo inst, ValueType vt, PatFrag frag, bit gds=0> : GCNPat < +class DSAtomicCmpXChgSwapped<DS_Pseudo inst, ValueType vt, PatFrag frag, + int complexity = 0, bit gds=0> : GCNPat< (frag (DS1Addr1Offset i32:$ptr, i16:$offset), vt:$cmp, vt:$swap), - (inst $ptr, getVregSrcForVT<vt>.ret:$cmp, getVregSrcForVT<vt>.ret:$swap, offset:$offset, (i1 gds)) ->; + (inst $ptr, getVregSrcForVT<vt>.ret:$cmp, getVregSrcForVT<vt>.ret:$swap, offset:$offset, (i1 gds))> { + let AddedComplexity = complexity; +} multiclass DSAtomicCmpXChgSwapped_mc<DS_Pseudo inst, DS_Pseudo noRetInst, ValueType vt, string frag> { let OtherPredicates = [LDSRequiresM0Init] in { - def : DSAtomicCmpXChgSwapped<inst, vt, !cast<PatFrag>(frag#"_local_m0_ret_"#vt.Size)>; - def : DSAtomicCmpXChgSwapped<noRetInst, vt, !cast<PatFrag>(frag#"_local_m0_noret_"#vt.Size)>; + def : DSAtomicCmpXChgSwapped<inst, vt, !cast<PatFrag>(frag#"_local_m0_"#vt.Size)>; + def : DSAtomicCmpXChgSwapped<noRetInst, vt, !cast<PatFrag>(frag#"_local_m0_noret_"#vt.Size), + /* complexity */ 1>; } let OtherPredicates = [NotLDSRequiresM0Init] in { def : DSAtomicCmpXChgSwapped<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt, - !cast<PatFrag>(frag#"_local_ret_"#vt.Size)>; + !cast<PatFrag>(frag#"_local_"#vt.Size)>; def : DSAtomicCmpXChgSwapped<!cast<DS_Pseudo>(!cast<string>(noRetInst)#"_gfx9"), vt, - !cast<PatFrag>(frag#"_local_noret_"#vt.Size)>; + !cast<PatFrag>(frag#"_local_noret_"#vt.Size), + /* complexity */ 1>; } - def : DSAtomicCmpXChgSwapped<inst, vt, !cast<PatFrag>(frag#"_region_m0_ret_"#vt.Size), 1>; - def : DSAtomicCmpXChgSwapped<noRetInst, vt, !cast<PatFrag>(frag#"_region_m0_noret_"#vt.Size), 1>; + def : DSAtomicCmpXChgSwapped<inst, vt, !cast<PatFrag>(frag#"_region_m0_"#vt.Size), + /* complexity */ 0, /* gds */ 1>; + def : DSAtomicCmpXChgSwapped<noRetInst, vt, !cast<PatFrag>(frag#"_region_m0_noret_"#vt.Size), + /* complexity */ 1, /* gds */ 1>; } } // End SubtargetPredicate = isGFX6GFX7GFX8GFX9GFX10 let SubtargetPredicate = isGFX11Plus in { // The order of src and cmp agrees with the BUFFER_ATOMIC_CMPSWAP opcode. 
-class DSAtomicCmpXChg<DS_Pseudo inst, ValueType vt, PatFrag frag, bit gds=0> : GCNPat < +class DSAtomicCmpXChg<DS_Pseudo inst, ValueType vt, PatFrag frag, + int complexity = 0, bit gds=0> : GCNPat< (frag (DS1Addr1Offset i32:$ptr, i16:$offset), vt:$cmp, vt:$swap), - (inst $ptr, getVregSrcForVT<vt>.ret:$swap, getVregSrcForVT<vt>.ret:$cmp, offset:$offset, (i1 gds)) ->; + (inst $ptr, getVregSrcForVT<vt>.ret:$swap, getVregSrcForVT<vt>.ret:$cmp, offset:$offset, (i1 gds))> { + let AddedComplexity = complexity; +} multiclass DSAtomicCmpXChg_mc<DS_Pseudo inst, DS_Pseudo noRetInst, ValueType vt, string frag> { def : DSAtomicCmpXChg<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt, - !cast<PatFrag>(frag#"_local_ret_"#vt.Size)>; + !cast<PatFrag>(frag#"_local_"#vt.Size)>; def : DSAtomicCmpXChg<!cast<DS_Pseudo>(!cast<string>(noRetInst)#"_gfx9"), vt, - !cast<PatFrag>(frag#"_local_noret_"#vt.Size)>; + !cast<PatFrag>(frag#"_local_noret_"#vt.Size), /* complexity */ 1>; - def : DSAtomicCmpXChg<inst, vt, !cast<PatFrag>(frag#"_region_m0_ret_"#vt.Size), 1>; - def : DSAtomicCmpXChg<noRetInst, vt, !cast<PatFrag>(frag#"_region_m0_noret_"#vt.Size), 1>; + def : DSAtomicCmpXChg<inst, vt, !cast<PatFrag>(frag#"_region_m0_"#vt.Size), + /* complexity */ 0, /* gds */ 1>; + def : DSAtomicCmpXChg<noRetInst, vt, !cast<PatFrag>(frag#"_region_m0_noret_"#vt.Size), + /* complexity */ 1, /* gds */ 1>; } } // End SubtargetPredicate = isGFX11Plus @@ -1090,17 +1104,20 @@ defm : DSAtomicCmpXChg_mc<DS_CMPSTORE_RTN_B64, DS_CMPSTORE_B64, i64, "atomic_cmp } // End SubtargetPredicate = isGFX11Plus let SubtargetPredicate = isGFX90APlus in { -def : DSAtomicRetPat<DS_ADD_RTN_F64, f64, atomic_load_fadd_local_ret_64>; +def : DSAtomicRetPat<DS_ADD_RTN_F64, f64, atomic_load_fadd_local_64>; +let AddedComplexity = 1 in def : DSAtomicRetPat<DS_ADD_F64, f64, atomic_load_fadd_local_noret_64>; } let SubtargetPredicate = isGFX940Plus in { -def : DSAtomicRetPat<DS_PK_ADD_RTN_F16, v2f16, atomic_load_fadd_v2f16_local_ret_32>; +def : DSAtomicRetPat<DS_PK_ADD_RTN_F16, v2f16, atomic_load_fadd_v2f16_local_32>; +let AddedComplexity = 1 in def : DSAtomicRetPat<DS_PK_ADD_F16, v2f16, atomic_load_fadd_v2f16_local_noret_32>; def : GCNPat < - (v2i16 (int_amdgcn_ds_fadd_v2bf16_ret i32:$ptr, v2i16:$src)), + (v2i16 (int_amdgcn_ds_fadd_v2bf16 i32:$ptr, v2i16:$src)), (DS_PK_ADD_RTN_BF16 VGPR_32:$ptr, VGPR_32:$src, 0, 0) >; +let AddedComplexity = 1 in def : GCNPat < (v2i16 (int_amdgcn_ds_fadd_v2bf16_noret i32:$ptr, v2i16:$src)), (DS_PK_ADD_BF16 VGPR_32:$ptr, VGPR_32:$src, 0, 0) diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index cb2822818549..c634e15945ad 100644 --- a/llvm/lib/Target/AMDGPU/FLATInstructions.td +++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -1015,31 +1015,35 @@ class FlatAtomicPatNoRtn <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt multiclass FlatAtomicPat <string inst, string node, ValueType vt, ValueType data_vt = vt> { - defvar rtnNode = !cast<PatFrags>(node#"_ret_"#vt.Size); + defvar rtnNode = !cast<PatFrags>(node#"_"#vt.Size); defvar noRtnNode = !cast<PatFrags>(node#"_noret_"#vt.Size); def : GCNPat <(vt (rtnNode (FlatOffset i64:$vaddr, i16:$offset), data_vt:$data)), (!cast<FLAT_Pseudo>(inst#"_RTN") VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)>; + let AddedComplexity = 1 in def : GCNPat <(vt (noRtnNode (FlatOffset i64:$vaddr, i16:$offset), data_vt:$data)), (!cast<FLAT_Pseudo>(inst) VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)>; } multiclass 
FlatSignedAtomicPat <string inst, string node, ValueType vt, - ValueType data_vt = vt, bit isIntr = 0> { - defvar rtnNode = !cast<PatFrags>(node # "_ret" # !if(isIntr, "", "_" # vt.Size)); + ValueType data_vt = vt, int complexity = 0, + bit isIntr = 0> { + defvar rtnNode = !cast<SDPatternOperator>(node # !if(isIntr, "", "_" # vt.Size)); defvar noRtnNode = !cast<PatFrags>(node # "_noret" # !if(isIntr, "", "_" # vt.Size)); + let AddedComplexity = complexity in def : GCNPat <(vt (rtnNode (GlobalOffset i64:$vaddr, i16:$offset), data_vt:$data)), (!cast<FLAT_Pseudo>(inst#"_RTN") VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)>; + let AddedComplexity = !add(complexity, 1) in def : GCNPat <(vt (noRtnNode (GlobalOffset i64:$vaddr, i16:$offset), data_vt:$data)), (!cast<FLAT_Pseudo>(inst) VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)>; } multiclass FlatSignedAtomicIntrPat <string inst, string node, ValueType vt, ValueType data_vt = vt> { - defm : FlatSignedAtomicPat<inst, node, vt, data_vt, /* isIntr */ 1>; + defm : FlatSignedAtomicPat<inst, node, vt, data_vt, /* complexity */ 0, /* isIntr */ 1>; } class FlatSignedAtomicPatNoRtn <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < @@ -1260,17 +1264,16 @@ multiclass GlobalFLATAtomicPatsRtn<string nortn_inst_name, SDPatternOperator nod multiclass GlobalFLATAtomicPats<string inst, string node, ValueType vt, ValueType data_vt = vt, bit isIntr = 0> { - defvar rtnNode = !cast<PatFrags>(node # "_ret" # !if(isIntr, "", "_" # vt.Size)); + defvar rtnNode = !cast<SDPatternOperator>(node # !if(isIntr, "", "_" # vt.Size)); defvar noRtnNode = !cast<PatFrags>(node # "_noret" # !if(isIntr, "", "_" # vt.Size)); - let AddedComplexity = 10 in { - defm : FlatSignedAtomicPat <inst, node, vt, data_vt, isIntr>; - } + defm : FlatSignedAtomicPat <inst, node, vt, data_vt, /* complexity */ 10, isIntr>; - let AddedComplexity = 11 in { - def : GlobalAtomicSaddrPat<!cast<FLAT_Pseudo>(inst#"_SADDR"), noRtnNode, vt, data_vt>; - def : GlobalAtomicSaddrPat<!cast<FLAT_Pseudo>(inst#"_SADDR_RTN"), rtnNode, vt, data_vt>; - } + let AddedComplexity = 13 in + def : GlobalAtomicSaddrPat<!cast<FLAT_Pseudo>(inst#"_SADDR"), noRtnNode, vt, data_vt>; + + let AddedComplexity = 12 in + def : GlobalAtomicSaddrPat<!cast<FLAT_Pseudo>(inst#"_SADDR_RTN"), rtnNode, vt, data_vt>; } multiclass GlobalFLATAtomicIntrPats<string inst, string node, ValueType vt, diff --git a/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp b/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp new file mode 100644 index 000000000000..83dc3bebf4d3 --- /dev/null +++ b/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp @@ -0,0 +1,175 @@ +//===- GCNCreateVOPD.cpp - Create VOPD Instructions ----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// Combine VALU pairs into VOPD instructions +/// Only works on wave32 +/// Has register requirements; we reject creating a VOPD if the requirements +/// are not met.
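+/// (For reference, assuming GFX11 dual-issue semantics: a VOPD instruction
+/// encodes two independent VALU operations as an X and a Y component, e.g.
+/// v_dual_mov_b32 v0, v2 :: v_dual_add_f32 v1, v3, v4.)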
+/// shouldCombineVOPD mutator in postRA machine scheduler puts candidate +/// instructions for VOPD back-to-back +/// +// +//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "GCNSubtarget.h" +#include "GCNVOPDUtils.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "SIInstrInfo.h" +#include "Utils/AMDGPUBaseInfo.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Debug.h" +#include <utility> + +#define DEBUG_TYPE "gcn-create-vopd" +STATISTIC(NumVOPDCreated, "Number of VOPD Insts Created."); + +using namespace llvm; + +namespace { + +class GCNCreateVOPD : public MachineFunctionPass { +private: +public: + static char ID; + const GCNSubtarget *ST = nullptr; + + GCNCreateVOPD() : MachineFunctionPass(ID) {} + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + StringRef getPassName() const override { + return "GCN Create VOPD Instructions"; + } + + bool doReplace(const SIInstrInfo *SII, + std::pair<MachineInstr *, MachineInstr *> &Pair) { + auto *FirstMI = Pair.first; + auto *SecondMI = Pair.second; + unsigned Opc1 = FirstMI->getOpcode(); + unsigned Opc2 = SecondMI->getOpcode(); + int NewOpcode = AMDGPU::getVOPDFull(AMDGPU::getVOPDOpcode(Opc1), + AMDGPU::getVOPDOpcode(Opc2)); + assert(NewOpcode != -1 && + "Should have previously determined this as a possible VOPD\n"); + + auto VOPDInst = BuildMI(*FirstMI->getParent(), FirstMI, + FirstMI->getDebugLoc(), SII->get(NewOpcode)) + .setMIFlags(FirstMI->getFlags() | SecondMI->getFlags()); + VOPDInst.add(FirstMI->getOperand(0)) + .add(SecondMI->getOperand(0)) + .add(FirstMI->getOperand(1)); + + switch (Opc1) { + case AMDGPU::V_MOV_B32_e32: + break; + case AMDGPU::V_FMAMK_F32: + case AMDGPU::V_FMAAK_F32: + VOPDInst.add(FirstMI->getOperand(2)); + VOPDInst.add(FirstMI->getOperand(3)); + break; + default: + VOPDInst.add(FirstMI->getOperand(2)); + break; + } + + VOPDInst.add(SecondMI->getOperand(1)); + + switch (Opc2) { + case AMDGPU::V_MOV_B32_e32: + break; + case AMDGPU::V_FMAMK_F32: + case AMDGPU::V_FMAAK_F32: + VOPDInst.add(SecondMI->getOperand(2)); + VOPDInst.add(SecondMI->getOperand(3)); + break; + default: + VOPDInst.add(SecondMI->getOperand(2)); + break; + } + + VOPDInst.copyImplicitOps(*FirstMI); + VOPDInst.copyImplicitOps(*SecondMI); + + LLVM_DEBUG(dbgs() << "VOPD Fused: " << *VOPDInst << " from\tX: " + << *Pair.first << "\tY: " << *Pair.second << "\n"); + FirstMI->eraseFromParent(); + SecondMI->eraseFromParent(); + ++NumVOPDCreated; + return true; + } + + bool runOnMachineFunction(MachineFunction &MF) override { + if (skipFunction(MF.getFunction())) + return false; + ST = &MF.getSubtarget<GCNSubtarget>(); + if (!AMDGPU::hasVOPD(*ST) || !ST->isWave32()) + return false; + LLVM_DEBUG(dbgs() << "CreateVOPD Pass:\n"); + + const SIInstrInfo *SII = ST->getInstrInfo(); + bool Changed = false; + + SmallVector<std::pair<MachineInstr *, MachineInstr *>> ReplaceCandidates; + + for (auto &MBB : MF) { + auto MII = MBB.begin(), E = MBB.end(); + while (MII != E) { + auto *FirstMI = &*MII; + MII = next_nodbg(MII, MBB.end()); + if (MII == MBB.end()) + break; + if (FirstMI->isDebugInstr()) + continue; + auto *SecondMI = &*MII; + unsigned Opc = FirstMI->getOpcode(); + unsigned 
Opc2 = SecondMI->getOpcode(); + llvm::AMDGPU::CanBeVOPD FirstCanBeVOPD = AMDGPU::getCanBeVOPD(Opc); + llvm::AMDGPU::CanBeVOPD SecondCanBeVOPD = AMDGPU::getCanBeVOPD(Opc2); + std::pair<MachineInstr *, MachineInstr *> Pair; + + if (FirstCanBeVOPD.X && SecondCanBeVOPD.Y) + Pair = {FirstMI, SecondMI}; + else if (FirstCanBeVOPD.Y && SecondCanBeVOPD.X) + Pair = {SecondMI, FirstMI}; + else + continue; + // checkVOPDRegConstraints cares about program order, but doReplace + // cares about X-Y order in the constituted VOPD + if (llvm::checkVOPDRegConstraints(*SII, *FirstMI, *SecondMI)) { + ReplaceCandidates.push_back(Pair); + ++MII; + } + } + } + for (auto &Pair : ReplaceCandidates) { + Changed |= doReplace(SII, Pair); + } + + return Changed; + } +}; + +} // namespace + +char GCNCreateVOPD::ID = 0; + +char &llvm::GCNCreateVOPDID = GCNCreateVOPD::ID; + +INITIALIZE_PASS(GCNCreateVOPD, DEBUG_TYPE, "GCN Create VOPD Instructions", + false, false) diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp index 1cd880eaa48e..5d254518c67a 100644 --- a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp +++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp @@ -143,13 +143,20 @@ bool GCNDPPCombine::isShrinkable(MachineInstr &MI) const { } int GCNDPPCombine::getDPPOp(unsigned Op, bool IsShrinkable) const { - auto DPP32 = AMDGPU::getDPPOp32(Op); + int DPP32 = AMDGPU::getDPPOp32(Op); if (IsShrinkable) { assert(DPP32 == -1); - auto E32 = AMDGPU::getVOPe32(Op); + int E32 = AMDGPU::getVOPe32(Op); DPP32 = (E32 == -1) ? -1 : AMDGPU::getDPPOp32(E32); } - return (DPP32 == -1 || TII->pseudoToMCOpcode(DPP32) == -1) ? -1 : DPP32; + if (DPP32 != -1 && TII->pseudoToMCOpcode(DPP32) != -1) + return DPP32; + int DPP64 = -1; + if (ST->hasVOP3DPP()) + DPP64 = AMDGPU::getDPPOp64(Op); + if (DPP64 != -1 && TII->pseudoToMCOpcode(DPP64) != -1) + return DPP64; + return -1; } // tracks the register operand definition and returns: @@ -188,6 +195,7 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI, MovMI.getOpcode() == AMDGPU::V_MOV_B64_dpp || MovMI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO); + bool HasVOP3DPP = ST->hasVOP3DPP(); auto OrigOp = OrigMI.getOpcode(); auto DPPOp = getDPPOp(OrigOp, IsShrinkable); if (DPPOp == -1) { @@ -201,10 +209,18 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI, bool Fail = false; do { - auto *Dst = TII->getNamedOperand(OrigMI, AMDGPU::OpName::vdst); - assert(Dst); - DPPInst.add(*Dst); - int NumOperands = 1; + int NumOperands = 0; + if (auto *Dst = TII->getNamedOperand(OrigMI, AMDGPU::OpName::vdst)) { + DPPInst.add(*Dst); + ++NumOperands; + } + if (auto *SDst = TII->getNamedOperand(OrigMI, AMDGPU::OpName::sdst)) { + if (TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, SDst)) { + DPPInst.add(*SDst); + ++NumOperands; + } + // If we shrunk a 64bit vop3b to 32bits, just ignore the sdst + } const int OldIdx = AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::old); if (OldIdx != -1) { @@ -230,7 +246,8 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI, AMDGPU::OpName::src0_modifiers)) { assert(NumOperands == AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::src0_modifiers)); - assert(0LL == (Mod0->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG))); + assert(HasVOP3DPP || + (0LL == (Mod0->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG)))); DPPInst.addImm(Mod0->getImm()); ++NumOperands; } else if (AMDGPU::getNamedOperandIdx(DPPOp, @@ -253,7 +270,8 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI, 
AMDGPU::OpName::src1_modifiers)) { assert(NumOperands == AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::src1_modifiers)); - assert(0LL == (Mod1->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG))); + assert(HasVOP3DPP || + (0LL == (Mod1->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG)))); DPPInst.addImm(Mod1->getImm()); ++NumOperands; } else if (AMDGPU::getNamedOperandIdx(DPPOp, @@ -261,7 +279,8 @@ DPPInst.addImm(0); ++NumOperands; } - if (auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1)) { + auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1); + if (Src1) { if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src1)) { LLVM_DEBUG(dbgs() << " failed: src1 is illegal\n"); Fail = true; @@ -270,8 +289,17 @@ DPPInst.add(*Src1); ++NumOperands; } - - if (auto *Src2 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2)) { + if (auto *Mod2 = + TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2_modifiers)) { + assert(NumOperands == + AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::src2_modifiers)); + assert(HasVOP3DPP || + (0LL == (Mod2->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG)))); + DPPInst.addImm(Mod2->getImm()); + ++NumOperands; + } + auto *Src2 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2); + if (Src2) { if (!TII->getNamedOperand(*DPPInst.getInstr(), AMDGPU::OpName::src2) || !TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src2)) { LLVM_DEBUG(dbgs() << " failed: src2 is illegal\n"); @@ -279,8 +307,62 @@ DPPInst.add(*Src2); + ++NumOperands; + } + if (HasVOP3DPP) { + auto *ClampOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::clamp); + if (ClampOpr && + AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::clamp) != -1) { + DPPInst.addImm(ClampOpr->getImm()); + } + auto *VdstInOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::vdst_in); + if (VdstInOpr && + AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::vdst_in) != -1) { + DPPInst.add(*VdstInOpr); + } + auto *OmodOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::omod); + if (OmodOpr && + AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::omod) != -1) { + DPPInst.addImm(OmodOpr->getImm()); + } + // Validate that OP_SEL is set to all 0 and OP_SEL_HI is set to + // all 1.
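+      // (op_sel selects 16-bit halves of the operands; anything other than
+      // the default all-0 / all-1 pair is not handled by this combine, so we
+      // bail out below.)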
+ if (auto *OpSelOpr = + TII->getNamedOperand(OrigMI, AMDGPU::OpName::op_sel)) { + auto OpSel = OpSelOpr->getImm(); + if (OpSel != 0) { + LLVM_DEBUG(dbgs() << " failed: op_sel must be zero\n"); + Fail = true; + break; + } + if (AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::op_sel) != -1) + DPPInst.addImm(OpSel); + } + if (auto *OpSelHiOpr = + TII->getNamedOperand(OrigMI, AMDGPU::OpName::op_sel_hi)) { + auto OpSelHi = OpSelHiOpr->getImm(); + // Only vop3p has op_sel_hi, and all vop3p have 3 operands, so check + // the bitmask for 3 op_sel_hi bits set + assert(Src2 && "Expected vop3p with 3 operands"); + if (OpSelHi != 7) { + LLVM_DEBUG(dbgs() << " failed: op_sel_hi must be all set to one\n"); + Fail = true; + break; + } + if (AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::op_sel_hi) != -1) + DPPInst.addImm(OpSelHi); + } + auto *NegOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::neg_lo); + if (NegOpr && + AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::neg_lo) != -1) { + DPPInst.addImm(NegOpr->getImm()); + } + auto *NegHiOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::neg_hi); + if (NegHiOpr && + AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::neg_hi) != -1) { + DPPInst.addImm(NegHiOpr->getImm()); + } } - DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::dpp_ctrl)); DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask)); DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask)); @@ -531,8 +613,16 @@ bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const { } bool IsShrinkable = isShrinkable(OrigMI); - if (!(IsShrinkable || TII->isVOP1(OrigOp) || TII->isVOP2(OrigOp))) { - LLVM_DEBUG(dbgs() << " failed: not VOP1/2/3\n"); + if (!(IsShrinkable || + ((TII->isVOP3P(OrigOp) || TII->isVOPC(OrigOp) || + TII->isVOP3(OrigOp)) && + ST->hasVOP3DPP()) || + TII->isVOP1(OrigOp) || TII->isVOP2(OrigOp))) { + LLVM_DEBUG(dbgs() << " failed: not VOP1/2/3/3P/C\n"); + break; + } + if (OrigMI.modifiesRegister(AMDGPU::EXEC, ST->getRegisterInfo())) { + LLVM_DEBUG(dbgs() << " failed: can't combine v_cmpx\n"); break; } @@ -543,9 +633,12 @@ bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const { break; } + auto *Src2 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2); assert(Src0 && "Src1 without Src0?"); - if (Src1 && Src1->isIdenticalTo(*Src0)) { - assert(Src1->isReg()); + if ((Use == Src0 && ((Src1 && Src1->isIdenticalTo(*Src0)) || + (Src2 && Src2->isIdenticalTo(*Src0)))) || + (Use == Src1 && (Src1->isIdenticalTo(*Src0) || + (Src2 && Src2->isIdenticalTo(*Src1))))) { LLVM_DEBUG( dbgs() << " " << OrigMI diff --git a/llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp b/llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp new file mode 100644 index 000000000000..a5008e39d91a --- /dev/null +++ b/llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp @@ -0,0 +1,212 @@ +//===- GCNVOPDUtils.cpp - GCN VOPD Utils ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file This file contains the AMDGPU DAG scheduling +/// mutation to pair VOPD instructions back to back. 
It also contains +// subroutines useful in the creation of VOPD instructions +// +//===----------------------------------------------------------------------===// + +#include "GCNVOPDUtils.h" +#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "SIInstrInfo.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MacroFusion.h" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/CodeGen/ScheduleDAGMutation.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/MC/MCInst.h" + +using namespace llvm; + +#define DEBUG_TYPE "gcn-vopd-utils" + +bool llvm::checkVOPDRegConstraints(const SIInstrInfo &TII, + const MachineInstr &FirstMI, + const MachineInstr &SecondMI) { + const MachineFunction *MF = FirstMI.getMF(); + const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>(); + const SIRegisterInfo *TRI = dyn_cast<SIRegisterInfo>(ST.getRegisterInfo()); + const MachineRegisterInfo &MRI = MF->getRegInfo(); + const unsigned NumVGPRBanks = 4; + // Literals also count against scalar bus limit + SmallVector<const MachineOperand *> UniqueLiterals; + auto addLiteral = [&](const MachineOperand &Op) { + for (auto &Literal : UniqueLiterals) { + if (Literal->isIdenticalTo(Op)) + return; + } + UniqueLiterals.push_back(&Op); + }; + SmallVector<Register> UniqueScalarRegs; + assert([&]() -> bool { + for (auto MII = MachineBasicBlock::const_iterator(&FirstMI); + MII != FirstMI.getParent()->instr_end(); ++MII) { + if (&*MII == &SecondMI) + return true; + } + return false; + }() && "Expected FirstMI to precede SecondMI"); + // Cannot pair dependent instructions + for (const auto &Use : SecondMI.uses()) + if (Use.isReg() && FirstMI.modifiesRegister(Use.getReg())) + return false; + + struct ComponentInfo { + ComponentInfo(const MachineInstr &MI) : MI(MI) {} + Register Dst, Reg0, Reg1, Reg2; + const MachineInstr &MI; + }; + ComponentInfo CInfo[] = {ComponentInfo(FirstMI), ComponentInfo(SecondMI)}; + + for (ComponentInfo &Comp : CInfo) { + switch (Comp.MI.getOpcode()) { + case AMDGPU::V_FMAMK_F32: + // cannot inline the fixed literal in fmamk + addLiteral(Comp.MI.getOperand(2)); + Comp.Reg2 = Comp.MI.getOperand(3).getReg(); + break; + case AMDGPU::V_FMAAK_F32: + // cannot inline the fixed literal in fmaak + addLiteral(Comp.MI.getOperand(3)); + Comp.Reg1 = Comp.MI.getOperand(2).getReg(); + break; + case AMDGPU::V_FMAC_F32_e32: + case AMDGPU::V_DOT2_F32_F16: + case AMDGPU::V_DOT2_F32_BF16: + Comp.Reg1 = Comp.MI.getOperand(2).getReg(); + Comp.Reg2 = Comp.MI.getOperand(0).getReg(); + break; + case AMDGPU::V_CNDMASK_B32_e32: + UniqueScalarRegs.push_back(AMDGPU::VCC_LO); + Comp.Reg1 = Comp.MI.getOperand(2).getReg(); + break; + case AMDGPU::V_MOV_B32_e32: + break; + default: + Comp.Reg1 = Comp.MI.getOperand(2).getReg(); + break; + } + + Comp.Dst = Comp.MI.getOperand(0).getReg(); + + const MachineOperand &Op0 = Comp.MI.getOperand(1); + if (Op0.isReg()) { + if (!TRI->isVectorRegister(MRI, Op0.getReg())) { + if (!is_contained(UniqueScalarRegs, Op0.getReg())) + UniqueScalarRegs.push_back(Op0.getReg()); + } else + Comp.Reg0 = Op0.getReg(); + } else { + if (!TII.isInlineConstant(Comp.MI, 1)) + addLiteral(Op0); + } + } + + if (UniqueLiterals.size() > 1) + return false; + if ((UniqueLiterals.size() + UniqueScalarRegs.size()) > 2) + return false; + + // check 
port 0 + if (CInfo[0].Reg0 && CInfo[1].Reg0 && + CInfo[0].Reg0 % NumVGPRBanks == CInfo[1].Reg0 % NumVGPRBanks) + return false; + // check port 1 + if (CInfo[0].Reg1 && CInfo[1].Reg1 && + CInfo[0].Reg1 % NumVGPRBanks == CInfo[1].Reg1 % NumVGPRBanks) + return false; + // check port 2 + if (CInfo[0].Reg2 && CInfo[1].Reg2 && + !((CInfo[0].Reg2 ^ CInfo[1].Reg2) & 0x1)) + return false; + if (!((CInfo[0].Dst ^ CInfo[1].Dst) & 0x1)) + return false; + + LLVM_DEBUG(dbgs() << "VOPD Reg Constraints Passed\n\tX: " << FirstMI + << "\n\tY: " << SecondMI << "\n"); + return true; +} + +/// Check if the instr pair, FirstMI and SecondMI, should be scheduled +/// together. Given SecondMI, when FirstMI is unspecified, then check if +/// SecondMI may be part of a fused pair at all. +static bool shouldScheduleVOPDAdjacent(const TargetInstrInfo &TII, + const TargetSubtargetInfo &TSI, + const MachineInstr *FirstMI, + const MachineInstr &SecondMI) { + const SIInstrInfo &STII = static_cast<const SIInstrInfo &>(TII); + unsigned Opc2 = SecondMI.getOpcode(); + auto SecondCanBeVOPD = AMDGPU::getCanBeVOPD(Opc2); + + // One instruction case + if (!FirstMI) + return SecondCanBeVOPD.Y; + + unsigned Opc = FirstMI->getOpcode(); + auto FirstCanBeVOPD = AMDGPU::getCanBeVOPD(Opc); + + if (!((FirstCanBeVOPD.X && SecondCanBeVOPD.Y) || + (FirstCanBeVOPD.Y && SecondCanBeVOPD.X))) + return false; + + return checkVOPDRegConstraints(STII, *FirstMI, SecondMI); +} + +/// Adapts design from MacroFusion +/// Puts valid candidate instructions back-to-back so they can easily +/// be turned into VOPD instructions +/// Greedily pairs instruction candidates. O(n^2) algorithm. +struct VOPDPairingMutation : ScheduleDAGMutation { + ShouldSchedulePredTy shouldScheduleAdjacent; // NOLINT: function pointer + + VOPDPairingMutation( + ShouldSchedulePredTy shouldScheduleAdjacent) // NOLINT: function pointer + : shouldScheduleAdjacent(shouldScheduleAdjacent) {} + + void apply(ScheduleDAGInstrs *DAG) override { + const TargetInstrInfo &TII = *DAG->TII; + const GCNSubtarget &ST = DAG->MF.getSubtarget<GCNSubtarget>(); + if (!AMDGPU::hasVOPD(ST) || !ST.isWave32()) { + LLVM_DEBUG(dbgs() << "Target does not support VOPDPairingMutation\n"); + return; + } + + std::vector<SUnit>::iterator ISUI, JSUI; + for (ISUI = DAG->SUnits.begin(); ISUI != DAG->SUnits.end(); ++ISUI) { + const MachineInstr *IMI = ISUI->getInstr(); + if (!shouldScheduleAdjacent(TII, ST, nullptr, *IMI)) + continue; + if (!hasLessThanNumFused(*ISUI, 2)) + continue; + + for (JSUI = ISUI + 1; JSUI != DAG->SUnits.end(); ++JSUI) { + if (JSUI->isBoundaryNode()) + continue; + const MachineInstr *JMI = JSUI->getInstr(); + if (!hasLessThanNumFused(*JSUI, 2) || + !shouldScheduleAdjacent(TII, ST, IMI, *JMI)) + continue; + if (fuseInstructionPair(*DAG, *ISUI, *JSUI)) + break; + } + } + LLVM_DEBUG(dbgs() << "Completed VOPDPairingMutation\n"); + } +}; + +std::unique_ptr<ScheduleDAGMutation> llvm::createVOPDPairingMutation() { + return std::make_unique<VOPDPairingMutation>(shouldScheduleVOPDAdjacent); +} diff --git a/llvm/lib/Target/AMDGPU/GCNVOPDUtils.h b/llvm/lib/Target/AMDGPU/GCNVOPDUtils.h new file mode 100644 index 000000000000..22361b9a1a07 --- /dev/null +++ b/llvm/lib/Target/AMDGPU/GCNVOPDUtils.h @@ -0,0 +1,32 @@ +//===- GCNVOPDUtils.h - GCN VOPD Utils ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file This file contains the AMDGPU DAG scheduling +/// mutation to pair VOPD instructions back to back. It also contains +// subroutines useful in the creation of VOPD instructions +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_AMDGPU_VOPDUTILS_H +#define LLVM_LIB_TARGET_AMDGPU_VOPDUTILS_H + +#include "llvm/CodeGen/MachineScheduler.h" + +namespace llvm { + +class SIInstrInfo; + +bool checkVOPDRegConstraints(const SIInstrInfo &TII, + const MachineInstr &FirstMI, + const MachineInstr &SecondMI); + +std::unique_ptr<ScheduleDAGMutation> createVOPDPairingMutation(); + +} // namespace llvm + +#endif // LLVM_LIB_TARGET_AMDGPU_VOPDUTILS_H diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h index 02c213f90f89..228963ff2a20 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h @@ -62,12 +62,6 @@ public: virtual void getAVOperandEncoding(const MCInst &MI, unsigned OpNo, APInt &Op, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const = 0; - -protected: - FeatureBitset computeAvailableFeatures(const FeatureBitset &FB) const; - void - verifyInstructionPredicates(const MCInst &MI, - const FeatureBitset &AvailableFeatures) const; }; } // End namespace llvm diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp index 11fe3f9ef058..fba4b1a3db66 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp @@ -36,6 +36,7 @@ using namespace llvm; #define GET_INSTRINFO_MC_DESC +#define ENABLE_INSTR_PREDICATE_VERIFIER #include "AMDGPUGenInstrInfo.inc" #define GET_SUBTARGETINFO_MC_DESC diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h index 060d4b660632..c2e2563c3989 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h @@ -50,6 +50,7 @@ createAMDGPUELFObjectWriter(bool Is64Bit, uint8_t OSABI, #define GET_INSTRINFO_ENUM #define GET_INSTRINFO_OPERAND_ENUM +#define GET_INSTRINFO_MC_HELPER_DECLS #include "AMDGPUGenInstrInfo.inc" #define GET_SUBTARGETINFO_ENUM diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp index 78eb304fe84f..3d926e52c368 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp @@ -58,11 +58,6 @@ private: uint64_t getBinaryCodeForInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const; - FeatureBitset computeAvailableFeatures(const FeatureBitset &FB) const; - void - verifyInstructionPredicates(const MCInst &MI, - const FeatureBitset &AvailableFeatures) const; - }; } // end anonymous namespace @@ -90,11 +85,8 @@ MCCodeEmitter *llvm::createR600MCCodeEmitter(const MCInstrInfo &MCII, } void R600MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - verifyInstructionPredicates(MI, - 
computeAvailableFeatures(STI.getFeatureBits())); - + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { const MCInstrDesc &Desc = MCII.get(MI.getOpcode()); if (MI.getOpcode() == R600::RETURN || MI.getOpcode() == R600::FETCH_CLAUSE || @@ -187,5 +179,4 @@ uint64_t R600MCCodeEmitter::getMachineOpValue(const MCInst &MI, return MO.getImm(); } -#define ENABLE_INSTR_PREDICATE_VERIFIER #include "R600GenMCCodeEmitter.inc" diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.cpp index 269209a12175..b9ff195e0ddc 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.cpp @@ -13,10 +13,12 @@ #include "R600MCTargetDesc.h" #include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/SubtargetFeature.h" using namespace llvm; #define GET_INSTRINFO_MC_DESC +#define ENABLE_INSTR_PREDICATE_VERIFIER #include "R600GenInstrInfo.inc" MCInstrInfo *llvm::createR600MCInstrInfo() { diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.h index 605ae851378d..b4ce748532f8 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.h +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.h @@ -35,6 +35,7 @@ MCInstrInfo *createR600MCInstrInfo(); #define GET_INSTRINFO_ENUM #define GET_INSTRINFO_OPERAND_ENUM #define GET_INSTRINFO_SCHED_ENUM +#define GET_INSTRINFO_MC_HELPER_DECLS #include "R600GenInstrInfo.inc" #define GET_SUBTARGETINFO_ENUM diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp index 5e67fb5ec876..e093d78b2cc6 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp @@ -310,11 +310,8 @@ uint64_t SIMCCodeEmitter::getImplicitOpSelHiEncoding(int Opcode) const { } void SIMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - verifyInstructionPredicates(MI, - computeAvailableFeatures(STI.getFeatureBits())); - + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { int Opcode = MI.getOpcode(); APInt Encoding, Scratch; getBinaryCodeForInstr(MI, Fixups, Encoding, Scratch, STI); @@ -574,5 +571,4 @@ void SIMCCodeEmitter::getMachineOpValueCommon( llvm_unreachable("Encoding of this operand type is not supported yet."); } -#define ENABLE_INSTR_PREDICATE_VERIFIER #include "AMDGPUGenMCCodeEmitter.inc" diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp index bf52f7830ad7..5199a37a0519 100644 --- a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -1623,7 +1623,7 @@ static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry, NewBldVec); } -SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector, SDValue Swz[4], +SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector, SDValue Swz[], SelectionDAG &DAG, const SDLoc &DL) const { // Old -> New swizzle values diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.h b/llvm/lib/Target/AMDGPU/R600ISelLowering.h index 1e75a0432ec3..e7706fa0ef5c 100644 --- a/llvm/lib/Target/AMDGPU/R600ISelLowering.h +++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.h @@ -74,8 +74,8 @@ private: void lowerImplicitParameter(MachineInstr *MI, MachineBasicBlock &BB, MachineRegisterInfo & MRI, unsigned 
dword_offset) const; - SDValue OptimizeSwizzle(SDValue BuildVector, SDValue Swz[], SelectionDAG &DAG, - const SDLoc &DL) const; + SDValue OptimizeSwizzle(SDValue BuildVector, SDValue Swz[], + SelectionDAG &DAG, const SDLoc &DL) const; SDValue vectorToVerticalVector(SelectionDAG &DAG, SDValue Vector) const; SDValue lowerFrameIndex(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/AMDGPU/R600MCInstLower.cpp b/llvm/lib/Target/AMDGPU/R600MCInstLower.cpp index 8f7807a2b472..f81f5122bbc9 100644 --- a/llvm/lib/Target/AMDGPU/R600MCInstLower.cpp +++ b/llvm/lib/Target/AMDGPU/R600MCInstLower.cpp @@ -13,6 +13,7 @@ // #include "AMDGPUMCInstLower.h" +#include "MCTargetDesc/R600MCTargetDesc.h" #include "R600AsmPrinter.h" #include "R600Subtarget.h" #include "llvm/CodeGen/MachineOperand.h" @@ -42,6 +43,9 @@ void R600MCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const { } void R600AsmPrinter::emitInstruction(const MachineInstr *MI) { + R600_MC::verifyInstructionPredicates(MI->getOpcode(), + getSubtargetInfo().getFeatureBits()); + const R600Subtarget &STI = MF->getSubtarget<R600Subtarget>(); R600MCInstLower MCInstLowering(OutContext, STI, *this); diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 094d5cd58673..d16da2a8b86b 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -352,7 +352,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, // TODO: Generalize to more vector types. setOperationAction({ISD::EXTRACT_VECTOR_ELT, ISD::INSERT_VECTOR_ELT}, {MVT::v2i16, MVT::v2f16, MVT::v2i8, MVT::v4i8, MVT::v8i8, - MVT::v4i16, MVT::v4f16, MVT::v16i16, MVT::v16f16}, + MVT::v4i16, MVT::v4f16}, Custom); // Deal with vec3 vector operations when widened to vec4. diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 814a7c446889..799d34e32d27 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -3335,15 +3335,18 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI, (ST.getConstantBusLimit(Opc) > 1 || !Src0->isReg() || !RI.isSGPRReg(MBB.getParent()->getRegInfo(), Src0->getReg()))) { MachineInstr *DefMI; - const auto killDef = [&DefMI, &MBB, this]() -> void { + const auto killDef = [&]() -> void { const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); // The only user is the instruction which will be killed. - if (!MRI.hasOneNonDBGUse(DefMI->getOperand(0).getReg())) + Register DefReg = DefMI->getOperand(0).getReg(); + if (!MRI.hasOneNonDBGUse(DefReg)) return; // We cannot just remove the DefMI here, calling pass will crash. DefMI->setDesc(get(AMDGPU::IMPLICIT_DEF)); for (unsigned I = DefMI->getNumOperands() - 1; I != 0; --I) DefMI->removeOperand(I); + if (LV) + LV->getVarInfo(DefReg).AliveBlocks.clear(); }; int64_t Imm; @@ -3982,6 +3985,14 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI, int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); + int Src3Idx = -1; + if (Src0Idx == -1) { + // VOPD V_DUAL_* instructions use different operand names. 
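+    // (src0X/vsrc1X name the X component's sources and src0Y/vsrc1Y the Y
+    // component's sources; the constant bus check below then covers all four.)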
+ Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0X); + Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vsrc1X); + Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0Y); + Src3Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vsrc1Y); + } // Make sure the number of operands is correct. const MCInstrDesc &Desc = get(Opcode); @@ -4255,9 +4266,9 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI, // Only look at the true operands. Only a real operand can use the constant // bus, and we don't want to check pseudo-operands like the source modifier // flags. - for (int OpIdx : {Src0Idx, Src1Idx, Src2Idx}) { + for (int OpIdx : {Src0Idx, Src1Idx, Src2Idx, Src3Idx}) { if (OpIdx == -1) - break; + continue; const MachineOperand &MO = MI.getOperand(OpIdx); if (usesConstantBus(MRI, MO, MI.getDesc().OpInfo[OpIdx])) { if (MO.isReg()) { diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index 311f9f68e675..1b411eb83eb3 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -1242,6 +1242,9 @@ namespace AMDGPU { int getDPPOp32(uint16_t Opcode); LLVM_READONLY + int getDPPOp64(uint16_t Opcode); + + LLVM_READONLY int getBasicFromSDWAOp(uint16_t Opcode); LLVM_READONLY diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 29ee9f12b12d..23afd6556bc9 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -193,43 +193,32 @@ def SIbuffer_atomic_fadd : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FADD">; def SIbuffer_atomic_fmin : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FMIN">; def SIbuffer_atomic_fmax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FMAX">; -multiclass SDBufferAtomicRetNoRet { - def "_ret" : PatFrag< - (ops node:$vdata_in, node:$rsrc, node:$vindex, node:$voffset, node:$soffset, - node:$offset, node:$cachepolicy, node:$idxen), - (!cast<SDNode>(NAME) node:$vdata_in, node:$rsrc, node:$vindex, - node:$voffset, node:$soffset, node:$offset, node:$cachepolicy, - node:$idxen)> { - let PredicateCode = [{ return !(SDValue(N, 0).use_empty()); }]; - let GISelPredicateCode = [{ return true; }]; - } - +multiclass SDBufferAtomicNoRet { def "_noret" : PatFrag< (ops node:$vdata_in, node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset, node:$cachepolicy, node:$idxen), (!cast<SDNode>(NAME) node:$vdata_in, node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset, node:$cachepolicy, node:$idxen)> { - let PredicateCode = [{ return SDValue(N, 0).use_empty(); }]; - let GISelPredicateCode = [{ return false; }]; + let HasNoUse = true; } } -defm SIbuffer_atomic_swap : SDBufferAtomicRetNoRet; -defm SIbuffer_atomic_add : SDBufferAtomicRetNoRet; -defm SIbuffer_atomic_sub : SDBufferAtomicRetNoRet; -defm SIbuffer_atomic_smin : SDBufferAtomicRetNoRet; -defm SIbuffer_atomic_umin : SDBufferAtomicRetNoRet; -defm SIbuffer_atomic_smax : SDBufferAtomicRetNoRet; -defm SIbuffer_atomic_umax : SDBufferAtomicRetNoRet; -defm SIbuffer_atomic_and : SDBufferAtomicRetNoRet; -defm SIbuffer_atomic_or : SDBufferAtomicRetNoRet; -defm SIbuffer_atomic_xor : SDBufferAtomicRetNoRet; -defm SIbuffer_atomic_inc : SDBufferAtomicRetNoRet; -defm SIbuffer_atomic_dec : SDBufferAtomicRetNoRet; -defm SIbuffer_atomic_fadd : SDBufferAtomicRetNoRet; -defm SIbuffer_atomic_fmin : SDBufferAtomicRetNoRet; -defm SIbuffer_atomic_fmax : SDBufferAtomicRetNoRet; +defm SIbuffer_atomic_swap : SDBufferAtomicNoRet; +defm 
SIbuffer_atomic_add : SDBufferAtomicNoRet; +defm SIbuffer_atomic_sub : SDBufferAtomicNoRet; +defm SIbuffer_atomic_smin : SDBufferAtomicNoRet; +defm SIbuffer_atomic_umin : SDBufferAtomicNoRet; +defm SIbuffer_atomic_smax : SDBufferAtomicNoRet; +defm SIbuffer_atomic_umax : SDBufferAtomicNoRet; +defm SIbuffer_atomic_and : SDBufferAtomicNoRet; +defm SIbuffer_atomic_or : SDBufferAtomicNoRet; +defm SIbuffer_atomic_xor : SDBufferAtomicNoRet; +defm SIbuffer_atomic_inc : SDBufferAtomicNoRet; +defm SIbuffer_atomic_dec : SDBufferAtomicNoRet; +defm SIbuffer_atomic_fadd : SDBufferAtomicNoRet; +defm SIbuffer_atomic_fmin : SDBufferAtomicNoRet; +defm SIbuffer_atomic_fmax : SDBufferAtomicNoRet; def SIbuffer_atomic_cmpswap : SDNode <"AMDGPUISD::BUFFER_ATOMIC_CMPSWAP", SDTypeProfile<1, 9, @@ -246,24 +235,13 @@ def SIbuffer_atomic_cmpswap : SDNode <"AMDGPUISD::BUFFER_ATOMIC_CMPSWAP", [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore] >; -def SIbuffer_atomic_cmpswap_ret : PatFrag< - (ops node:$src, node:$cmp, node:$rsrc, node:$vindex, node:$voffset, - node:$soffset, node:$offset, node:$cachepolicy, node:$idxen), - (SIbuffer_atomic_cmpswap node:$src, node:$cmp, node:$rsrc, node:$vindex, - node:$voffset, node:$soffset, node:$offset, node:$cachepolicy, - node:$idxen)> { - let PredicateCode = [{ return !(SDValue(N, 0).use_empty()); }]; - let GISelPredicateCode = [{ return true; }]; -} - def SIbuffer_atomic_cmpswap_noret : PatFrag< (ops node:$src, node:$cmp, node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset, node:$cachepolicy, node:$idxen), (SIbuffer_atomic_cmpswap node:$src, node:$cmp, node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset, node:$cachepolicy, node:$idxen)> { - let PredicateCode = [{ return SDValue(N, 0).use_empty(); }]; - let GISelPredicateCode = [{ return false; }]; + let HasNoUse = true; } class SDGlobalAtomicNoRtn<string opcode, ValueType ty> : SDNode <opcode, @@ -774,13 +752,13 @@ multiclass SIAtomicM0Glue2 <string op_name, bit is_amdgpu = 0, let AddressSpaces = StoreAddress_local.AddrSpaces in { defm _local_m0 : binary_atomic_op <!cast<SDNode>(NAME#"_glue"), IsInt>; - defm _local_m0 : ret_noret_binary_atomic_op <!cast<SDNode>(NAME#"_glue"), + defm _local_m0 : noret_binary_atomic_op <!cast<SDNode>(NAME#"_glue"), IsInt>; } let AddressSpaces = StoreAddress_region.AddrSpaces in { defm _region_m0 : binary_atomic_op <!cast<SDNode>(NAME#"_glue"), IsInt>; - defm _region_m0 : ret_noret_binary_atomic_op <!cast<SDNode>(NAME#"_glue"), + defm _region_m0 : noret_binary_atomic_op <!cast<SDNode>(NAME#"_glue"), IsInt>; } } @@ -2194,21 +2172,21 @@ class getAsmVOP3DPPBase <int NumSrcArgs, bit HasDst, bit HasClamp, "$sdst", "$vdst"), ""); // use $sdst for VOPC - string isrc0 = !if(!eq(NumSrcArgs, 1), "$src0", "$src0,"); - string isrc1 = !if(!eq(NumSrcArgs, 1), "", - !if(!eq(NumSrcArgs, 2), " $src1", - " $src1,")); - string isrc2 = !if(!eq(NumSrcArgs, 3), " $src2", ""); - - string fsrc0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,"); - string fsrc1 = !if(!eq(NumSrcArgs, 1), "", - !if(!eq(NumSrcArgs, 2), " $src1_modifiers", - " $src1_modifiers,")); - string fsrc2 = !if(!eq(NumSrcArgs, 3), " $src2_modifiers", ""); - - string src0 = !if(Src0HasMods, fsrc0, isrc0); - string src1 = !if(Src1HasMods, fsrc1, isrc1); - string src2 = !if(Src2HasMods, fsrc2, isrc2); + string src0nomods = !if(!eq(NumSrcArgs, 1), "$src0", "$src0,"); + string src1nomods = !if(!eq(NumSrcArgs, 1), "", + !if(!eq(NumSrcArgs, 2), " $src1", + " $src1,")); + string src2nomods = 
!if(!eq(NumSrcArgs, 3), " $src2", ""); + + string src0mods = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,"); + string src1mods = !if(!eq(NumSrcArgs, 1), "", + !if(!eq(NumSrcArgs, 2), " $src1_modifiers", + " $src1_modifiers,")); + string src2mods = !if(!eq(NumSrcArgs, 3), " $src2_modifiers", ""); + + string src0 = !if(Src0HasMods, src0mods, src0nomods); + string src1 = !if(Src1HasMods, src1mods, src1nomods); + string src2 = !if(Src2HasMods, src2mods, src2nomods); string opsel = !if(HasOpSel, "$op_sel", ""); string 3PMods = !if(IsVOP3P, !if(HasOpSel, "$op_sel_hi", "") @@ -2559,8 +2537,8 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableF32SrcMods = 0, // the asm operand name via this HasModifiers flag field string AsmDPP8 = getAsmDPP8<HasDst, NumSrcArgs, 0 /*HasModifiers*/, DstVT>.ret; field string AsmVOP3DPPBase = getAsmVOP3DPPBase<NumSrcArgs, HasDst, HasClamp, - HasOpSel, HasOMod, IsVOP3P, HasModifiers, HasSrc0FloatMods, HasSrc1FloatMods, - HasSrc2FloatMods, DstVT >.ret; + HasOpSel, HasOMod, IsVOP3P, HasModifiers, HasModifiers, HasModifiers, + HasModifiers, DstVT>.ret; field string AsmVOP3DPP = getAsmVOP3DPP<AsmVOP3DPPBase>.ret; field string AsmVOP3DPP16 = getAsmVOP3DPP16<AsmVOP3DPPBase>.ret; field string AsmVOP3DPP8 = getAsmVOP3DPP8<AsmVOP3DPPBase>.ret; @@ -2800,6 +2778,14 @@ def getDPPOp32 : InstrMapping { let ValueCols = [["DPP"]]; } +def getDPPOp64 : InstrMapping { + let FilterClass = "VOP"; + let RowFields = ["OpName"]; + let ColFields = ["AsmVariantName"]; + let KeyCol = ["VOP3"]; + let ValueCols = [["VOP3_DPP"]]; +} + // Maps an commuted opcode to its original version def getCommuteOrig : InstrMapping { let FilterClass = "Commutable_REV"; @@ -2961,6 +2947,27 @@ def getVCMPXOpFromVCMP : InstrMapping { let ValueCols = [["1"]]; } +def VOPDComponentTable : GenericTable { + let FilterClass = "VOPD_Component"; + let CppTypeName = "VOPDComponentInfo"; + let Fields = ["BaseVOP", "VOPDOp", "CanBeVOPDX"]; + let PrimaryKey = ["BaseVOP"]; + let PrimaryKeyName = "getVOPDComponentHelper"; +} + +def VOPDPairs : GenericTable { + let FilterClass = "VOPD_Base"; + let CppTypeName = "VOPDInfo"; + let Fields = ["Opcode", "OpX", "OpY"]; + let PrimaryKey = ["Opcode"]; + let PrimaryKeyName = "getVOPDOpcodeHelper"; +} + +def getVOPDInfoFromComponentOpcodes : SearchIndex { + let Table = VOPDPairs; + let Key = ["OpX", "OpY"]; +} + include "SIInstructions.td" include "DSInstructions.td" diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 829669157893..ce8c03bb8d64 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -1449,6 +1449,14 @@ def : BitConvert <v8i32, v16f16, VReg_256>; def : BitConvert <v8i32, v16i16, VReg_256>; def : BitConvert <v8f32, v16f16, VReg_256>; def : BitConvert <v8f32, v16i16, VReg_256>; +def : BitConvert <v16f16, v4i64, VReg_256>; +def : BitConvert <v16i16, v4i64, VReg_256>; +def : BitConvert <v16f16, v4f64, VReg_256>; +def : BitConvert <v16i16, v4f64, VReg_256>; +def : BitConvert <v4i64, v16f16, VReg_256>; +def : BitConvert <v4i64, v16i16, VReg_256>; +def : BitConvert <v4f64, v16f16, VReg_256>; +def : BitConvert <v4f64, v16i16, VReg_256>; // 512-bit bitcast def : BitConvert <v16i32, v16f32, VReg_512>; @@ -3012,6 +3020,35 @@ multiclass Int16Med3Pat<Instruction med3Inst, def : FPMed3Pat<f32, V_MED3_F32_e64>; +class +IntMinMaxPat<Instruction minmaxInst, SDPatternOperator min_or_max, + SDPatternOperator max_or_min_oneuse> : AMDGPUPat < + (DivergentBinFrag<min_or_max> 
(max_or_min_oneuse i32:$src0, i32:$src1), + i32:$src2), + (minmaxInst VSrc_b32:$src0, VSrc_b32:$src1, VSrc_b32:$src2) +>; + +class +FPMinMaxPat<Instruction minmaxInst, ValueType vt, SDPatternOperator min_or_max, + SDPatternOperator max_or_min_oneuse> : GCNPat < + (min_or_max (max_or_min_oneuse (VOP3Mods vt:$src0, i32:$src0_mods), + (VOP3Mods vt:$src1, i32:$src1_mods)), + (vt (VOP3Mods vt:$src2, i32:$src2_mods))), + (minmaxInst $src0_mods, $src0, $src1_mods, $src1, $src2_mods, $src2, + DSTCLAMP.NONE, DSTOMOD.NONE) +>; + +let OtherPredicates = [isGFX11Plus] in { +def : IntMinMaxPat<V_MAXMIN_I32_e64, smin, smax_oneuse>; +def : IntMinMaxPat<V_MINMAX_I32_e64, smax, smin_oneuse>; +def : IntMinMaxPat<V_MAXMIN_U32_e64, umin, umax_oneuse>; +def : IntMinMaxPat<V_MINMAX_U32_e64, umax, umin_oneuse>; +def : FPMinMaxPat<V_MINMAX_F32_e64, f32, fmaxnum_like, fminnum_like_oneuse>; +def : FPMinMaxPat<V_MAXMIN_F32_e64, f32, fminnum_like, fmaxnum_like_oneuse>; +def : FPMinMaxPat<V_MINMAX_F16_e64, f16, fmaxnum_like, fminnum_like_oneuse>; +def : FPMinMaxPat<V_MAXMIN_F16_e64, f16, fminnum_like, fmaxnum_like_oneuse>; +} + let OtherPredicates = [isGFX9Plus] in { def : FP16Med3Pat<f16, V_MED3_F16_e64>; defm : Int16Med3Pat<V_MED3_I16_e64, smin, smax, smax_oneuse, smin_oneuse>; diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp index 607383ab8cde..67077a2eaa6b 100644 --- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp @@ -148,6 +148,7 @@ public: } void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addUsedIfAvailable<LiveIntervals>(); // Should preserve the same set that TwoAddressInstructions does. AU.addPreserved<MachineDominatorTree>(); AU.addPreserved<SlotIndexes>(); diff --git a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp index dd881ec42d53..786b6b61cb23 100644 --- a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp @@ -72,7 +72,7 @@ INITIALIZE_PASS_END(SILowerSGPRSpills, DEBUG_TYPE, char &llvm::SILowerSGPRSpillsID = SILowerSGPRSpills::ID; -/// Insert restore code for the callee-saved registers used in the function. +/// Insert spill code for the callee-saved registers used in the function. 
static void insertCSRSaves(MachineBasicBlock &SaveBlock, ArrayRef<CalleeSavedInfo> CSI, LiveIntervals *LIS) { diff --git a/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp b/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp index e426e938b856..ff5587fbb0ca 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp @@ -1883,7 +1883,13 @@ void SIScheduleDAGMI::schedule() LLVM_DEBUG(dbgs() << "Preparing Scheduling\n"); buildDAGWithRegPressure(); + postprocessDAG(); + LLVM_DEBUG(dump()); + if (PrintDAGs) + dump(); + if (ViewMISchedDAGs) + viewGraph(); topologicalSort(); findRootsAndBiasEdges(TopRoots, BotRoots); diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp index 8a66213931ff..6b93769949bc 100644 --- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp +++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp @@ -2329,13 +2329,13 @@ bool SIMemoryLegalizer::runOnMachineFunction(MachineFunction &MF) { continue; if (const auto &MOI = MOA.getLoadInfo(MI)) - Changed |= expandLoad(MOI.getValue(), MI); + Changed |= expandLoad(MOI.value(), MI); else if (const auto &MOI = MOA.getStoreInfo(MI)) - Changed |= expandStore(MOI.getValue(), MI); + Changed |= expandStore(MOI.value(), MI); else if (const auto &MOI = MOA.getAtomicFenceInfo(MI)) - Changed |= expandAtomicFence(MOI.getValue(), MI); + Changed |= expandAtomicFence(MOI.value(), MI); else if (const auto &MOI = MOA.getAtomicCmpxchgOrRmwInfo(MI)) - Changed |= expandAtomicCmpxchgOrRmw(MOI.getValue(), MI); + Changed |= expandAtomicCmpxchgOrRmw(MOI.value(), MI); } } diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp index 5215397d5936..66bc46aaefea 100644 --- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp +++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp @@ -9,6 +9,7 @@ #include "AMDGPU.h" #include "GCNSubtarget.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "SIRegisterInfo.h" #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/InitializePasses.h" @@ -20,10 +21,40 @@ using namespace llvm; namespace { class SIOptimizeExecMasking : public MachineFunctionPass { + MachineFunction *MF = nullptr; + const GCNSubtarget *ST = nullptr; + const SIRegisterInfo *TRI = nullptr; + const SIInstrInfo *TII = nullptr; + const MachineRegisterInfo *MRI = nullptr; + + Register isCopyFromExec(const MachineInstr &MI) const; + Register isCopyToExec(const MachineInstr &MI) const; + bool removeTerminatorBit(MachineInstr &MI) const; + MachineBasicBlock::reverse_iterator + fixTerminators(MachineBasicBlock &MBB) const; + MachineBasicBlock::reverse_iterator + findExecCopy(MachineBasicBlock &MBB, MachineBasicBlock::reverse_iterator I, + unsigned CopyToExec) const; + + bool isRegisterInUseBetween(MachineInstr &Stop, MachineInstr &Start, + MCRegister Reg, bool UseLiveOuts = false, + bool IgnoreStart = false) const; + bool isRegisterInUseAfter(MachineInstr &Stop, MCRegister Reg) const; + MachineInstr *findInstrBackwards(MachineInstr &Origin, + std::function<bool(MachineInstr *)> Pred, + ArrayRef<MCRegister> NonModifiableRegs, + unsigned MaxInstructions = 20) const; + MachineInstr *findPossibleVCMPVCMPXOptimization(MachineInstr &SaveExec, + MCRegister Exec) const; + bool optimizeExecSequence() const; + bool optimizeVCmpxAndSaveexecSequence() const; + bool optimizeSingleVCMPSaveExecSequence(MachineInstr &SaveExecInstr, + MachineInstr &VCmp, + MCRegister Exec) const; + public: 
static char ID; -public: SIOptimizeExecMasking() : MachineFunctionPass(ID) { initializeSIOptimizeExecMaskingPass(*PassRegistry::getPassRegistry()); } @@ -53,7 +84,7 @@ char SIOptimizeExecMasking::ID = 0; char &llvm::SIOptimizeExecMaskingID = SIOptimizeExecMasking::ID; /// If \p MI is a copy from exec, return the register copied to. -static Register isCopyFromExec(const MachineInstr &MI, const GCNSubtarget &ST) { +Register SIOptimizeExecMasking::isCopyFromExec(const MachineInstr &MI) const { switch (MI.getOpcode()) { case AMDGPU::COPY: case AMDGPU::S_MOV_B64: @@ -61,8 +92,7 @@ static Register isCopyFromExec(const MachineInstr &MI, const GCNSubtarget &ST) { case AMDGPU::S_MOV_B32: case AMDGPU::S_MOV_B32_term: { const MachineOperand &Src = MI.getOperand(1); - if (Src.isReg() && - Src.getReg() == (ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC)) + if (Src.isReg() && Src.getReg() == TRI->getExec()) return MI.getOperand(0).getReg(); } } @@ -71,14 +101,13 @@ static Register isCopyFromExec(const MachineInstr &MI, const GCNSubtarget &ST) { } /// If \p MI is a copy to exec, return the register copied from. -static Register isCopyToExec(const MachineInstr &MI, const GCNSubtarget &ST) { +Register SIOptimizeExecMasking::isCopyToExec(const MachineInstr &MI) const { switch (MI.getOpcode()) { case AMDGPU::COPY: case AMDGPU::S_MOV_B64: case AMDGPU::S_MOV_B32: { const MachineOperand &Dst = MI.getOperand(0); - if (Dst.isReg() && - Dst.getReg() == (ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC) && + if (Dst.isReg() && Dst.getReg() == TRI->getExec() && MI.getOperand(1).isReg()) return MI.getOperand(1).getReg(); break; @@ -173,64 +202,64 @@ static unsigned getSaveExecOp(unsigned Opc) { // These are only terminators to get correct spill code placement during // register allocation, so turn them back into normal instructions. -static bool removeTerminatorBit(const SIInstrInfo &TII, MachineInstr &MI) { +bool SIOptimizeExecMasking::removeTerminatorBit(MachineInstr &MI) const { switch (MI.getOpcode()) { case AMDGPU::S_MOV_B32_term: { bool RegSrc = MI.getOperand(1).isReg(); - MI.setDesc(TII.get(RegSrc ? AMDGPU::COPY : AMDGPU::S_MOV_B32)); + MI.setDesc(TII->get(RegSrc ? AMDGPU::COPY : AMDGPU::S_MOV_B32)); return true; } case AMDGPU::S_MOV_B64_term: { bool RegSrc = MI.getOperand(1).isReg(); - MI.setDesc(TII.get(RegSrc ? AMDGPU::COPY : AMDGPU::S_MOV_B64)); + MI.setDesc(TII->get(RegSrc ? AMDGPU::COPY : AMDGPU::S_MOV_B64)); return true; } case AMDGPU::S_XOR_B64_term: { // This is only a terminator to get the correct spill code placement during // register allocation. - MI.setDesc(TII.get(AMDGPU::S_XOR_B64)); + MI.setDesc(TII->get(AMDGPU::S_XOR_B64)); return true; } case AMDGPU::S_XOR_B32_term: { // This is only a terminator to get the correct spill code placement during // register allocation. - MI.setDesc(TII.get(AMDGPU::S_XOR_B32)); + MI.setDesc(TII->get(AMDGPU::S_XOR_B32)); return true; } case AMDGPU::S_OR_B64_term: { // This is only a terminator to get the correct spill code placement during // register allocation. - MI.setDesc(TII.get(AMDGPU::S_OR_B64)); + MI.setDesc(TII->get(AMDGPU::S_OR_B64)); return true; } case AMDGPU::S_OR_B32_term: { // This is only a terminator to get the correct spill code placement during // register allocation. - MI.setDesc(TII.get(AMDGPU::S_OR_B32)); + MI.setDesc(TII->get(AMDGPU::S_OR_B32)); return true; } case AMDGPU::S_ANDN2_B64_term: { // This is only a terminator to get the correct spill code placement during // register allocation. 
- MI.setDesc(TII.get(AMDGPU::S_ANDN2_B64)); + MI.setDesc(TII->get(AMDGPU::S_ANDN2_B64)); return true; } case AMDGPU::S_ANDN2_B32_term: { // This is only a terminator to get the correct spill code placement during // register allocation. - MI.setDesc(TII.get(AMDGPU::S_ANDN2_B32)); + MI.setDesc(TII->get(AMDGPU::S_ANDN2_B32)); return true; } case AMDGPU::S_AND_B64_term: { // This is only a terminator to get the correct spill code placement during // register allocation. - MI.setDesc(TII.get(AMDGPU::S_AND_B64)); + MI.setDesc(TII->get(AMDGPU::S_AND_B64)); return true; } case AMDGPU::S_AND_B32_term: { // This is only a terminator to get the correct spill code placement during // register allocation. - MI.setDesc(TII.get(AMDGPU::S_AND_B32)); + MI.setDesc(TII->get(AMDGPU::S_AND_B32)); return true; } default: @@ -241,9 +270,8 @@ static bool removeTerminatorBit(const SIInstrInfo &TII, MachineInstr &MI) { // Turn all pseudoterminators in the block into their equivalent non-terminator // instructions. Returns the reverse iterator to the first non-terminator // instruction in the block. -static MachineBasicBlock::reverse_iterator fixTerminators( - const SIInstrInfo &TII, - MachineBasicBlock &MBB) { +MachineBasicBlock::reverse_iterator +SIOptimizeExecMasking::fixTerminators(MachineBasicBlock &MBB) const { MachineBasicBlock::reverse_iterator I = MBB.rbegin(), E = MBB.rend(); bool Seen = false; @@ -252,7 +280,7 @@ static MachineBasicBlock::reverse_iterator fixTerminators( if (!I->isTerminator()) return Seen ? FirstNonTerm : I; - if (removeTerminatorBit(TII, *I)) { + if (removeTerminatorBit(*I)) { if (!Seen) { FirstNonTerm = I; Seen = true; @@ -263,17 +291,15 @@ static MachineBasicBlock::reverse_iterator fixTerminators( return FirstNonTerm; } -static MachineBasicBlock::reverse_iterator findExecCopy( - const SIInstrInfo &TII, - const GCNSubtarget &ST, - MachineBasicBlock &MBB, - MachineBasicBlock::reverse_iterator I, - unsigned CopyToExec) { +MachineBasicBlock::reverse_iterator +SIOptimizeExecMasking::findExecCopy(MachineBasicBlock &MBB, + MachineBasicBlock::reverse_iterator I, + unsigned CopyToExec) const { const unsigned InstLimit = 25; auto E = MBB.rend(); for (unsigned N = 0; N <= InstLimit && I != E; ++I, ++N) { - Register CopyFromExec = isCopyFromExec(*I, ST); + Register CopyFromExec = isCopyFromExec(*I); if (CopyFromExec.isValid()) return I; } @@ -298,11 +324,9 @@ static bool isLiveOut(const MachineBasicBlock &MBB, unsigned Reg) { // an arbitrary condition based on the current MachineInstr, for instance an // target instruction. Breaks prematurely by returning nullptr if one of the // registers given in NonModifiableRegs is modified by the current instruction. 
-static MachineInstr * -findInstrBackwards(MachineInstr &Origin, - std::function<bool(MachineInstr *)> Pred, - ArrayRef<MCRegister> NonModifiableRegs, - const SIRegisterInfo *TRI, unsigned MaxInstructions = 20) { +MachineInstr *SIOptimizeExecMasking::findInstrBackwards( + MachineInstr &Origin, std::function<bool(MachineInstr *)> Pred, + ArrayRef<MCRegister> NonModifiableRegs, unsigned MaxInstructions) const { MachineBasicBlock::reverse_iterator A = Origin.getReverseIterator(), E = Origin.getParent()->rend(); unsigned CurrentIteration = 0; @@ -310,7 +334,7 @@ findInstrBackwards(MachineInstr &Origin, for (++A; CurrentIteration < MaxInstructions && A != E; ++A) { if (A->isDebugInstr()) continue; - + if (Pred(&*A)) return &*A; @@ -318,209 +342,64 @@ findInstrBackwards(MachineInstr &Origin, if (A->modifiesRegister(Reg, TRI)) return nullptr; } - + ++CurrentIteration; } return nullptr; } - // Determine if a register Reg is not re-defined and still in use // in the range (Stop..Start]. // It does so by backwards calculating liveness from the end of the BB until // either Stop or the beginning of the BB is reached. // After liveness is calculated, we can determine if Reg is still in use and not // defined inbetween the instructions. -static bool isRegisterInUseBetween(MachineInstr &Stop, MachineInstr &Start, - MCRegister Reg, const SIRegisterInfo *TRI, - MachineRegisterInfo &MRI, - bool useLiveOuts = false, - bool ignoreStart = false) { +bool SIOptimizeExecMasking::isRegisterInUseBetween(MachineInstr &Stop, + MachineInstr &Start, + MCRegister Reg, + bool UseLiveOuts, + bool IgnoreStart) const { LivePhysRegs LR(*TRI); - if (useLiveOuts) + if (UseLiveOuts) LR.addLiveOuts(*Stop.getParent()); MachineBasicBlock::reverse_iterator A(Start); MachineBasicBlock::reverse_iterator E(Stop); - if (ignoreStart) + if (IgnoreStart) ++A; for (; A != Stop.getParent()->rend() && A != Stop; ++A) { LR.stepBackward(*A); } - return !LR.available(MRI, Reg); + return !LR.available(*MRI, Reg); } // Determine if a register Reg is not re-defined and still in use // in the range (Stop..BB.end]. -static bool isRegisterInUseAfter(MachineInstr &Stop, MCRegister Reg, - const SIRegisterInfo *TRI, - MachineRegisterInfo &MRI) { - return isRegisterInUseBetween(Stop, *Stop.getParent()->rbegin(), Reg, TRI, - MRI, true); +bool SIOptimizeExecMasking::isRegisterInUseAfter(MachineInstr &Stop, + MCRegister Reg) const { + return isRegisterInUseBetween(Stop, *Stop.getParent()->rbegin(), Reg, true); } -// Tries to find a possibility to optimize a v_cmp ..., s_and_saveexec sequence -// by looking at an instance of a s_and_saveexec instruction. Returns a pointer -// to the v_cmp instruction if it is safe to replace the sequence (see the -// conditions in the function body). This is after register allocation, so some -// checks on operand dependencies need to be considered. -static MachineInstr *findPossibleVCMPVCMPXOptimization( - MachineInstr &SaveExec, MCRegister Exec, const SIRegisterInfo *TRI, - const SIInstrInfo *TII, MachineRegisterInfo &MRI) { - - MachineInstr *VCmp = nullptr; - - Register SaveExecDest = SaveExec.getOperand(0).getReg(); - if (!TRI->isSGPRReg(MRI, SaveExecDest)) - return nullptr; - - MachineOperand *SaveExecSrc0 = - TII->getNamedOperand(SaveExec, AMDGPU::OpName::src0); - if (!SaveExecSrc0->isReg()) - return nullptr; - - // Try to find the last v_cmp instruction that defs the saveexec input - // operand without any write to Exec or the saveexec input operand inbetween. 
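Editorial note: the comments above describe the backward liveness walk of isRegisterInUseBetween in prose. The same LivePhysRegs idiom, written as a standalone helper, looks roughly like the hedged sketch below; the function name and signature are invented for exposition and are not part of this patch.

#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"

using namespace llvm;

// Returns true if PhysReg is still live (read before any redefinition) when
// walking MBB backwards from its end down to, but not including, Stop.
static bool isLiveBackwardsFrom(const MachineBasicBlock &MBB,
                                const MachineInstr &Stop, MCRegister PhysReg,
                                const TargetRegisterInfo &TRI,
                                const MachineRegisterInfo &MRI) {
  LivePhysRegs LR(TRI);
  LR.addLiveOuts(MBB); // seed the set with the block's live-out registers
  for (const MachineInstr &MI : llvm::reverse(MBB)) {
    if (&MI == &Stop)
      break;
    LR.stepBackward(MI); // remove MI's defs from the set, then add its uses
  }
  return !LR.available(MRI, PhysReg); // "not available" here means still live
}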
- VCmp = findInstrBackwards( - SaveExec, - [&](MachineInstr *Check) { - return AMDGPU::getVCMPXOpFromVCMP(Check->getOpcode()) != -1 && - Check->modifiesRegister(SaveExecSrc0->getReg(), TRI); - }, - {Exec, SaveExecSrc0->getReg()}, TRI); - - if (!VCmp) - return nullptr; - - MachineOperand *VCmpDest = TII->getNamedOperand(*VCmp, AMDGPU::OpName::sdst); - assert(VCmpDest && "Should have an sdst operand!"); - - // Check if any of the v_cmp source operands is written by the saveexec. - MachineOperand *Src0 = TII->getNamedOperand(*VCmp, AMDGPU::OpName::src0); - if (Src0->isReg() && TRI->isSGPRReg(MRI, Src0->getReg()) && - SaveExec.modifiesRegister(Src0->getReg(), TRI)) - return nullptr; - - MachineOperand *Src1 = TII->getNamedOperand(*VCmp, AMDGPU::OpName::src1); - if (Src1->isReg() && TRI->isSGPRReg(MRI, Src1->getReg()) && - SaveExec.modifiesRegister(Src1->getReg(), TRI)) - return nullptr; - - // Don't do the transformation if the destination operand is included in - // it's MBB Live-outs, meaning it's used in any of it's successors, leading - // to incorrect code if the v_cmp and therefore the def of - // the dest operand is removed. - if (isLiveOut(*VCmp->getParent(), VCmpDest->getReg())) - return nullptr; - - // If the v_cmp target is in use between v_cmp and s_and_saveexec or after the - // s_and_saveexec, skip the optimization. - if (isRegisterInUseBetween(*VCmp, SaveExec, VCmpDest->getReg(), TRI, MRI, - false, true) || - isRegisterInUseAfter(SaveExec, VCmpDest->getReg(), TRI, MRI)) - return nullptr; - - // Try to determine if there is a write to any of the VCmp - // operands between the saveexec and the vcmp. - // If yes, additional VGPR spilling might need to be inserted. In this case, - // it's not worth replacing the instruction sequence. - SmallVector<MCRegister, 2> NonDefRegs; - if (Src0->isReg()) - NonDefRegs.push_back(Src0->getReg()); - - if (Src1->isReg()) - NonDefRegs.push_back(Src1->getReg()); - - if (!findInstrBackwards( - SaveExec, [&](MachineInstr *Check) { return Check == VCmp; }, - NonDefRegs, TRI)) - return nullptr; - - return VCmp; -} - -// Inserts the optimized s_mov_b32 / v_cmpx sequence based on the -// operands extracted from a v_cmp ..., s_and_saveexec pattern. -static bool optimizeVCMPSaveExecSequence(MachineInstr &SaveExecInstr, - MachineInstr &VCmp, MCRegister Exec, - const SIInstrInfo *TII, - const SIRegisterInfo *TRI, - MachineRegisterInfo &MRI) { - const int NewOpcode = AMDGPU::getVCMPXOpFromVCMP(VCmp.getOpcode()); - - if (NewOpcode == -1) - return false; - - MachineOperand *Src0 = TII->getNamedOperand(VCmp, AMDGPU::OpName::src0); - MachineOperand *Src1 = TII->getNamedOperand(VCmp, AMDGPU::OpName::src1); - - Register MoveDest = SaveExecInstr.getOperand(0).getReg(); - - MachineBasicBlock::instr_iterator InsertPosIt = SaveExecInstr.getIterator(); - if (!SaveExecInstr.uses().empty()) { - bool isSGPR32 = TRI->getRegSizeInBits(MoveDest, MRI) == 32; - unsigned MovOpcode = isSGPR32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; - BuildMI(*SaveExecInstr.getParent(), InsertPosIt, - SaveExecInstr.getDebugLoc(), TII->get(MovOpcode), MoveDest) - .addReg(Exec); - } - - // Omit dst as V_CMPX is implicitly writing to EXEC. - // Add dummy src and clamp modifiers, if needed. 
- auto Builder = BuildMI(*VCmp.getParent(), std::next(InsertPosIt), - VCmp.getDebugLoc(), TII->get(NewOpcode)); - - auto TryAddImmediateValueFromNamedOperand = - [&](unsigned OperandName) -> void { - if (auto *Mod = TII->getNamedOperand(VCmp, OperandName)) - Builder.addImm(Mod->getImm()); - }; - - TryAddImmediateValueFromNamedOperand(AMDGPU::OpName::src0_modifiers); - Builder.add(*Src0); - - TryAddImmediateValueFromNamedOperand(AMDGPU::OpName::src1_modifiers); - Builder.add(*Src1); - - TryAddImmediateValueFromNamedOperand(AMDGPU::OpName::clamp); - - // The kill flags may no longer be correct. - if (Src0->isReg()) - MRI.clearKillFlags(Src0->getReg()); - if (Src1->isReg()) - MRI.clearKillFlags(Src1->getReg()); - - return true; -} - -bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(MF.getFunction())) - return false; - - const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); - const SIRegisterInfo *TRI = ST.getRegisterInfo(); - const SIInstrInfo *TII = ST.getInstrInfo(); - MachineRegisterInfo *MRI = &MF.getRegInfo(); - MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC; - - // Optimize sequences emitted for control flow lowering. They are originally - // emitted as the separate operations because spill code may need to be - // inserted for the saved copy of exec. - // - // x = copy exec - // z = s_<op>_b64 x, y - // exec = copy z - // => - // x = s_<op>_saveexec_b64 y - // +// Optimize sequences emitted for control flow lowering. They are originally +// emitted as the separate operations because spill code may need to be +// inserted for the saved copy of exec. +// +// x = copy exec +// z = s_<op>_b64 x, y +// exec = copy z +// => +// x = s_<op>_saveexec_b64 y +// +bool SIOptimizeExecMasking::optimizeExecSequence() const { + MCRegister Exec = TRI->getExec(); bool Changed = false; - for (MachineBasicBlock &MBB : MF) { - MachineBasicBlock::reverse_iterator I = fixTerminators(*TII, MBB); + for (MachineBasicBlock &MBB : *MF) { + MachineBasicBlock::reverse_iterator I = fixTerminators(MBB); MachineBasicBlock::reverse_iterator E = MBB.rend(); if (I == E) continue; @@ -532,7 +411,7 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) { unsigned SearchCount = 0; const unsigned SearchLimit = 5; while (I != E && SearchCount++ < SearchLimit) { - CopyToExec = isCopyToExec(*I, ST); + CopyToExec = isCopyToExec(*I); if (CopyToExec) break; ++I; @@ -542,8 +421,8 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) { continue; // Scan backwards to find the def. 
- auto CopyToExecInst = &*I; - auto CopyFromExecInst = findExecCopy(*TII, ST, MBB, I, CopyToExec); + auto *CopyToExecInst = &*I; + auto CopyFromExecInst = findExecCopy(MBB, I, CopyToExec); if (CopyFromExecInst == E) { auto PrepareExecInst = std::next(I); if (PrepareExecInst == E) @@ -574,8 +453,9 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) { MachineInstr *SaveExecInst = nullptr; SmallVector<MachineInstr *, 4> OtherUseInsts; - for (MachineBasicBlock::iterator J - = std::next(CopyFromExecInst->getIterator()), JE = I->getIterator(); + for (MachineBasicBlock::iterator + J = std::next(CopyFromExecInst->getIterator()), + JE = I->getIterator(); J != JE; ++J) { if (SaveExecInst && J->readsRegister(Exec, TRI)) { LLVM_DEBUG(dbgs() << "exec read prevents saveexec: " << *J << '\n'); @@ -655,58 +535,210 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) { BuildMI(MBB, InsPt, DL, TII->get(getSaveExecOp(SaveExecInst->getOpcode())), CopyFromExec) - .addReg(OtherOp->getReg()); + .addReg(OtherOp->getReg()); SaveExecInst->eraseFromParent(); CopyToExecInst->eraseFromParent(); for (MachineInstr *OtherInst : OtherUseInsts) { - OtherInst->substituteRegister(CopyToExec, Exec, - AMDGPU::NoSubRegister, *TRI); + OtherInst->substituteRegister(CopyToExec, Exec, AMDGPU::NoSubRegister, + *TRI); } Changed = true; } - // After all s_op_saveexec instructions are inserted, - // replace (on GFX10.3 and later) - // v_cmp_* SGPR, IMM, VGPR - // s_and_saveexec_b32 EXEC_SGPR_DEST, SGPR - // with - // s_mov_b32 EXEC_SGPR_DEST, exec_lo - // v_cmpx_* IMM, VGPR - // to reduce pipeline stalls. - if (ST.hasGFX10_3Insts()) { - DenseMap<MachineInstr *, MachineInstr *> SaveExecVCmpMapping; - const unsigned AndSaveExecOpcode = - ST.isWave32() ? AMDGPU::S_AND_SAVEEXEC_B32 : AMDGPU::S_AND_SAVEEXEC_B64; - - for (MachineBasicBlock &MBB : MF) { - for (MachineInstr &MI : MBB) { - // Record relevant v_cmp / s_and_saveexec instruction pairs for - // replacement. - if (MI.getOpcode() != AndSaveExecOpcode) - continue; + return Changed; +} - if (MachineInstr *VCmp = - findPossibleVCMPVCMPXOptimization(MI, Exec, TRI, TII, *MRI)) - SaveExecVCmpMapping[&MI] = VCmp; - } +// Tries to find a possibility to optimize a v_cmp ..., s_and_saveexec sequence +// by looking at an instance of a s_and_saveexec instruction. Returns a pointer +// to the v_cmp instruction if it is safe to replace the sequence (see the +// conditions in the function body). This is after register allocation, so some +// checks on operand dependencies need to be considered. +MachineInstr *SIOptimizeExecMasking::findPossibleVCMPVCMPXOptimization( + MachineInstr &SaveExec, MCRegister Exec) const { + + MachineInstr *VCmp = nullptr; + + Register SaveExecDest = SaveExec.getOperand(0).getReg(); + if (!TRI->isSGPRReg(*MRI, SaveExecDest)) + return nullptr; + + MachineOperand *SaveExecSrc0 = + TII->getNamedOperand(SaveExec, AMDGPU::OpName::src0); + if (!SaveExecSrc0->isReg()) + return nullptr; + + // Try to find the last v_cmp instruction that defs the saveexec input + // operand without any write to Exec or the saveexec input operand inbetween. 
+ VCmp = findInstrBackwards( + SaveExec, + [&](MachineInstr *Check) { + return AMDGPU::getVCMPXOpFromVCMP(Check->getOpcode()) != -1 && + Check->modifiesRegister(SaveExecSrc0->getReg(), TRI); + }, + {Exec, SaveExecSrc0->getReg()}); + + if (!VCmp) + return nullptr; + + MachineOperand *VCmpDest = TII->getNamedOperand(*VCmp, AMDGPU::OpName::sdst); + assert(VCmpDest && "Should have an sdst operand!"); + + // Check if any of the v_cmp source operands is written by the saveexec. + MachineOperand *Src0 = TII->getNamedOperand(*VCmp, AMDGPU::OpName::src0); + if (Src0->isReg() && TRI->isSGPRReg(*MRI, Src0->getReg()) && + SaveExec.modifiesRegister(Src0->getReg(), TRI)) + return nullptr; + + MachineOperand *Src1 = TII->getNamedOperand(*VCmp, AMDGPU::OpName::src1); + if (Src1->isReg() && TRI->isSGPRReg(*MRI, Src1->getReg()) && + SaveExec.modifiesRegister(Src1->getReg(), TRI)) + return nullptr; + + // Don't do the transformation if the destination operand is included in + // it's MBB Live-outs, meaning it's used in any of it's successors, leading + // to incorrect code if the v_cmp and therefore the def of + // the dest operand is removed. + if (isLiveOut(*VCmp->getParent(), VCmpDest->getReg())) + return nullptr; + + // If the v_cmp target is in use between v_cmp and s_and_saveexec or after the + // s_and_saveexec, skip the optimization. + if (isRegisterInUseBetween(*VCmp, SaveExec, VCmpDest->getReg(), false, + true) || + isRegisterInUseAfter(SaveExec, VCmpDest->getReg())) + return nullptr; + + // Try to determine if there is a write to any of the VCmp + // operands between the saveexec and the vcmp. + // If yes, additional VGPR spilling might need to be inserted. In this case, + // it's not worth replacing the instruction sequence. + SmallVector<MCRegister, 2> NonDefRegs; + if (Src0->isReg()) + NonDefRegs.push_back(Src0->getReg()); + + if (Src1->isReg()) + NonDefRegs.push_back(Src1->getReg()); + + if (!findInstrBackwards( + SaveExec, [&](MachineInstr *Check) { return Check == VCmp; }, + NonDefRegs)) + return nullptr; + + return VCmp; +} + +// Inserts the optimized s_mov_b32 / v_cmpx sequence based on the +// operands extracted from a v_cmp ..., s_and_saveexec pattern. +bool SIOptimizeExecMasking::optimizeSingleVCMPSaveExecSequence( + MachineInstr &SaveExecInstr, MachineInstr &VCmp, MCRegister Exec) const { + const int NewOpcode = AMDGPU::getVCMPXOpFromVCMP(VCmp.getOpcode()); + + if (NewOpcode == -1) + return false; + + MachineOperand *Src0 = TII->getNamedOperand(VCmp, AMDGPU::OpName::src0); + MachineOperand *Src1 = TII->getNamedOperand(VCmp, AMDGPU::OpName::src1); + + Register MoveDest = SaveExecInstr.getOperand(0).getReg(); + + MachineBasicBlock::instr_iterator InsertPosIt = SaveExecInstr.getIterator(); + if (!SaveExecInstr.uses().empty()) { + bool IsSGPR32 = TRI->getRegSizeInBits(MoveDest, *MRI) == 32; + unsigned MovOpcode = IsSGPR32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; + BuildMI(*SaveExecInstr.getParent(), InsertPosIt, + SaveExecInstr.getDebugLoc(), TII->get(MovOpcode), MoveDest) + .addReg(Exec); + } + + // Omit dst as V_CMPX is implicitly writing to EXEC. + // Add dummy src and clamp modifiers, if needed. 
+ auto Builder = BuildMI(*VCmp.getParent(), std::next(InsertPosIt), + VCmp.getDebugLoc(), TII->get(NewOpcode)); + + auto TryAddImmediateValueFromNamedOperand = + [&](unsigned OperandName) -> void { + if (auto *Mod = TII->getNamedOperand(VCmp, OperandName)) + Builder.addImm(Mod->getImm()); + }; + + TryAddImmediateValueFromNamedOperand(AMDGPU::OpName::src0_modifiers); + Builder.add(*Src0); + + TryAddImmediateValueFromNamedOperand(AMDGPU::OpName::src1_modifiers); + Builder.add(*Src1); + + TryAddImmediateValueFromNamedOperand(AMDGPU::OpName::clamp); + + // The kill flags may no longer be correct. + if (Src0->isReg()) + MRI->clearKillFlags(Src0->getReg()); + if (Src1->isReg()) + MRI->clearKillFlags(Src1->getReg()); + + return true; +} + +// After all s_op_saveexec instructions are inserted, +// replace (on GFX10.3 and later) +// v_cmp_* SGPR, IMM, VGPR +// s_and_saveexec_b32 EXEC_SGPR_DEST, SGPR +// with +// s_mov_b32 EXEC_SGPR_DEST, exec_lo +// v_cmpx_* IMM, VGPR +// to reduce pipeline stalls. +bool SIOptimizeExecMasking::optimizeVCmpxAndSaveexecSequence() const { + if (!ST->hasGFX10_3Insts()) + return false; + + bool Changed = false; + + DenseMap<MachineInstr *, MachineInstr *> SaveExecVCmpMapping; + MCRegister Exec = TRI->getExec(); + const unsigned AndSaveExecOpcode = + ST->isWave32() ? AMDGPU::S_AND_SAVEEXEC_B32 : AMDGPU::S_AND_SAVEEXEC_B64; + + for (MachineBasicBlock &MBB : *MF) { + for (MachineInstr &MI : MBB) { + // Record relevant v_cmp / s_and_saveexec instruction pairs for + // replacement. + if (MI.getOpcode() != AndSaveExecOpcode) + continue; + + if (MachineInstr *VCmp = findPossibleVCMPVCMPXOptimization(MI, Exec)) + SaveExecVCmpMapping[&MI] = VCmp; } + } - for (const auto &Entry : SaveExecVCmpMapping) { - MachineInstr *SaveExecInstr = Entry.getFirst(); - MachineInstr *VCmpInstr = Entry.getSecond(); + for (const auto &Entry : SaveExecVCmpMapping) { + MachineInstr *SaveExecInstr = Entry.getFirst(); + MachineInstr *VCmpInstr = Entry.getSecond(); - if (optimizeVCMPSaveExecSequence(*SaveExecInstr, *VCmpInstr, Exec, TII, - TRI, *MRI)) { - SaveExecInstr->eraseFromParent(); - VCmpInstr->eraseFromParent(); + if (optimizeSingleVCMPSaveExecSequence(*SaveExecInstr, *VCmpInstr, Exec)) { + SaveExecInstr->eraseFromParent(); + VCmpInstr->eraseFromParent(); - Changed = true; - } + Changed = true; } } return Changed; } + +bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) { + if (skipFunction(MF.getFunction())) + return false; + + this->MF = &MF; + ST = &MF.getSubtarget<GCNSubtarget>(); + TRI = ST->getRegisterInfo(); + TII = ST->getInstrInfo(); + MRI = &MF.getRegInfo(); + + bool Changed = optimizeExecSequence(); + Changed |= optimizeVCmpxAndSaveexecSequence(); + + return Changed; +} diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp index e5e65a8dbbf1..57dbad468de8 100644 --- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp +++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp @@ -159,6 +159,9 @@ bool SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) { return false; Register SelReg = Op1->getReg(); + if (SelReg.isPhysical()) + return false; + auto *Sel = TRI->findReachingDef(SelReg, Op1->getSubReg(), *Cmp, *MRI, LIS); if (!Sel || Sel->getOpcode() != AMDGPU::V_CNDMASK_B32_e64) return false; @@ -264,13 +267,11 @@ bool SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) { // Try to remove v_cndmask_b32. 
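Editorial note on the SIOptimizeExecMasking restructuring completed above: the old file threaded ST, TRI, TII, and MRI through every static helper, while the new code caches them as pass members once per machine function. A generic, hedged skeleton of that pattern follows; the pass and helper names are placeholders, not code from this patch.

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

using namespace llvm;

namespace {
class ExamplePass : public MachineFunctionPass {
  // Per-function state, initialized once in runOnMachineFunction so the
  // helper methods no longer need to take these as parameters.
  MachineFunction *MF = nullptr;
  const MachineRegisterInfo *MRI = nullptr;

  bool optimizeSomething() const {
    bool Changed = false;
    for (MachineBasicBlock &MBB : *MF)
      (void)MBB; // helpers can consult MF and MRI freely here
    return Changed;
  }

public:
  static char ID;
  ExamplePass() : MachineFunctionPass(ID) {}

  bool runOnMachineFunction(MachineFunction &MF) override {
    this->MF = &MF;
    MRI = &MF.getRegInfo();
    return optimizeSomething();
  }
};
} // end anonymous namespace

char ExamplePass::ID = 0;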
if (SelLI) { - bool CanRemoveSel = SelLI->Query(CmpIdx.getRegSlot()).isKill(); - if (!CanRemoveSel) { - // Try to shrink the live interval and check for dead def instead. - LIS->shrinkToUses(SelLI, nullptr); - CanRemoveSel = SelLI->Query(SelIdx.getRegSlot()).isDeadDef(); - } - if (CanRemoveSel) { + // Kill status must be checked before shrinking the live range. + bool IsKill = SelLI->Query(CmpIdx.getRegSlot()).isKill(); + LIS->shrinkToUses(SelLI); + bool IsDead = SelLI->Query(SelIdx.getRegSlot()).isDeadDef(); + if (MRI->use_nodbg_empty(SelReg) && (IsKill || IsDead)) { LLVM_DEBUG(dbgs() << "Erasing: " << *Sel << '\n'); LIS->removeVRegDefAt(*SelLI, SelIdx.getRegSlot()); diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index ad1455ed20fd..b32d5bb04d5b 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -2933,6 +2933,10 @@ MCRegister SIRegisterInfo::getVCC() const { return isWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC; } +MCRegister SIRegisterInfo::getExec() const { + return isWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC; +} + const TargetRegisterClass *SIRegisterInfo::getVGPR64Class() const { // VGPR tuples have an alignment requirement on gfx90a variants. return ST.needsAlignedVGPRs() ? &AMDGPU::VReg_64_Align2RegClass diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h index 9bfbc253410b..6024158be181 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h @@ -344,6 +344,8 @@ public: MCRegister getVCC() const; + MCRegister getExec() const; + const TargetRegisterClass *getRegClass(unsigned RCID) const; // Find reaching register definition diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index e4ab72f1095b..2f334e211181 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -277,6 +277,18 @@ struct VOPC64DPPInfo { uint16_t Opcode; }; +struct VOPDComponentInfo { + uint16_t BaseVOP; + uint16_t VOPDOp; + bool CanBeVOPDX; +}; + +struct VOPDInfo { + uint16_t Opcode; + uint16_t OpX; + uint16_t OpY; +}; + #define GET_MTBUFInfoTable_DECL #define GET_MTBUFInfoTable_IMPL #define GET_MUBUFInfoTable_DECL @@ -293,6 +305,10 @@ struct VOPC64DPPInfo { #define GET_VOPC64DPPTable_IMPL #define GET_VOPC64DPP8Table_DECL #define GET_VOPC64DPP8Table_IMPL +#define GET_VOPDComponentTable_DECL +#define GET_VOPDComponentTable_IMPL +#define GET_VOPDPairs_DECL +#define GET_VOPDPairs_IMPL #define GET_WMMAOpcode2AddrMappingTable_DECL #define GET_WMMAOpcode2AddrMappingTable_IMPL #define GET_WMMAOpcode3AddrMappingTable_DECL @@ -398,6 +414,19 @@ bool getMAIIsGFX940XDL(unsigned Opc) { return Info ? Info->is_gfx940_xdl : false; } +CanBeVOPD getCanBeVOPD(unsigned Opc) { + const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc); + if (Info) + return {Info->CanBeVOPDX, 1}; + else + return {0, 0}; +} + +unsigned getVOPDOpcode(unsigned Opc) { + const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc); + return Info ? Info->VOPDOp : ~0u; +} + unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc) { const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom2AddrOpcode(Opc); return Info ? 
Info->Opcode3Addr : ~0u; @@ -415,6 +444,11 @@ int getMCOpcode(uint16_t Opcode, unsigned Gen) { return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen)); } +int getVOPDFull(unsigned OpX, unsigned OpY) { + const VOPDInfo *Info = getVOPDInfoFromComponentOpcodes(OpX, OpY); + return Info ? Info->Opcode : -1; +} + namespace IsaInfo { AMDGPUTargetID::AMDGPUTargetID(const MCSubtargetInfo &STI) diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index dffeec10a14a..51cf1678207c 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -470,6 +470,14 @@ bool getMAIIsDGEMM(unsigned Opc); LLVM_READONLY bool getMAIIsGFX940XDL(unsigned Opc); +struct CanBeVOPD { + bool X; + bool Y; +}; + +LLVM_READONLY +CanBeVOPD getCanBeVOPD(unsigned Opc); + LLVM_READONLY const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp, uint8_t NumComponents, @@ -483,6 +491,12 @@ LLVM_READONLY int getMCOpcode(uint16_t Opcode, unsigned Gen); LLVM_READONLY +unsigned getVOPDOpcode(unsigned Opc); + +LLVM_READONLY +int getVOPDFull(unsigned OpX, unsigned OpY); + +LLVM_READONLY unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc); LLVM_READONLY diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td index 1485a1e63129..b24857edb59a 100644 --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -495,9 +495,9 @@ def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1], 0, /*EnableClamp=* bank_mask:$bank_mask, bound_ctrl:$bound_ctrl); let InsDPP16 = !con(InsDPP, (ins FI:$fi)); let InsDPP8 = (ins DstRCDPP:$old, - Src0DPP:$src0, - Src1DPP:$src1, - dpp8:$dpp8, FI:$fi); + Src0DPP:$src0, + Src1DPP:$src1, + dpp8:$dpp8, FI:$fi); let HasExt = 1; let HasExtDPP = 1; diff --git a/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/llvm/lib/Target/AMDGPU/VOPCInstructions.td index eb6c54a45263..33d3441e94c2 100644 --- a/llvm/lib/Target/AMDGPU/VOPCInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPCInstructions.td @@ -1108,7 +1108,6 @@ class VOPC64_DPP_Base<bits<10> op, string OpName, VOPProfile P> // Inst{87-84} ignored by hw let Inst{91-88} = bank_mask; let Inst{95-92} = row_mask; - } class VOPC64_DPP16<bits<10> op, VOP_DPP_Pseudo ps, string opName = ps.OpName> @@ -1148,7 +1147,6 @@ class VOPC64_DPP8_Base<bits<10> op, string OpName, VOPProfile P> let Inst{40-32} = fi; let Inst{71-64} = !if(P.HasSrc0, src0{7-0}, 0); let Inst{95-72} = dpp8{23-0}; - } class VOPC64_DPP8<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName> diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td index 8cd3d2fe2c47..187485ffa3ae 100644 --- a/llvm/lib/Target/AMDGPU/VOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td @@ -1215,7 +1215,9 @@ class VOP3_Profile_Base<VOPProfile P, VOP3Features Features = VOP3_REGULAR> : VO let IsMAI = !if(Features.IsMAI, 1, P.IsMAI); let IsPacked = !if(Features.IsPacked, 1, P.IsPacked); - let HasModifiers = !if(Features.IsMAI, 0, !or(Features.IsPacked, P.HasModifiers)); + let HasModifiers = + !if (Features.IsMAI, 0, + !or(Features.IsPacked, Features.HasOpSel, P.HasModifiers)); } class VOP3_Profile<VOPProfile P, VOP3Features Features = VOP3_REGULAR> : VOP3_Profile_Base<P, Features> { @@ -1414,7 +1416,7 @@ multiclass VOP3_Realtriple_with_name_gfx11<bits<10> op, string opName, VOP3_Real_dpp8_with_name_gfx11<op, opName, asmName>; multiclass VOP3Only_Realtriple_with_name_gfx11<bits<10> op, 
string opName, - string asmName> : + string asmName> : VOP3_Realtriple_with_name_gfx11<op, opName, asmName, 1>; multiclass VOP3be_Realtriple_gfx11< diff --git a/llvm/lib/Target/ARC/ARCAsmPrinter.cpp b/llvm/lib/Target/ARC/ARCAsmPrinter.cpp index 0390c01eecb1..cee2fc7d2bf0 100644 --- a/llvm/lib/Target/ARC/ARCAsmPrinter.cpp +++ b/llvm/lib/Target/ARC/ARCAsmPrinter.cpp @@ -49,6 +49,9 @@ public: } // end anonymous namespace void ARCAsmPrinter::emitInstruction(const MachineInstr *MI) { + ARC_MC::verifyInstructionPredicates(MI->getOpcode(), + getSubtargetInfo().getFeatureBits()); + SmallString<128> Str; raw_svector_ostream O(Str); diff --git a/llvm/lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.cpp b/llvm/lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.cpp index d4f74fa77fc4..36b00af2c0b4 100644 --- a/llvm/lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.cpp +++ b/llvm/lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.cpp @@ -26,6 +26,7 @@ using namespace llvm; #define GET_INSTRINFO_MC_DESC +#define ENABLE_INSTR_PREDICATE_VERIFIER #include "ARCGenInstrInfo.inc" #define GET_SUBTARGETINFO_MC_DESC diff --git a/llvm/lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.h b/llvm/lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.h index ab06ce46d99f..5f83b48b36af 100644 --- a/llvm/lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.h +++ b/llvm/lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.h @@ -28,6 +28,7 @@ class Target; // Defines symbolic names for the ARC instructions. #define GET_INSTRINFO_ENUM +#define GET_INSTRINFO_MC_HELPER_DECLS #include "ARCGenInstrInfo.inc" #define GET_SUBTARGETINFO_ENUM diff --git a/llvm/lib/Target/ARM/ARM.td b/llvm/lib/Target/ARM/ARM.td index 48559a89a30a..73970b9c74c5 100644 --- a/llvm/lib/Target/ARM/ARM.td +++ b/llvm/lib/Target/ARM/ARM.td @@ -378,13 +378,13 @@ def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Prefers32BitThumb", "true def FeaturePrefLoopAlign32 : SubtargetFeature<"loop-align", "PrefLoopLogAlignment","2", "Prefer 32-bit alignment for loops">; -def FeatureMVEVectorCostFactor1 : SubtargetFeature<"mve1beat", "MVEVectorCostFactor", "1", +def FeatureMVEVectorCostFactor1 : SubtargetFeature<"mve1beat", "MVEVectorCostFactor", "4", "Model MVE instructions as a 1 beat per tick architecture">; def FeatureMVEVectorCostFactor2 : SubtargetFeature<"mve2beat", "MVEVectorCostFactor", "2", "Model MVE instructions as a 2 beats per tick architecture">; -def FeatureMVEVectorCostFactor4 : SubtargetFeature<"mve4beat", "MVEVectorCostFactor", "4", +def FeatureMVEVectorCostFactor4 : SubtargetFeature<"mve4beat", "MVEVectorCostFactor", "1", "Model MVE instructions as a 4 beats per tick architecture">; /// Some instructions update CPSR partially, which can add false dependency for @@ -1450,6 +1450,13 @@ def : ProcessorModel<"cortex-m55", CortexM4Model, [ARMv81mMainline, HasMVEFloatOps, FeatureFixCMSE_CVE_2021_35465]>; +def : ProcessorModel<"cortex-m85", CortexM7Model, [ARMv81mMainline, + FeatureDSP, + FeatureFPARMv8_D16, + FeaturePACBTI, + FeatureUseMISched, + HasMVEFloatOps]>; + def : ProcNoItin<"cortex-a32", [ARMv8a, FeatureHWDivThumb, FeatureHWDivARM, diff --git a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp index 4aa28bc5d28d..57cbd7a3b2b8 100644 --- a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp @@ -1337,6 +1337,10 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { #include "ARMGenMCPseudoLowering.inc" void ARMAsmPrinter::emitInstruction(const MachineInstr *MI) { + // TODO FIXME: Enable feature predicate checks once all the
tests pass. + // ARM_MC::verifyInstructionPredicates(MI->getOpcode(), + // getSubtargetInfo().getFeatureBits()); + const DataLayout &DL = getDataLayout(); MCTargetStreamer &TS = *OutStreamer->getTargetStreamer(); ARMTargetStreamer &ATS = static_cast<ARMTargetStreamer &>(TS); diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 85e32c08c74c..e6be93e6480a 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -450,6 +450,14 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) { setOperationAction(ISD::TRUNCATE, VT, Custom); setOperationAction(ISD::VSELECT, VT, Expand); setOperationAction(ISD::SELECT, VT, Expand); + setOperationAction(ISD::SELECT_CC, VT, Expand); + + if (!HasMVEFP) { + setOperationAction(ISD::SINT_TO_FP, VT, Expand); + setOperationAction(ISD::UINT_TO_FP, VT, Expand); + setOperationAction(ISD::FP_TO_SINT, VT, Expand); + setOperationAction(ISD::FP_TO_UINT, VT, Expand); + } } setOperationAction(ISD::SETCC, MVT::v2i1, Expand); setOperationAction(ISD::TRUNCATE, MVT::v2i1, Expand); @@ -13350,14 +13358,14 @@ static SDValue TryDistrubutionADDVecReduce(SDNode *N, SelectionDAG &DAG) { // to make better use of vaddva style instructions. if (VT == MVT::i32 && N1.getOpcode() == ISD::ADD && !IsVecReduce(N0) && IsVecReduce(N1.getOperand(0)) && IsVecReduce(N1.getOperand(1)) && - !isa<ConstantSDNode>(N0)) { + !isa<ConstantSDNode>(N0) && N1->hasOneUse()) { SDValue Add0 = DAG.getNode(ISD::ADD, dl, VT, N0, N1.getOperand(0)); return DAG.getNode(ISD::ADD, dl, VT, Add0, N1.getOperand(1)); } // And turn add(add(A, reduce(B)), add(C, reduce(D))) -> // add(add(add(A, C), reduce(B)), reduce(D)) if (VT == MVT::i32 && N0.getOpcode() == ISD::ADD && - N1.getOpcode() == ISD::ADD) { + N1.getOpcode() == ISD::ADD && N0->hasOneUse() && N1->hasOneUse()) { unsigned N0RedOp = 0; if (!IsVecReduce(N0.getOperand(N0RedOp))) { N0RedOp = 1; @@ -13424,7 +13432,7 @@ static SDValue TryDistrubutionADDVecReduce(SDNode *N, SelectionDAG &DAG) { }; SDValue X; - if (N0.getOpcode() == ISD::ADD) { + if (N0.getOpcode() == ISD::ADD && N0->hasOneUse()) { if (IsVecReduce(N0.getOperand(0)) && IsVecReduce(N0.getOperand(1))) { int IsBefore = IsKnownOrderedLoad(N0.getOperand(0).getOperand(0), N0.getOperand(1).getOperand(0)); diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp index 3a9946ee810b..ba1d806c8d81 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -2247,15 +2247,15 @@ bool ARMTTIImpl::preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, return canTailPredicateLoop(L, LI, SE, DL, LAI); } -bool ARMTTIImpl::emitGetActiveLaneMask() const { +PredicationStyle ARMTTIImpl::emitGetActiveLaneMask() const { if (!ST->hasMVEIntegerOps() || !EnableTailPredication) - return false; + return PredicationStyle::None; // Intrinsic @llvm.get.active.lane.mask is supported. // It is used in the MVETailPredication pass, which requires the number of // elements processed by this vector loop to set up the tail-predicated // loop.
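Editorial note before the changed return statements below: emitGetActiveLaneMask now reports a predication style instead of a bare bool, letting callers distinguish "no lane-mask support" from the flavor of predication a target offers. A minimal hedged sketch; PredicationStyle::None and PredicationStyle::Data appear in this patch, while the DataAndControlFlow enumerator and the helper are assumptions about the surrounding TargetTransformInfo API rather than code shown here.

enum class PredicationStyle { None, Data, DataAndControlFlow };

// A consumer keeps its old yes/no question, while richer clients can
// inspect the exact style a target reports.
static bool shouldUseActiveLaneMask(PredicationStyle Style) {
  return Style != PredicationStyle::None;
}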
- return true; + return PredicationStyle::Data; } void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h index d7a2bdb3db15..dcf82e703a7f 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h @@ -298,7 +298,7 @@ public: TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE); - bool emitGetActiveLaneMask() const; + PredicationStyle emitGetActiveLaneMask() const; void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP); diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index 3f1379f135d1..9f85d72cc810 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -133,6 +133,7 @@ static bool getARMLoadDeprecationInfo(MCInst &MI, const MCSubtargetInfo &STI, } #define GET_INSTRINFO_MC_DESC +#define ENABLE_INSTR_PREDICATE_VERIFIER #include "ARMGenInstrInfo.inc" #define GET_SUBTARGETINFO_MC_DESC diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h index e0c992f4fae2..3066d9ba6783 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h @@ -139,6 +139,7 @@ bool isCDECoproc(size_t Coproc, const MCSubtargetInfo &STI); // Defines symbolic names for the ARM instructions. // #define GET_INSTRINFO_ENUM +#define GET_INSTRINFO_MC_HELPER_DECLS #include "ARMGenInstrInfo.inc" #define GET_SUBTARGETINFO_ENUM diff --git a/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp b/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp index 30785340ef12..296801094fbe 100644 --- a/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp +++ b/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp @@ -351,13 +351,13 @@ Optional<int64_t> MVEGatherScatterLowering::getIfConst(const Value *V) { if (!Op0 || !Op1) return Optional<int64_t>{}; if (I->getOpcode() == Instruction::Add) - return Optional<int64_t>{Op0.getValue() + Op1.getValue()}; + return Optional<int64_t>{Op0.value() + Op1.value()}; if (I->getOpcode() == Instruction::Mul) - return Optional<int64_t>{Op0.getValue() * Op1.getValue()}; + return Optional<int64_t>{Op0.value() * Op1.value()}; if (I->getOpcode() == Instruction::Shl) - return Optional<int64_t>{Op0.getValue() << Op1.getValue()}; + return Optional<int64_t>{Op0.value() << Op1.value()}; if (I->getOpcode() == Instruction::Or) - return Optional<int64_t>{Op0.getValue() | Op1.getValue()}; + return Optional<int64_t>{Op0.value() | Op1.value()}; } return Optional<int64_t>{}; } diff --git a/llvm/lib/Target/AVR/AVRAsmPrinter.cpp b/llvm/lib/Target/AVR/AVRAsmPrinter.cpp index 0001e520b1fb..70fc90bf9eb5 100644 --- a/llvm/lib/Target/AVR/AVRAsmPrinter.cpp +++ b/llvm/lib/Target/AVR/AVRAsmPrinter.cpp @@ -180,6 +180,10 @@ bool AVRAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, } void AVRAsmPrinter::emitInstruction(const MachineInstr *MI) { + // FIXME: Enable feature predicate checks once all the tests pass.
+ // AVR_MC::verifyInstructionPredicates(MI->getOpcode(), + // getSubtargetInfo().getFeatureBits()); + AVRMCInstLower MCInstLowering(OutContext, *this); MCInst I; diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp b/llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp index cdfe4a21105d..ba370261e284 100644 --- a/llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp +++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp @@ -27,6 +27,7 @@ #include "llvm/MC/TargetRegistry.h" #define GET_INSTRINFO_MC_DESC +#define ENABLE_INSTR_PREDICATE_VERIFIER #include "AVRGenInstrInfo.inc" #define GET_SUBTARGETINFO_MC_DESC diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.h b/llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.h index aaf236d82016..e83d674f87cc 100644 --- a/llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.h +++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.h @@ -49,6 +49,7 @@ std::unique_ptr<MCObjectTargetWriter> createAVRELFObjectWriter(uint8_t OSABI); #include "AVRGenRegisterInfo.inc" #define GET_INSTRINFO_ENUM +#define GET_INSTRINFO_MC_HELPER_DECLS #include "AVRGenInstrInfo.inc" #define GET_SUBTARGETINFO_ENUM diff --git a/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp b/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp index 349cdd92ae62..9aad9375d913 100644 --- a/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp +++ b/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp @@ -149,6 +149,13 @@ private: // The base call is not an input of any other preserve_* // intrinsics. std::map<CallInst *, CallInfo> BaseAICalls; + // A map to hold <AnonRecord, TypeDef> relationships + std::map<DICompositeType *, DIDerivedType *> AnonRecords; + + void CheckAnonRecordType(DIDerivedType *ParentTy, DIType *Ty); + void CheckCompositeType(DIDerivedType *ParentTy, DICompositeType *CTy); + void CheckDerivedType(DIDerivedType *ParentTy, DIDerivedType *DTy); + void ResetMetadata(struct CallInfo &CInfo); bool doTransformation(Function &F); @@ -221,10 +228,80 @@ bool BPFAbstractMemberAccess::run(Function &F) { if (M->debug_compile_units().empty()) return false; + // For each argument/return/local_variable type, trace the type + // pattern like '[derived_type]* [composite_type]' to check + // and remember (anon record -> typedef) relations where the + // anon record is defined as + // typedef [const/volatile/restrict]* [anon record] + DISubprogram *SP = F.getSubprogram(); + if (SP && SP->isDefinition()) { + for (DIType *Ty: SP->getType()->getTypeArray()) + CheckAnonRecordType(nullptr, Ty); + for (const DINode *DN : SP->getRetainedNodes()) { + if (const auto *DV = dyn_cast<DILocalVariable>(DN)) + CheckAnonRecordType(nullptr, DV->getType()); + } + } + DL = &M->getDataLayout(); return doTransformation(F); } +void BPFAbstractMemberAccess::ResetMetadata(struct CallInfo &CInfo) { + if (auto Ty = dyn_cast<DICompositeType>(CInfo.Metadata)) { + if (AnonRecords.find(Ty) != AnonRecords.end()) { + if (AnonRecords[Ty] != nullptr) + CInfo.Metadata = AnonRecords[Ty]; + } + } +} + +void BPFAbstractMemberAccess::CheckCompositeType(DIDerivedType *ParentTy, + DICompositeType *CTy) { + if (!CTy->getName().empty() || !ParentTy || + ParentTy->getTag() != dwarf::DW_TAG_typedef) + return; + + if (AnonRecords.find(CTy) == AnonRecords.end()) { + AnonRecords[CTy] = ParentTy; + return; + } + + // Two or more typedef's may point to the same anon record. + // If this is the case, set the typedef DIType to be nullptr + // to indicate the duplication case. 
+ DIDerivedType *CurrTy = AnonRecords[CTy]; + if (CurrTy == ParentTy) + return; + AnonRecords[CTy] = nullptr; +} + +void BPFAbstractMemberAccess::CheckDerivedType(DIDerivedType *ParentTy, + DIDerivedType *DTy) { + DIType *BaseType = DTy->getBaseType(); + if (!BaseType) + return; + + unsigned Tag = DTy->getTag(); + if (Tag == dwarf::DW_TAG_pointer_type) + CheckAnonRecordType(nullptr, BaseType); + else if (Tag == dwarf::DW_TAG_typedef) + CheckAnonRecordType(DTy, BaseType); + else + CheckAnonRecordType(ParentTy, BaseType); +} + +void BPFAbstractMemberAccess::CheckAnonRecordType(DIDerivedType *ParentTy, + DIType *Ty) { + if (!Ty) + return; + + if (auto *CTy = dyn_cast<DICompositeType>(Ty)) + return CheckCompositeType(ParentTy, CTy); + else if (auto *DTy = dyn_cast<DIDerivedType>(Ty)) + return CheckDerivedType(ParentTy, DTy); +} + static bool SkipDIDerivedTag(unsigned Tag, bool skipTypedef) { if (Tag != dwarf::DW_TAG_typedef && Tag != dwarf::DW_TAG_const_type && Tag != dwarf::DW_TAG_volatile_type && @@ -298,6 +375,7 @@ bool BPFAbstractMemberAccess::IsPreserveDIAccessIndexCall(const CallInst *Call, CInfo.Metadata = Call->getMetadata(LLVMContext::MD_preserve_access_index); if (!CInfo.Metadata) report_fatal_error("Missing metadata for llvm.preserve.union.access.index intrinsic"); + ResetMetadata(CInfo); CInfo.AccessIndex = getConstant(Call->getArgOperand(1)); CInfo.Base = Call->getArgOperand(0); return true; @@ -307,6 +385,7 @@ bool BPFAbstractMemberAccess::IsPreserveDIAccessIndexCall(const CallInst *Call, CInfo.Metadata = Call->getMetadata(LLVMContext::MD_preserve_access_index); if (!CInfo.Metadata) report_fatal_error("Missing metadata for llvm.preserve.struct.access.index intrinsic"); + ResetMetadata(CInfo); CInfo.AccessIndex = getConstant(Call->getArgOperand(2)); CInfo.Base = Call->getArgOperand(0); CInfo.RecordAlignment = DL->getABITypeAlign(getBaseElementType(Call)); diff --git a/llvm/lib/Target/BPF/BPFAsmPrinter.cpp b/llvm/lib/Target/BPF/BPFAsmPrinter.cpp index d6145f53c170..c8849bd50464 100644 --- a/llvm/lib/Target/BPF/BPFAsmPrinter.cpp +++ b/llvm/lib/Target/BPF/BPFAsmPrinter.cpp @@ -138,6 +138,9 @@ bool BPFAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, } void BPFAsmPrinter::emitInstruction(const MachineInstr *MI) { + BPF_MC::verifyInstructionPredicates(MI->getOpcode(), + getSubtargetInfo().getFeatureBits()); + MCInst TmpInst; if (!BTF || !BTF->InstLower(MI, TmpInst)) { diff --git a/llvm/lib/Target/BPF/BTF.h b/llvm/lib/Target/BPF/BTF.h index 4540054aaf34..89852be4a8c8 100644 --- a/llvm/lib/Target/BPF/BTF.h +++ b/llvm/lib/Target/BPF/BTF.h @@ -48,6 +48,8 @@ #ifndef LLVM_LIB_TARGET_BPF_BTF_H #define LLVM_LIB_TARGET_BPF_BTF_H +#include <cstdint> + namespace llvm { namespace BTF { diff --git a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp index a98d001097bc..cb321906db03 100644 --- a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp +++ b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp @@ -31,14 +31,13 @@ using namespace llvm; namespace { class BPFMCCodeEmitter : public MCCodeEmitter { - const MCInstrInfo &MCII; const MCRegisterInfo &MRI; bool IsLittleEndian; public: - BPFMCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri, + BPFMCCodeEmitter(const MCInstrInfo &, const MCRegisterInfo &mri, bool IsLittleEndian) - : MCII(mcii), MRI(mri), IsLittleEndian(IsLittleEndian) {} + : MRI(mri), IsLittleEndian(IsLittleEndian) { } BPFMCCodeEmitter(const BPFMCCodeEmitter &) = delete; void operator=(const 
BPFMCCodeEmitter &) = delete; ~BPFMCCodeEmitter() override = default; @@ -62,12 +61,6 @@ public: void encodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const override; - -private: - FeatureBitset computeAvailableFeatures(const FeatureBitset &FB) const; - void - verifyInstructionPredicates(const MCInst &MI, - const FeatureBitset &AvailableFeatures) const; }; } // end anonymous namespace @@ -117,9 +110,6 @@ static uint8_t SwapBits(uint8_t Val) void BPFMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const { - verifyInstructionPredicates(MI, - computeAvailableFeatures(STI.getFeatureBits())); - unsigned Opcode = MI.getOpcode(); support::endian::Writer OSE(OS, IsLittleEndian ? support::little : support::big); @@ -174,5 +164,4 @@ uint64_t BPFMCCodeEmitter::getMemoryOpValue(const MCInst &MI, unsigned Op, return Encoding; } -#define ENABLE_INSTR_PREDICATE_VERIFIER #include "BPFGenMCCodeEmitter.inc" diff --git a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp index 5a1e251cd29c..77db5f99225e 100644 --- a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp +++ b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp @@ -22,6 +22,7 @@ #include "llvm/Support/Host.h" #define GET_INSTRINFO_MC_DESC +#define ENABLE_INSTR_PREDICATE_VERIFIER #include "BPFGenInstrInfo.inc" #define GET_SUBTARGETINFO_MC_DESC diff --git a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h index fc190504581c..ea30e714a5b7 100644 --- a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h +++ b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h @@ -54,6 +54,7 @@ std::unique_ptr<MCObjectTargetWriter> createBPFELFObjectWriter(uint8_t OSABI); // Defines symbolic names for the BPF instructions. // #define GET_INSTRINFO_ENUM +#define GET_INSTRINFO_MC_HELPER_DECLS #include "BPFGenInstrInfo.inc" #define GET_SUBTARGETINFO_ENUM diff --git a/llvm/lib/Target/CSKY/CSKYAsmPrinter.cpp b/llvm/lib/Target/CSKY/CSKYAsmPrinter.cpp index 0236b22ad379..ea5b4555757e 100644 --- a/llvm/lib/Target/CSKY/CSKYAsmPrinter.cpp +++ b/llvm/lib/Target/CSKY/CSKYAsmPrinter.cpp @@ -141,6 +141,9 @@ void CSKYAsmPrinter::emitEndOfAsmFile(Module &M) { } void CSKYAsmPrinter::emitInstruction(const MachineInstr *MI) { + CSKY_MC::verifyInstructionPredicates(MI->getOpcode(), + getSubtargetInfo().getFeatureBits()); + // Do any auto-generated pseudo lowerings. if (emitPseudoExpansionLowering(*OutStreamer, MI)) return; diff --git a/llvm/lib/Target/CSKY/CSKYInstrInfo.td b/llvm/lib/Target/CSKY/CSKYInstrInfo.td index 300ecceae906..8d3835b22bb0 100644 --- a/llvm/lib/Target/CSKY/CSKYInstrInfo.td +++ b/llvm/lib/Target/CSKY/CSKYInstrInfo.td @@ -153,7 +153,7 @@ def CSKYSymbol : AsmOperandClass { let ParserMethod = "parseCSKYSymbol"; } -def br_symbol : Operand<iPTR> { +def br_symbol : Operand<OtherVT> { let EncoderMethod = "getBranchSymbolOpValue<CSKY::fixup_csky_pcrel_imm16_scale2>"; let ParserMatchClass = CSKYSymbol; diff --git a/llvm/lib/Target/CSKY/CSKYInstrInfo16Instr.td b/llvm/lib/Target/CSKY/CSKYInstrInfo16Instr.td index 3be1ca8b7998..2d7fb85e89fa 100644 --- a/llvm/lib/Target/CSKY/CSKYInstrInfo16Instr.td +++ b/llvm/lib/Target/CSKY/CSKYInstrInfo16Instr.td @@ -24,7 +24,7 @@ def CSKY_NIR : SDNode<"CSKYISD::NIR", SDTNone, // Operand and SDNode transformation definitions. 
//===----------------------------------------------------------------------===// -def br_symbol_16bit : Operand<iPTR> { +def br_symbol_16bit : Operand<OtherVT> { let EncoderMethod = "getBranchSymbolOpValue<CSKY::fixup_csky_pcrel_imm10_scale2>"; let ParserMatchClass = CSKYSymbol; diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.cpp b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.cpp index 1a69dc8acde0..64f01cd1c9fa 100644 --- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.cpp +++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.cpp @@ -26,6 +26,7 @@ #include "llvm/MC/TargetRegistry.h" #define GET_INSTRINFO_MC_DESC +#define ENABLE_INSTR_PREDICATE_VERIFIER #include "CSKYGenInstrInfo.inc" #define GET_REGINFO_MC_DESC diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.h b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.h index 4b8c45e95b74..1137b4d6e9b1 100644 --- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.h +++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.h @@ -41,6 +41,7 @@ MCCodeEmitter *createCSKYMCCodeEmitter(const MCInstrInfo &MCII, MCContext &Ctx); #include "CSKYGenRegisterInfo.inc" #define GET_INSTRINFO_ENUM +#define GET_INSTRINFO_MC_HELPER_DECLS #include "CSKYGenInstrInfo.inc" #define GET_SUBTARGETINFO_ENUM diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td index 4d6e1a9d3166..709279889653 100644 --- a/llvm/lib/Target/DirectX/DXIL.td +++ b/llvm/lib/Target/DirectX/DXIL.td @@ -116,7 +116,7 @@ def ThreadId :dxil_op< "ThreadId", 93, ThreadIdClass, ComputeID, "reads the thr dxil_param<1, "i32", "opcode", "DXIL opcode">, dxil_param<2, "i32", "component", "component to read (x,y,z)"> ]>, - dxil_map_intrinsic<int_dxil_thread_id>; + dxil_map_intrinsic<int_dx_thread_id>; def GroupId :dxil_op< "GroupId", 94, GroupIdClass, ComputeID, "reads the group ID (SV_GroupID)", "i32;", "rn", [ @@ -124,7 +124,7 @@ def GroupId :dxil_op< "GroupId", 94, GroupIdClass, ComputeID, "reads the group dxil_param<1, "i32", "opcode", "DXIL opcode">, dxil_param<2, "i32", "component", "component to read"> ]>, - dxil_map_intrinsic<int_dxil_group_id>; + dxil_map_intrinsic<int_dx_group_id>; def ThreadIdInGroup :dxil_op< "ThreadIdInGroup", 95, ThreadIdInGroupClass, ComputeID, "reads the thread ID within the group (SV_GroupThreadID)", "i32;", "rn", @@ -133,7 +133,7 @@ def ThreadIdInGroup :dxil_op< "ThreadIdInGroup", 95, ThreadIdInGroupClass, Comp dxil_param<1, "i32", "opcode", "DXIL opcode">, dxil_param<2, "i32", "component", "component to read (x,y,z)"> ]>, - dxil_map_intrinsic<int_dxil_thread_id_in_group>; + dxil_map_intrinsic<int_dx_thread_id_in_group>; def FlattenedThreadIdInGroup :dxil_op< "FlattenedThreadIdInGroup", 96, FlattenedThreadIdInGroupClass, ComputeID, "provides a flattened index for a given thread within a given group (SV_GroupIndex)", "i32;", "rn", @@ -141,4 +141,4 @@ def FlattenedThreadIdInGroup :dxil_op< "FlattenedThreadIdInGroup", 96, Flattene dxil_param<0, "i32", "", "result">, dxil_param<1, "i32", "opcode", "DXIL opcode"> ]>, - dxil_map_intrinsic<int_dxil_flattened_thread_id_in_group>; + dxil_map_intrinsic<int_dx_flattened_thread_id_in_group>; diff --git a/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp b/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp index 494a71e51a89..3e09270a66d0 100644 --- a/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp +++ b/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp @@ -595,6 +595,10 @@ unsigned 
DXILBitcodeWriter::getEncodedRMWOperation(AtomicRMWInst::BinOp Op) { return bitc::RMW_FADD; case AtomicRMWInst::FSub: return bitc::RMW_FSUB; + case AtomicRMWInst::FMax: + return bitc::RMW_FMAX; + case AtomicRMWInst::FMin: + return bitc::RMW_FMIN; } } diff --git a/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp index 48d339234e9e..1064296b0991 100644 --- a/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp +++ b/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp @@ -743,6 +743,9 @@ void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst, /// Print out a single Hexagon MI to the current output stream. void HexagonAsmPrinter::emitInstruction(const MachineInstr *MI) { + Hexagon_MC::verifyInstructionPredicates(MI->getOpcode(), + getSubtargetInfo().getFeatureBits()); + MCInst MCB; MCB.setOpcode(Hexagon::BUNDLE); MCB.addOperand(MCOperand::createImm(0)); diff --git a/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp b/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp index 0b4a95bc9ce5..01501109f3b1 100644 --- a/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp +++ b/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp @@ -1024,7 +1024,7 @@ void HexagonFrameLowering::insertCFIInstructions(MachineFunction &MF) const { for (auto &B : MF) { auto At = findCFILocation(B); if (At) - insertCFIInstructionsAt(B, At.getValue()); + insertCFIInstructionsAt(B, At.value()); } } diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp index ed2856eb1fe9..9c235776c160 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp @@ -376,11 +376,9 @@ void HexagonMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, State.Bundle = &MI; State.Index = 0; size_t Last = HexagonMCInstrInfo::bundleSize(HMB) - 1; - FeatureBitset Features = computeAvailableFeatures(STI.getFeatureBits()); for (auto &I : HexagonMCInstrInfo::bundleInstructions(HMB)) { MCInst &HMI = const_cast<MCInst &>(*I.getInst()); - verifyInstructionPredicates(HMI, Features); EncodeSingleInstruction(HMI, OS, Fixups, STI, parseBits(Last, HMB, HMI)); State.Extended = HexagonMCInstrInfo::isImmext(HMI); @@ -793,5 +791,4 @@ MCCodeEmitter *llvm::createHexagonMCCodeEmitter(MCInstrInfo const &MII, return new HexagonMCCodeEmitter(MII, MCT); } -#define ENABLE_INSTR_PREDICATE_VERIFIER #include "HexagonGenMCCodeEmitter.inc" diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h index 9e86dc8e4989..151964bf818b 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h @@ -81,11 +81,6 @@ private: // Return parse bits for instruction `MCI' inside bundle `MCB' uint32_t parseBits(size_t Last, MCInst const &MCB, MCInst const &MCI) const; - - FeatureBitset computeAvailableFeatures(const FeatureBitset &FB) const; - void - verifyInstructionPredicates(const MCInst &MI, - const FeatureBitset &AvailableFeatures) const; }; } // end namespace llvm diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp index d068baf05998..f2d1173cd503 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp @@ -46,6 +46,7 @@ using namespace llvm; #define 
GET_INSTRINFO_MC_DESC +#define ENABLE_INSTR_PREDICATE_VERIFIER #include "HexagonGenInstrInfo.inc" #define GET_SUBTARGETINFO_MC_DESC diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h index d717e710f3c0..3932077c08f1 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h @@ -110,6 +110,7 @@ unsigned HexagonConvertUnits(unsigned ItinUnits, unsigned *Lanes); // #define GET_INSTRINFO_ENUM #define GET_INSTRINFO_SCHED_ENUM +#define GET_INSTRINFO_MC_HELPER_DECLS #include "HexagonGenInstrInfo.inc" #define GET_SUBTARGETINFO_ENUM diff --git a/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp b/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp index d715ba901a2b..33e7068622f1 100644 --- a/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp +++ b/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp @@ -705,14 +705,14 @@ LanaiAsmParser::parseRegister(bool RestoreOnFailure) { RegNum = MatchRegisterName(Lexer.getTok().getIdentifier()); if (RegNum == 0) { if (PercentTok && RestoreOnFailure) - Lexer.UnLex(PercentTok.getValue()); + Lexer.UnLex(PercentTok.value()); return nullptr; } Parser.Lex(); // Eat identifier token return LanaiOperand::createReg(RegNum, Start, End); } if (PercentTok && RestoreOnFailure) - Lexer.UnLex(PercentTok.getValue()); + Lexer.UnLex(PercentTok.value()); return nullptr; } diff --git a/llvm/lib/Target/Lanai/LanaiAsmPrinter.cpp b/llvm/lib/Target/Lanai/LanaiAsmPrinter.cpp index c0b7fd3fdd5d..d142fd3a414f 100644 --- a/llvm/lib/Target/Lanai/LanaiAsmPrinter.cpp +++ b/llvm/lib/Target/Lanai/LanaiAsmPrinter.cpp @@ -195,6 +195,9 @@ void LanaiAsmPrinter::customEmitInstruction(const MachineInstr *MI) { } void LanaiAsmPrinter::emitInstruction(const MachineInstr *MI) { + Lanai_MC::verifyInstructionPredicates(MI->getOpcode(), + getSubtargetInfo().getFeatureBits()); + MachineBasicBlock::const_instr_iterator I = MI->getIterator(); MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end(); diff --git a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp index eb6bf8d3836c..c43450869832 100644 --- a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp +++ b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp @@ -28,6 +28,7 @@ #include <string> #define GET_INSTRINFO_MC_DESC +#define ENABLE_INSTR_PREDICATE_VERIFIER #include "LanaiGenInstrInfo.inc" #define GET_SUBTARGETINFO_MC_DESC diff --git a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.h b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.h index e8da1bc88142..93fe1a4609d8 100644 --- a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.h +++ b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.h @@ -43,6 +43,7 @@ std::unique_ptr<MCObjectTargetWriter> createLanaiELFObjectWriter(uint8_t OSABI); // Defines symbolic names for the Lanai instructions. 
#define GET_INSTRINFO_ENUM +#define GET_INSTRINFO_MC_HELPER_DECLS #include "LanaiGenInstrInfo.inc" #define GET_SUBTARGETINFO_ENUM diff --git a/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp index dd61bb2df077..1467d1757ff0 100644 --- a/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp @@ -27,6 +27,9 @@ using namespace llvm; #include "LoongArchGenMCPseudoLowering.inc" void LoongArchAsmPrinter::emitInstruction(const MachineInstr *MI) { + LoongArch_MC::verifyInstructionPredicates( + MI->getOpcode(), getSubtargetInfo().getFeatureBits()); + // Do any auto-generated pseudo lowerings. if (emitPseudoExpansionLowering(*OutStreamer, MI)) return; diff --git a/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h index 7e5aa49f227c..b51c19188051 100644 --- a/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h +++ b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h @@ -39,6 +39,10 @@ public: // tblgen'erated function. bool emitPseudoExpansionLowering(MCStreamer &OutStreamer, const MachineInstr *MI); + // Wrapper needed for tblgenned pseudo lowering. + bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const { + return lowerLoongArchMachineOperandToMCOperand(MO, MCOp, *this); + } }; } // end namespace llvm diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td index 5b117d40e0a9..20448492a558 100644 --- a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td @@ -11,6 +11,22 @@ //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// +// LoongArch specific DAG Nodes. +//===----------------------------------------------------------------------===// + +def SDT_LoongArchMOVGR2FR_W_LA64 + : SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisVT<1, i64>]>; +def SDT_LoongArchMOVFR2GR_S_LA64 + : SDTypeProfile<1, 1, [SDTCisVT<0, i64>, SDTCisVT<1, f32>]>; +def SDT_LoongArchFTINT : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>; + +def loongarch_movgr2fr_w_la64 + : SDNode<"LoongArchISD::MOVGR2FR_W_LA64", SDT_LoongArchMOVGR2FR_W_LA64>; +def loongarch_movfr2gr_s_la64 + : SDNode<"LoongArchISD::MOVFR2GR_S_LA64", SDT_LoongArchMOVFR2GR_S_LA64>; +def loongarch_ftint : SDNode<"LoongArchISD::FTINT", SDT_LoongArchFTINT>; + +//===----------------------------------------------------------------------===// // Instructions //===----------------------------------------------------------------------===// @@ -149,6 +165,7 @@ def : PatFPSetcc<SETULT, FCMP_CULT_S, FPR32>; def : PatFPSetcc<SETULE, FCMP_CULE_S, FPR32>; def : PatFPSetcc<SETUNE, FCMP_CUNE_S, FPR32>; def : PatFPSetcc<SETUO, FCMP_CUN_S, FPR32>; +def : PatFPSetcc<SETLT, FCMP_CLT_S, FPR32>; // TODO: Match signaling comparison strict_fsetccs with FCMP_S*_S instructions. 
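Editorial aside on the three LoongArchISD nodes declared at the top of this hunk: MOVGR2FR_W_LA64 and MOVFR2GR_S_LA64 exist because i32 is not a legal type on LA64, so an f32 value moving to or from an integer register has to travel through i64, and FTINT is typed FP -> FP because the ftintrz.* instructions leave their integer result in a floating-point register. A minimal C++ model of the two transfer nodes follows (an editor's sketch: the function names are ours, and the sign extension in the second is an assumption about movfr2gr.s; the lowering code only relies on the low 32 bits):

    #include <cstdint>
    #include <cstring>

    // i64 GPR -> f32 FPR: only the low 32 bits are transferred.
    float movgr2fr_w_la64(int64_t g) {
      uint32_t lo = static_cast<uint32_t>(g);
      float f;
      std::memcpy(&f, &lo, sizeof(f));
      return f;
    }

    // f32 FPR -> i64 GPR: the f32 bit pattern lands in the low 32 bits.
    int64_t movfr2gr_s_la64(float f) {
      uint32_t bits;
      std::memcpy(&bits, &f, sizeof(bits));
      return static_cast<int32_t>(bits); // assumed sign-extended to 64 bits
    }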
@@ -174,4 +191,39 @@ def : PatFPSelectcc<SETULE, FCMP_CULE_S, FSEL_S, FPR32>; def : PatFPSelectcc<SETUNE, FCMP_CUNE_S, FSEL_S, FPR32>; def : PatFPSelectcc<SETUO, FCMP_CUN_S, FSEL_S, FPR32>; +/// Loads + +defm : LdPat<load, FLD_S, f32>; + +/// Stores + +defm : StPat<store, FST_S, FPR32, f32>; + +/// Floating point constants + +def : Pat<(f32 fpimm0), (MOVGR2FR_W R0)>; +def : Pat<(f32 fpimm0neg), (FNEG_S (MOVGR2FR_W R0))>; +def : Pat<(f32 fpimm1), (FFINT_S_W (MOVGR2FR_W (ADDI_W R0, 1)))>; + +// FP Conversion +def : Pat<(loongarch_ftint FPR32:$src), (FTINTRZ_W_S FPR32:$src)>; } // Predicates = [HasBasicF] + +let Predicates = [HasBasicF, IsLA64] in { +// GPR -> FPR +def : Pat<(loongarch_movgr2fr_w_la64 GPR:$src), (MOVGR2FR_W GPR:$src)>; +// FPR -> GPR +def : Pat<(loongarch_movfr2gr_s_la64 FPR32:$src), + (MOVFR2GR_S FPR32:$src)>; +// int -> f32 +def : Pat<(f32 (sint_to_fp GPR:$src)), (FFINT_S_W (MOVGR2FR_W GPR:$src))>; +} // Predicates = [HasBasicF, IsLA64] + +let Predicates = [HasBasicF, IsLA32] in { +// GPR -> FPR +def : Pat<(bitconvert (i32 GPR:$src)), (MOVGR2FR_W GPR:$src)>; +// FPR -> GPR +def : Pat<(i32 (bitconvert FPR32:$src)), (MOVFR2GR_S FPR32:$src)>; +// int -> f32 +def : Pat<(f32 (sint_to_fp (i32 GPR:$src))), (FFINT_S_W (MOVGR2FR_W GPR:$src))>; +} // Predicates = [HasBasicF, IsLA32] diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td index 07fa61f4c361..bb50cec9f4c0 100644 --- a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td @@ -131,6 +131,11 @@ def MOVGR2FR_D : FP_MOV<0b0000000100010100101010, "movgr2fr.d", FPR64, GPR>; def MOVFR2GR_D : FP_MOV<0b0000000100010100101110, "movfr2gr.d", GPR, FPR64>; } // Predicates = [HasBasicD, IsLA64] +// Instructions only available on LA32 +let Predicates = [HasBasicD, IsLA32], isCodeGenOnly = 1 in { +def MOVGR2FR_W_64 : FP_MOV<0b0000000100010100101001, "movgr2fr.w", FPR64, GPR>; +} // Predicates = [HasBasicD, IsLA32], isCodeGenOnly = 1 + //===----------------------------------------------------------------------===// // Pseudo-instructions and codegen patterns //===----------------------------------------------------------------------===// @@ -164,6 +169,7 @@ def : PatFPSetcc<SETULT, FCMP_CULT_D, FPR64>; def : PatFPSetcc<SETULE, FCMP_CULE_D, FPR64>; def : PatFPSetcc<SETUNE, FCMP_CUNE_D, FPR64>; def : PatFPSetcc<SETUO, FCMP_CUN_D, FPR64>; +def : PatFPSetcc<SETLT, FCMP_CLT_D, FPR64>; // TODO: Match signaling comparison strict_fsetccs with FCMP_S*_D instructions.
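Editorial aside: the f32 constant patterns in the previous hunk cover exactly the immediates that the isFPImmLegal override added later in this commit accepts, namely zero (either sign) and +1.0. The fpimm1 pattern builds 1.0f by integer conversion rather than a constant-pool load; a sketch of the equivalent computation, with the implied instruction sequence noted as an assumption:

    #include <cstdint>

    // addi.w rX, zero, 1 ; movgr2fr.w fY, rX ; ffint.s.w fY, fY  => 1.0f
    float materialize_one() { return static_cast<float>(int32_t{1}); }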
@@ -185,4 +191,52 @@ def : PatFPSelectcc<SETULE, FCMP_CULE_D, FSEL_D, FPR64>; def : PatFPSelectcc<SETUNE, FCMP_CUNE_D, FSEL_D, FPR64>; def : PatFPSelectcc<SETUO, FCMP_CUN_D, FSEL_D, FPR64>; +/// Loads + +defm : LdPat<load, FLD_D, f64>; + +/// Stores + +defm : StPat<store, FST_D, FPR64, f64>; + +/// FP conversion operations + +def : Pat<(loongarch_ftint FPR64:$src), (FTINTRZ_W_D FPR64:$src)>; +def : Pat<(f64 (loongarch_ftint FPR64:$src)), (FTINTRZ_L_D FPR64:$src)>; +def : Pat<(loongarch_ftint FPR32:$src), (FTINTRZ_L_S FPR32:$src)>; + +// f64 -> f32 +def : Pat<(f32 (fpround FPR64:$src)), (FCVT_S_D FPR64:$src)>; +// f32 -> f64 +def : Pat<(f64 (fpextend FPR32:$src)), (FCVT_D_S FPR32:$src)>; } // Predicates = [HasBasicD] + +/// Floating point constants + +let Predicates = [HasBasicD, IsLA64] in { +def : Pat<(f64 fpimm0), (MOVGR2FR_D R0)>; +def : Pat<(f64 fpimm0neg), (FNEG_D (MOVGR2FR_D R0))>; +def : Pat<(f64 fpimm1), (FFINT_D_L (MOVGR2FR_D (ADDI_D R0, 1)))>; + +// Convert int to FP +def : Pat<(f64 (sint_to_fp (i64 (sexti32 (i64 GPR:$src))))), + (FFINT_D_W (MOVGR2FR_W GPR:$src))>; +def : Pat<(f64 (sint_to_fp GPR:$src)), (FFINT_D_L (MOVGR2FR_D GPR:$src))>; + +def : Pat<(f64 (uint_to_fp (i64 (zexti32 (i64 GPR:$src))))), + (FFINT_D_W (MOVGR2FR_W GPR:$src))>; + +def : Pat<(bitconvert GPR:$src), (MOVGR2FR_D GPR:$src)>; + +// Convert FP to int +def : Pat<(bitconvert FPR64:$src), (MOVFR2GR_D FPR64:$src)>; +} // Predicates = [HasBasicD, IsLA64] + +let Predicates = [HasBasicD, IsLA32] in { +def : Pat<(f64 fpimm0), (MOVGR2FRH_W (MOVGR2FR_W_64 R0), R0)>; +def : Pat<(f64 fpimm0neg), (FNEG_D (MOVGR2FRH_W (MOVGR2FR_W_64 R0), R0))>; +def : Pat<(f64 fpimm1), (FCVT_D_S (FFINT_S_W (MOVGR2FR_W (ADDI_W R0, 1))))>; + +// Convert int to FP +def : Pat<(f64 (sint_to_fp (i32 GPR:$src))), (FFINT_D_W (MOVGR2FR_W GPR:$src))>; +} // Predicates = [HasBasicD, IsLA32] diff --git a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp index 7182d55ca3cf..0d9ec9e2eaaa 100644 --- a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp @@ -11,7 +11,9 @@ //===----------------------------------------------------------------------===// #include "LoongArchFrameLowering.h" +#include "LoongArchMachineFunctionInfo.h" #include "LoongArchSubtarget.h" +#include "MCTargetDesc/LoongArchBaseInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -44,12 +46,178 @@ bool LoongArchFrameLowering::hasBP(const MachineFunction &MF) const { return MFI.hasVarSizedObjects() && TRI->hasStackRealignment(MF); } +void LoongArchFrameLowering::adjustReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, Register DestReg, + Register SrcReg, int64_t Val, + MachineInstr::MIFlag Flag) const { + const LoongArchInstrInfo *TII = STI.getInstrInfo(); + bool IsLA64 = STI.is64Bit(); + + if (DestReg == SrcReg && Val == 0) + return; + + if (isInt<12>(Val)) { + // addi.w/d $DstReg, $SrcReg, Val + BuildMI(MBB, MBBI, DL, + TII->get(IsLA64 ? LoongArch::ADDI_D : LoongArch::ADDI_W), DestReg) + .addReg(SrcReg) + .addImm(Val) + .setMIFlag(Flag); + return; + } + + report_fatal_error("adjustReg cannot yet handle adjustments >12 bits"); +} + +// Determine the size of the frame and maximum call frame size. 
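// (Editor's note: adjustReg above only accepts deltas that fit the signed 12-bit immediate of addi.w/d, i.e. [-2048, 2047]; anything larger currently hits the report_fatal_error. As a worked example of the alignment below: with the LoongArch ABI's 16-byte stack alignment, a 20-byte frame becomes alignTo(20, 16) = 32 bytes.)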
+void LoongArchFrameLowering::determineFrameLayout(MachineFunction &MF) const { + MachineFrameInfo &MFI = MF.getFrameInfo(); + + // Get the number of bytes to allocate from the FrameInfo. + uint64_t FrameSize = MFI.getStackSize(); + + // Make sure the frame is aligned. + FrameSize = alignTo(FrameSize, getStackAlign()); + + // Update frame info. + MFI.setStackSize(FrameSize); +} + void LoongArchFrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { - // TODO: Implement this when we have function calls + MachineFrameInfo &MFI = MF.getFrameInfo(); + const LoongArchRegisterInfo *RI = STI.getRegisterInfo(); + const LoongArchInstrInfo *TII = STI.getInstrInfo(); + MachineBasicBlock::iterator MBBI = MBB.begin(); + + Register SPReg = LoongArch::R3; + Register FPReg = LoongArch::R22; + + // Debug location must be unknown since the first debug location is used + // to determine the end of the prologue. + DebugLoc DL; + + // Determine the correct frame layout + determineFrameLayout(MF); + + // First, compute final stack size. + uint64_t StackSize = MFI.getStackSize(); + + // Early exit if there is no need to allocate space in the stack. + if (StackSize == 0 && !MFI.adjustsStack()) + return; + + // Adjust stack. + adjustReg(MBB, MBBI, DL, SPReg, SPReg, -StackSize, MachineInstr::FrameSetup); + // Emit ".cfi_def_cfa_offset StackSize". + unsigned CFIIndex = + MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, StackSize)); + BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex) + .setMIFlag(MachineInstr::FrameSetup); + + const auto &CSI = MFI.getCalleeSavedInfo(); + + // The frame pointer is callee-saved, and code has been generated for us to + // save it to the stack. We need to skip over the storing of callee-saved + // registers as the frame pointer must be modified after it has been saved + // to the stack, not before. + std::advance(MBBI, CSI.size()); + + // Iterate over list of callee-saved registers and emit .cfi_offset + // directives. + for (const auto &Entry : CSI) { + int64_t Offset = MFI.getObjectOffset(Entry.getFrameIdx()); + unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( + nullptr, RI->getDwarfRegNum(Entry.getReg(), true), Offset)); + BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex) + .setMIFlag(MachineInstr::FrameSetup); + } + + // Generate new FP. + if (hasFP(MF)) { + adjustReg(MBB, MBBI, DL, FPReg, SPReg, StackSize, MachineInstr::FrameSetup); + + // Emit ".cfi_def_cfa $fp, 0" + unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa( + nullptr, RI->getDwarfRegNum(FPReg, true), 0)); + BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex) + .setMIFlag(MachineInstr::FrameSetup); + } } void LoongArchFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { - // TODO: Implement this when we have function calls + const LoongArchRegisterInfo *RI = STI.getRegisterInfo(); + MachineFrameInfo &MFI = MF.getFrameInfo(); + Register SPReg = LoongArch::R3; + + MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); + DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + + const auto &CSI = MFI.getCalleeSavedInfo(); + // Skip to before the restores of callee-saved registers. + auto LastFrameDestroy = MBBI; + if (!CSI.empty()) + LastFrameDestroy = std::prev(MBBI, CSI.size()); + + // Get the number of bytes from FrameInfo. 
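// (Editor's sketch, illustrative only: for a fixed 32-byte LA64 frame with no variable-sized objects, the epilogue code below reduces to the callee-saved reloads emitted by the generic restore logic, followed by a single `addi.d $sp, $sp, 32` produced by adjustReg, and then the return.)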
+ uint64_t StackSize = MFI.getStackSize(); + + // Restore the stack pointer. + if (RI->hasStackRealignment(MF) || MFI.hasVarSizedObjects()) { + assert(hasFP(MF) && "frame pointer should not have been eliminated"); + adjustReg(MBB, LastFrameDestroy, DL, SPReg, LoongArch::R22, -StackSize, + MachineInstr::FrameDestroy); + } + + // Deallocate stack + adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackSize, MachineInstr::FrameDestroy); +} + +void LoongArchFrameLowering::determineCalleeSaves(MachineFunction &MF, + BitVector &SavedRegs, + RegScavenger *RS) const { + TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); + // Unconditionally spill RA and FP only if the function uses a frame + // pointer. + if (hasFP(MF)) { + SavedRegs.set(LoongArch::R1); + SavedRegs.set(LoongArch::R22); + } + // Mark BP as used if function has dedicated base pointer. + if (hasBP(MF)) + SavedRegs.set(LoongArchABI::getBPReg()); +} + +StackOffset LoongArchFrameLowering::getFrameIndexReference( + const MachineFunction &MF, int FI, Register &FrameReg) const { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); + + // Callee-saved registers should be referenced relative to the stack + // pointer (positive offset), otherwise use the frame pointer (negative + // offset). + const auto &CSI = MFI.getCalleeSavedInfo(); + int MinCSFI = 0; + int MaxCSFI = -1; + StackOffset Offset = + StackOffset::getFixed(MFI.getObjectOffset(FI) - getOffsetOfLocalArea() + + MFI.getOffsetAdjustment()); + + if (CSI.size()) { + MinCSFI = CSI[0].getFrameIdx(); + MaxCSFI = CSI[CSI.size() - 1].getFrameIdx(); + } + + FrameReg = RI->getFrameRegister(MF); + if ((FI >= MinCSFI && FI <= MaxCSFI) || !hasFP(MF)) { + FrameReg = LoongArch::R3; + Offset += StackOffset::getFixed(MFI.getStackSize()); + } + + return Offset; } diff --git a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h index 25c53efc10f1..014b666de711 100644 --- a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h +++ b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h @@ -31,8 +31,26 @@ public: void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; + void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, + RegScavenger *RS) const override; + + MachineBasicBlock::iterator + eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const override { + return MBB.erase(MI); + } + + StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, + Register &FrameReg) const override; + bool hasFP(const MachineFunction &MF) const override; bool hasBP(const MachineFunction &MF) const; + +private: + void determineFrameLayout(MachineFunction &MF) const; + void adjustReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, Register DestReg, Register SrcReg, + int64_t Val, MachineInstr::MIFlag Flag) const; }; } // namespace llvm #endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHFRAMELOWERING_H diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp index cc9ea0255d98..bb40ff817574 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp @@ -33,13 +33,14 @@ void LoongArchDAGToDAGISel::Select(SDNode *Node) { unsigned Opcode = Node->getOpcode(); MVT GRLenVT = 
Subtarget->getGRLenVT(); SDLoc DL(Node); + MVT VT = Node->getSimpleValueType(0); switch (Opcode) { default: break; case ISD::Constant: { int64_t Imm = cast<ConstantSDNode>(Node)->getSExtValue(); - if (Imm == 0 && Node->getSimpleValueType(0) == GRLenVT) { + if (Imm == 0 && VT == GRLenVT) { SDValue New = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, LoongArch::R0, GRLenVT); ReplaceNode(Node, New.getNode()); @@ -60,6 +61,15 @@ void LoongArchDAGToDAGISel::Select(SDNode *Node) { ReplaceNode(Node, Result); return; } + case ISD::FrameIndex: { + SDValue Imm = CurDAG->getTargetConstant(0, DL, GRLenVT); + int FI = cast<FrameIndexSDNode>(Node)->getIndex(); + SDValue TFI = CurDAG->getTargetFrameIndex(FI, VT); + unsigned ADDIOp = + Subtarget->is64Bit() ? LoongArch::ADDI_D : LoongArch::ADDI_W; + ReplaceNode(Node, CurDAG->getMachineNode(ADDIOp, DL, VT, TFI, Imm)); + return; + } // TODO: Add selection nodes needed later. } @@ -67,6 +77,17 @@ void LoongArchDAGToDAGISel::Select(SDNode *Node) { SelectCode(Node); } +bool LoongArchDAGToDAGISel::SelectBaseAddr(SDValue Addr, SDValue &Base) { + // If this is FrameIndex, select it directly. Otherwise just let it get + // selected to a register independently. + if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) + Base = + CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getGRLenVT()); + else + Base = Addr; + return true; +} + bool LoongArchDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth, SDValue &ShAmt) { // Shift instructions on LoongArch only read the lower 5 or 6 bits of the @@ -125,6 +146,39 @@ bool LoongArchDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth, return true; } +bool LoongArchDAGToDAGISel::selectSExti32(SDValue N, SDValue &Val) { + if (N.getOpcode() == ISD::SIGN_EXTEND_INREG && + cast<VTSDNode>(N.getOperand(1))->getVT() == MVT::i32) { + Val = N.getOperand(0); + return true; + } + MVT VT = N.getSimpleValueType(); + if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - 32)) { + Val = N; + return true; + } + + return false; +} + +bool LoongArchDAGToDAGISel::selectZExti32(SDValue N, SDValue &Val) { + if (N.getOpcode() == ISD::AND) { + auto *C = dyn_cast<ConstantSDNode>(N.getOperand(1)); + if (C && C->getZExtValue() == UINT64_C(0xFFFFFFFF)) { + Val = N.getOperand(0); + return true; + } + } + MVT VT = N.getSimpleValueType(); + APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), 32); + if (CurDAG->MaskedValueIsZero(N, Mask)) { + Val = N; + return true; + } + + return false; +} + // This pass converts a legalized DAG into a LoongArch-specific DAG, ready // for instruction scheduling. FunctionPass *llvm::createLoongArchISelDag(LoongArchTargetMachine &TM) { diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h index f477129d933c..7ad329a64424 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h +++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h @@ -38,6 +38,8 @@ public: void Select(SDNode *Node) override; + bool SelectBaseAddr(SDValue Addr, SDValue &Base); + bool selectShiftMask(SDValue N, unsigned ShiftWidth, SDValue &ShAmt); bool selectShiftMaskGRLen(SDValue N, SDValue &ShAmt) { return selectShiftMask(N, Subtarget->getGRLen(), ShAmt); @@ -46,6 +48,9 @@ public: return selectShiftMask(N, 32, ShAmt); } + bool selectSExti32(SDValue N, SDValue &Val); + bool selectZExti32(SDValue N, SDValue &Val); + // Include the pieces autogenerated from the target description. 
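// (Editor's note on selectSExti32/selectZExti32 declared above: they let a pattern consume an i64 operand that is already known to be sign- or zero-extended from i32. For example, given (and GPR:$x, 0xffffffff), selectZExti32 strips the AND and yields $x itself, so no separate extension instruction is selected.)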
#include "LoongArchGenDAGISel.inc" }; diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index d5a469216859..4acf90bd9788 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -17,14 +17,21 @@ #include "LoongArchRegisterInfo.h" #include "LoongArchSubtarget.h" #include "LoongArchTargetMachine.h" +#include "MCTargetDesc/LoongArchMCTargetDesc.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/KnownBits.h" using namespace llvm; #define DEBUG_TYPE "loongarch-isel-lowering" +static cl::opt<bool> ZeroDivCheck( + "loongarch-check-zero-division", cl::Hidden, + cl::desc("Trap on integer division by zero."), + cl::init(false)); + LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, const LoongArchSubtarget &STI) : TargetLowering(TM), Subtarget(STI) { @@ -37,15 +44,25 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, if (Subtarget.hasBasicD()) addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass); + setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, GRLenVT, + MVT::i1, Promote); + // TODO: add necessary setOperationAction calls later. setOperationAction(ISD::SHL_PARTS, GRLenVT, Custom); setOperationAction(ISD::SRA_PARTS, GRLenVT, Custom); setOperationAction(ISD::SRL_PARTS, GRLenVT, Custom); + setOperationAction(ISD::FP_TO_SINT, GRLenVT, Custom); + + setOperationAction({ISD::GlobalAddress, ISD::ConstantPool}, GRLenVT, Custom); if (Subtarget.is64Bit()) { setOperationAction(ISD::SHL, MVT::i32, Custom); setOperationAction(ISD::SRA, MVT::i32, Custom); setOperationAction(ISD::SRL, MVT::i32, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); + setOperationAction(ISD::BITCAST, MVT::i32, Custom); + if (Subtarget.hasBasicF() && !Subtarget.hasBasicD()) + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); } static const ISD::CondCode FPCCToExpand[] = {ISD::SETOGT, ISD::SETOGE, @@ -58,10 +75,18 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, if (Subtarget.hasBasicD()) { setCondCodeAction(FPCCToExpand, MVT::f64, Expand); setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); } + setOperationAction(ISD::BR_CC, GRLenVT, Expand); setOperationAction(ISD::SELECT_CC, GRLenVT, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); + setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, GRLenVT, Expand); + if (!Subtarget.is64Bit()) + setLibcallName(RTLIB::MUL_I128, nullptr); + + setOperationAction(ISD::FP_TO_UINT, GRLenVT, Custom); + setOperationAction(ISD::UINT_TO_FP, GRLenVT, Custom); // Compute derived properties from the register classes. computeRegisterProperties(STI.getRegisterInfo()); @@ -70,11 +96,14 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, setBooleanContents(ZeroOrOneBooleanContent); + setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen()); + // Function alignments.
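// (Editor's note: every LoongArch instruction is a fixed 32 bits wide, hence the 4-byte minimum function alignment set below.)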
const Align FunctionAlignment(4); setMinFunctionAlignment(FunctionAlignment); setTargetDAGCombine(ISD::AND); + setTargetDAGCombine(ISD::OR); setTargetDAGCombine(ISD::SRL); } @@ -83,6 +112,8 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op, switch (Op.getOpcode()) { default: report_fatal_error("unimplemented operand"); + case ISD::GlobalAddress: + return lowerGlobalAddress(Op, DAG); case ISD::SHL_PARTS: return lowerShiftLeftParts(Op, DAG); case ISD::SRA_PARTS: @@ -96,7 +127,105 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op, assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() && "Unexpected custom legalisation"); return SDValue(); + case ISD::ConstantPool: + return lowerConstantPool(Op, DAG); + case ISD::FP_TO_SINT: + return lowerFP_TO_SINT(Op, DAG); + case ISD::BITCAST: + return lowerBITCAST(Op, DAG); + case ISD::FP_TO_UINT: + return SDValue(); + case ISD::UINT_TO_FP: + return lowerUINT_TO_FP(Op, DAG); + } +} + +SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op, + SelectionDAG &DAG) const { + + SDLoc DL(Op); + auto &TLI = DAG.getTargetLoweringInfo(); + SDValue Tmp1, Tmp2; + SDValue Op1 = Op.getOperand(0); + if (Op1->getOpcode() == ISD::AssertZext || + Op1->getOpcode() == ISD::AssertSext) + return Op; + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op.getOperand(0)); + SDValue Res = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f64, Trunc); + SDNode *N = Res.getNode(); + TLI.expandUINT_TO_FP(N, Tmp1, Tmp2, DAG); + return Tmp1; +} + +SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op, + SelectionDAG &DAG) const { + + SDLoc DL(Op); + SDValue Op0 = Op.getOperand(0); + + if (Op.getValueType() == MVT::f32 && Op0.getValueType() == MVT::i32 && + Subtarget.is64Bit() && Subtarget.hasBasicF()) { + SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0); + return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0); } + return Op; +} + +SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op, + SelectionDAG &DAG) const { + + SDLoc DL(Op); + + if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() && + !Subtarget.hasBasicD()) { + SDValue Dst = + DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op.getOperand(0)); + return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst); + } + + EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits()); + SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op.getOperand(0)); + return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc); +} + +SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + EVT Ty = Op.getValueType(); + ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op); + + // FIXME: Only support PC-relative addressing to access the symbol. + // Target flags will be added later. + if (!isPositionIndependent()) { + SDValue ConstantN = DAG.getTargetConstantPool( + N->getConstVal(), Ty, N->getAlign(), N->getOffset()); + SDValue AddrHi(DAG.getMachineNode(LoongArch::PCALAU12I, DL, Ty, ConstantN), + 0); + SDValue Addr(DAG.getMachineNode(Subtarget.is64Bit() ? LoongArch::ADDI_D + : LoongArch::ADDI_W, + DL, Ty, AddrHi, ConstantN), + 0); + return Addr; + } + report_fatal_error("Unable to lower ConstantPool"); +} + +SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + EVT Ty = getPointerTy(DAG.getDataLayout()); + const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); + unsigned ADDIOp = Subtarget.is64Bit() ? 
LoongArch::ADDI_D : LoongArch::ADDI_W; + + // FIXME: Only support PC-relative addressing to access the symbol. + // TODO: Add target flags. + if (!isPositionIndependent()) { + SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty); + SDValue AddrHi(DAG.getMachineNode(LoongArch::PCALAU12I, DL, Ty, GA), 0); + SDValue Addr(DAG.getMachineNode(ADDIOp, DL, Ty, AddrHi, GA), 0); + return Addr; + } + report_fatal_error("Unable to lowerGlobalAddress"); } SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op, @@ -238,6 +367,36 @@ void LoongArchTargetLowering::ReplaceNodeResults( break; } break; + case ISD::FP_TO_SINT: { + assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && + "Unexpected custom legalisation"); + SDValue Src = N->getOperand(0); + EVT VT = EVT::getFloatingPointVT(N->getValueSizeInBits(0)); + SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, VT, Src); + Results.push_back(DAG.getNode(ISD::BITCAST, DL, N->getValueType(0), Dst)); + break; + } + case ISD::BITCAST: { + EVT VT = N->getValueType(0); + SDValue Src = N->getOperand(0); + EVT SrcVT = Src.getValueType(); + if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() && + Subtarget.hasBasicF()) { + SDValue Dst = + DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src); + Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst)); + } + break; + } + case ISD::FP_TO_UINT: { + assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && + "Unexpected custom legalisation"); + auto &TLI = DAG.getTargetLoweringInfo(); + SDValue Tmp1, Tmp2; + TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG); + Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1)); + break; + } } } @@ -345,6 +504,224 @@ static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +static SDValue performORCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const LoongArchSubtarget &Subtarget) { + MVT GRLenVT = Subtarget.getGRLenVT(); + EVT ValTy = N->getValueType(0); + SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); + ConstantSDNode *CN0, *CN1; + SDLoc DL(N); + unsigned ValBits = ValTy.getSizeInBits(); + unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1; + unsigned Shamt; + bool SwapAndRetried = false; + + if (DCI.isBeforeLegalizeOps()) + return SDValue(); + + if (ValBits != 32 && ValBits != 64) + return SDValue(); + +Retry: + // 1st pattern to match BSTRINS: + // R = or (and X, mask0), (and (shl Y, lsb), mask1) + // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1 + // => + // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1) + if (N0.getOpcode() == ISD::AND && + (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) && + isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) && + N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL && + (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) && + isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) && + MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 && + (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) && + (Shamt = CN1->getZExtValue()) == MaskIdx0 && + (MaskIdx0 + MaskLen0 <= ValBits)) { + LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n"); + return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0), + N1.getOperand(0).getOperand(0), + DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT), + DAG.getConstant(MaskIdx0, DL, GRLenVT)); + } + + // 2nd pattern to match BSTRINS: + // R = or (and X, mask0), (shl (and Y, mask1), lsb) + // where mask1 = (2**size - 1), mask0 
= ~(mask1 << lsb) + // => + // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1) + if (N0.getOpcode() == ISD::AND && + (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) && + isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) && + N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND && + (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) && + (Shamt = CN1->getZExtValue()) == MaskIdx0 && + (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) && + isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) && + MaskLen0 == MaskLen1 && MaskIdx1 == 0 && + (MaskIdx0 + MaskLen0 <= ValBits)) { + LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n"); + return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0), + N1.getOperand(0).getOperand(0), + DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT), + DAG.getConstant(MaskIdx0, DL, GRLenVT)); + } + + // 3rd pattern to match BSTRINS: + // R = or (and X, mask0), (and Y, mask1) + // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0 + // => + // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb + // where msb = lsb + size - 1 + if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND && + (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) && + isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) && + (MaskIdx0 + MaskLen0 <= 64) && + (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) && + (CN1->getSExtValue() & CN0->getSExtValue()) == 0) { + LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n"); + return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0), + DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1, + DAG.getConstant(MaskIdx0, DL, GRLenVT)), + DAG.getConstant(ValBits == 32 + ? (MaskIdx0 + (MaskLen0 & 31) - 1) + : (MaskIdx0 + MaskLen0 - 1), + DL, GRLenVT), + DAG.getConstant(MaskIdx0, DL, GRLenVT)); + } + + // 4th pattern to match BSTRINS: + // R = or (and X, mask), (shl Y, shamt) + // where mask = (2**shamt - 1) + // => + // R = BSTRINS X, Y, ValBits - 1, shamt + // where ValBits = 32 or 64 + if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL && + (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) && + isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) && + MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) && + (Shamt = CN1->getZExtValue()) == MaskLen0 && + (MaskIdx0 + MaskLen0 <= ValBits)) { + LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n"); + return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0), + N1.getOperand(0), + DAG.getConstant((ValBits - 1), DL, GRLenVT), + DAG.getConstant(Shamt, DL, GRLenVT)); + } + + // 5th pattern to match BSTRINS: + // R = or (and X, mask), const + // where ~mask = (2**size - 1) << lsb, mask & const = 0 + // => + // R = BSTRINS X, (const >> lsb), msb, lsb + // where msb = lsb + size - 1 + if (N0.getOpcode() == ISD::AND && + (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) && + isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) && + (CN1 = dyn_cast<ConstantSDNode>(N1)) && + (CN1->getSExtValue() & CN0->getSExtValue()) == 0) { + LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n"); + return DAG.getNode( + LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0), + DAG.getConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy), + DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT), + DAG.getConstant(MaskIdx0, DL, GRLenVT)); + } + + // 6th pattern. 
+ // a = b | ((c & mask) << shamt), where all positions in b to be overwritten + // by the incoming bits are known to be zero. + // => + // a = BSTRINS b, c, shamt + MaskLen - 1, shamt + // + // Note that the 1st pattern is a special situation of the 6th, i.e. the 6th + // pattern is more common than the 1st. So we put the 1st before the 6th in + // order to match as many nodes as possible. + ConstantSDNode *CNMask, *CNShamt; + unsigned MaskIdx, MaskLen; + if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND && + (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) && + isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) && + MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) && + CNShamt->getZExtValue() + MaskLen <= ValBits) { + Shamt = CNShamt->getZExtValue(); + APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt); + if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) { + LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n"); + return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0, + N1.getOperand(0).getOperand(0), + DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT), + DAG.getConstant(Shamt, DL, GRLenVT)); + } + } + + // 7th pattern. + // a = b | ((c << shamt) & shifted_mask), where all positions in b to be + // overwritten by the incoming bits are known to be zero. + // => + // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx + // + // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd + // before the 7th in order to match as many nodes as possible. + if (N1.getOpcode() == ISD::AND && + (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) && + isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) && + N1.getOperand(0).getOpcode() == ISD::SHL && + (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) && + CNShamt->getZExtValue() == MaskIdx) { + APInt ShMask(ValBits, CNMask->getZExtValue()); + if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) { + LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n"); + return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0, + N1.getOperand(0).getOperand(0), + DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT), + DAG.getConstant(MaskIdx, DL, GRLenVT)); + } + } + + // (or a, b) and (or b, a) are equivalent, so swap the operands and retry. + if (!SwapAndRetried) { + std::swap(N0, N1); + SwapAndRetried = true; + goto Retry; + } + + SwapAndRetried = false; +Retry2: + // 8th pattern. + // a = b | (c & shifted_mask), where all positions in b to be overwritten by + // the incoming bits are known to be zero. + // => + // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx + // + // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So + // we put it here in order to match as many nodes as possible or generate less + // instructions. + if (N1.getOpcode() == ISD::AND && + (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) && + isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) { + APInt ShMask(ValBits, CNMask->getZExtValue()); + if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) { + LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n"); + return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0, + DAG.getNode(ISD::SRL, DL, N1->getValueType(0), + N1->getOperand(0), + DAG.getConstant(MaskIdx, DL, GRLenVT)), + DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT), + DAG.getConstant(MaskIdx, DL, GRLenVT)); + } + } + // Swap N0/N1 and retry. 
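// (Editor's reference model of what the BSTRINS node computes, derived from
// the pattern comments above; plain C++, illustrative only:
//   uint64_t bstrins(uint64_t dst, uint64_t src, unsigned msb, unsigned lsb) {
//     unsigned len = msb - lsb + 1;
//     uint64_t mask = len == 64 ? ~0ULL : ((1ULL << len) - 1) << lsb;
//     return (dst & ~mask) | ((src << lsb) & mask);
//   }
// All eight patterns in this combine are just different DAG shapes that
// reduce to this one bit-field insert.)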
+ if (!SwapAndRetried) { + std::swap(N0, N1); + SwapAndRetried = true; + goto Retry2; + } + + return SDValue(); +} + SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -353,12 +730,62 @@ SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N, break; case ISD::AND: return performANDCombine(N, DAG, DCI, Subtarget); + case ISD::OR: + return performORCombine(N, DAG, DCI, Subtarget); case ISD::SRL: return performSRLCombine(N, DAG, DCI, Subtarget); } return SDValue(); } +static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI, + MachineBasicBlock &MBB, + const TargetInstrInfo &TII) { + if (!ZeroDivCheck) + return &MBB; + + // Build instructions: + // div(or mod) $dst, $dividend, $divisor + // bnez $divisor, 8 + // break 7 + // fallthrough + MachineOperand &Divisor = MI.getOperand(2); + auto FallThrough = std::next(MI.getIterator()); + + BuildMI(MBB, FallThrough, MI.getDebugLoc(), TII.get(LoongArch::BNEZ)) + .addReg(Divisor.getReg(), getKillRegState(Divisor.isKill())) + .addImm(8); + + // See linux header file arch/loongarch/include/uapi/asm/break.h for the + // definition of BRK_DIVZERO. + BuildMI(MBB, FallThrough, MI.getDebugLoc(), TII.get(LoongArch::BREAK)) + .addImm(7/*BRK_DIVZERO*/); + + // Clear Divisor's kill flag. + Divisor.setIsKill(false); + + return &MBB; +} + +MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter( + MachineInstr &MI, MachineBasicBlock *BB) const { + + switch (MI.getOpcode()) { + default: + llvm_unreachable("Unexpected instr type to insert"); + case LoongArch::DIV_W: + case LoongArch::DIV_WU: + case LoongArch::MOD_W: + case LoongArch::MOD_WU: + case LoongArch::DIV_D: + case LoongArch::DIV_DU: + case LoongArch::MOD_D: + case LoongArch::MOD_DU: + return insertDivByZeroTrap(MI, *BB, *Subtarget.getInstrInfo()); + break; + } +} + const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const { switch ((LoongArchISD::NodeType)Opcode) { case LoongArchISD::FIRST_NUMBER: @@ -369,11 +796,16 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const { return "LoongArchISD::" #node; // TODO: Add more target-dependent nodes later. + NODE_NAME_CASE(CALL) NODE_NAME_CASE(RET) NODE_NAME_CASE(SLL_W) NODE_NAME_CASE(SRA_W) NODE_NAME_CASE(SRL_W) + NODE_NAME_CASE(BSTRINS) NODE_NAME_CASE(BSTRPICK) + NODE_NAME_CASE(MOVGR2FR_W_LA64) + NODE_NAME_CASE(MOVFR2GR_S_LA64) + NODE_NAME_CASE(FTINT) } #undef NODE_NAME_CASE return nullptr; @@ -483,6 +915,132 @@ SDValue LoongArchTargetLowering::LowerFormalArguments( return Chain; } +// Lower a call to a callseq_start + CALL + callseq_end chain, and add input +// and output parameter nodes. +SDValue +LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI, + SmallVectorImpl<SDValue> &InVals) const { + SelectionDAG &DAG = CLI.DAG; + SDLoc &DL = CLI.DL; + SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; + SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; + SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; + SDValue Chain = CLI.Chain; + SDValue Callee = CLI.Callee; + CallingConv::ID CallConv = CLI.CallConv; + bool IsVarArg = CLI.IsVarArg; + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + CLI.IsTailCall = false; + + if (IsVarArg) + report_fatal_error("LowerCall with varargs not implemented"); + + MachineFunction &MF = DAG.getMachineFunction(); + + // Analyze the operands of the call, assigning locations to each operand. 
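// (Editor's note: CC_LoongArch assigns the leading GPR-sized arguments to $a0-$a7; any argument that lands in a stack slot still trips the report_fatal_error below, since byval and on-stack passing are not implemented at this point.)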
+ SmallVector<CCValAssign> ArgLocs; + CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); + + analyzeOutputArgs(ArgCCInfo, Outs, CC_LoongArch); + + // Get a count of how many bytes are to be pushed on the stack. + unsigned NumBytes = ArgCCInfo.getNextStackOffset(); + + for (auto &Arg : Outs) { + if (!Arg.Flags.isByVal()) + continue; + report_fatal_error("Passing arguments byval not implemented"); + } + + Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL); + + // Copy argument values to their designated locations. + SmallVector<std::pair<Register, SDValue>> RegsToPass; + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + SDValue ArgValue = OutVals[i]; + + // Promote the value if needed. + // For now, only handle fully promoted arguments. + if (VA.getLocInfo() != CCValAssign::Full) + report_fatal_error("Unknown loc info"); + + if (VA.isRegLoc()) { + // Queue up the argument copies and emit them at the end. + RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue)); + } else { + report_fatal_error("Passing arguments via the stack not implemented"); + } + } + + SDValue Glue; + + // Build a sequence of copy-to-reg nodes, chained and glued together. + for (auto &Reg : RegsToPass) { + Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue); + Glue = Chain.getValue(1); + } + + // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a + // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't + // split it and then direct call can be matched by PseudoCALL. + // FIXME: Add target flags for relocation. + if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) + Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT); + else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) + Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT); + + // The first call operand is the chain and the second is the target address. + SmallVector<SDValue> Ops; + Ops.push_back(Chain); + Ops.push_back(Callee); + + // Add argument registers to the end of the list so that they are + // known live into the call. + for (auto &Reg : RegsToPass) + Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType())); + + // Add a register mask operand representing the call-preserved registers. + const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); + const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv); + assert(Mask && "Missing call preserved mask for calling convention"); + Ops.push_back(DAG.getRegisterMask(Mask)); + + // Glue the call to the argument copies, if any. + if (Glue.getNode()) + Ops.push_back(Glue); + + // Emit the call. + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + + Chain = DAG.getNode(LoongArchISD::CALL, DL, NodeTys, Ops); + DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge); + Glue = Chain.getValue(1); + + // Mark the end of the call, which is glued to the call itself. + Chain = DAG.getCALLSEQ_END(Chain, DAG.getConstant(NumBytes, DL, PtrVT, true), + DAG.getConstant(0, DL, PtrVT, true), Glue, DL); + Glue = Chain.getValue(1); + + // Assign locations to each value returned by this call. + SmallVector<CCValAssign> RVLocs; + CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext()); + analyzeInputArgs(RetCCInfo, Ins, CC_LoongArch); + + // Copy all of the result registers out of their specified physreg. + for (auto &VA : RVLocs) { + // Copy the value out. 
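// (Editor's note: getCopyFromReg called with a glue input yields three results, value, chain and glue; threading the glue through each iteration keeps these copies pinned immediately after the call node.)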
+ SDValue RetValue = + DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue); + Chain = RetValue.getValue(1); + Glue = RetValue.getValue(2); + + InVals.push_back(Chain.getValue(0)); + } + + return Chain; +} + bool LoongArchTargetLowering::CanLowerReturn( CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const { @@ -529,3 +1087,14 @@ SDValue LoongArchTargetLowering::LowerReturn( return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps); } + +bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, + bool ForCodeSize) const { + assert((VT == MVT::f32 || VT == MVT::f64) && "Unexpected VT"); + + if (VT == MVT::f32 && !Subtarget.hasBasicF()) + return false; + if (VT == MVT::f64 && !Subtarget.hasBasicD()) + return false; + return (Imm.isZero() || Imm.isExactlyValue(+1.0)); +} diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h index c852577a3744..279550482675 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h @@ -27,6 +27,7 @@ enum NodeType : unsigned { FIRST_NUMBER = ISD::BUILTIN_OP_END, // TODO: add more LoongArchISDs + CALL, RET, // 32-bit shifts, directly matching the semantics of the named LoongArch // instructions. @@ -34,6 +35,13 @@ enum NodeType : unsigned { SRA_W, SRL_W, + // FPR<->GPR transfer operations + MOVGR2FR_W_LA64, + MOVFR2GR_S_LA64, + + FTINT, + + BSTRINS, BSTRPICK, }; @@ -72,6 +80,8 @@ public: const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override; + SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, + SmallVectorImpl<SDValue> &InVals) const override; private: /// Target-specific function used to lower LoongArch calling conventions. @@ -86,8 +96,24 @@ private: const SmallVectorImpl<ISD::OutputArg> &Outs, LoongArchCCAssignFn Fn) const; + SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; SDValue lowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const; SDValue lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, bool IsSRA) const; + + MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr &MI, + MachineBasicBlock *BB) const override; + SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerBITCAST(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; + + bool isFPImmLegal(const APFloat &Imm, EVT VT, + bool ForCodeSize) const override; + + bool shouldInsertFencesForAtomic(const Instruction *I) const override { + return isa<LoadInst>(I) || isa<StoreInst>(I); + } }; } // end namespace llvm diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp index 146ef53befd5..bcbd4b28f3c7 100644 --- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp @@ -12,6 +12,7 @@ #include "LoongArchInstrInfo.h" #include "LoongArch.h" +#include "LoongArchMachineFunctionInfo.h" using namespace llvm; @@ -19,8 +20,8 @@ using namespace llvm; #include "LoongArchGenInstrInfo.inc" LoongArchInstrInfo::LoongArchInstrInfo(LoongArchSubtarget &STI) - // FIXME: add CFSetup and CFDestroy Inst when we implement function call. 
- : LoongArchGenInstrInfo() {} + : LoongArchGenInstrInfo(LoongArch::ADJCALLSTACKDOWN, + LoongArch::ADJCALLSTACKUP) {} void LoongArchInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, @@ -47,3 +48,68 @@ void LoongArchInstrInfo::copyPhysReg(MachineBasicBlock &MBB, BuildMI(MBB, MBBI, DL, get(Opc), DstReg) .addReg(SrcReg, getKillRegState(KillSrc)); } + +void LoongArchInstrInfo::storeRegToStackSlot( + MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register SrcReg, + bool IsKill, int FI, const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + DebugLoc DL; + if (I != MBB.end()) + DL = I->getDebugLoc(); + MachineFunction *MF = MBB.getParent(); + MachineFrameInfo &MFI = MF->getFrameInfo(); + + unsigned Opcode; + if (LoongArch::GPRRegClass.hasSubClassEq(RC)) + Opcode = TRI->getRegSizeInBits(LoongArch::GPRRegClass) == 32 + ? LoongArch::ST_W + : LoongArch::ST_D; + else if (LoongArch::FPR32RegClass.hasSubClassEq(RC)) + Opcode = LoongArch::FST_S; + else if (LoongArch::FPR64RegClass.hasSubClassEq(RC)) + Opcode = LoongArch::FST_D; + else + llvm_unreachable("Can't store this register to stack slot"); + + MachineMemOperand *MMO = MF->getMachineMemOperand( + MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore, + MFI.getObjectSize(FI), MFI.getObjectAlign(FI)); + + BuildMI(MBB, I, DL, get(Opcode)) + .addReg(SrcReg, getKillRegState(IsKill)) + .addFrameIndex(FI) + .addImm(0) + .addMemOperand(MMO); +} + +void LoongArchInstrInfo::loadRegFromStackSlot( + MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register DstReg, + int FI, const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + DebugLoc DL; + if (I != MBB.end()) + DL = I->getDebugLoc(); + MachineFunction *MF = MBB.getParent(); + MachineFrameInfo &MFI = MF->getFrameInfo(); + + unsigned Opcode; + if (LoongArch::GPRRegClass.hasSubClassEq(RC)) + Opcode = TRI->getRegSizeInBits(LoongArch::GPRRegClass) == 32 + ? 
LoongArch::LD_W + : LoongArch::LD_D; + else if (LoongArch::FPR32RegClass.hasSubClassEq(RC)) + Opcode = LoongArch::FLD_S; + else if (LoongArch::FPR64RegClass.hasSubClassEq(RC)) + Opcode = LoongArch::FLD_D; + else + llvm_unreachable("Can't load this register from stack slot"); + + MachineMemOperand *MMO = MF->getMachineMemOperand( + MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad, + MFI.getObjectSize(FI), MFI.getObjectAlign(FI)); + + BuildMI(MBB, I, DL, get(Opcode), DstReg) + .addFrameIndex(FI) + .addImm(0) + .addMemOperand(MMO); +} diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h index f31943b85a51..0a8c86a5e0c2 100644 --- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h +++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h @@ -30,6 +30,16 @@ public: void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, MCRegister DstReg, MCRegister SrcReg, bool KillSrc) const override; + + void storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, Register SrcReg, + bool IsKill, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const override; + void loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, Register DstReg, + int FrameIndex, const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const override; }; } // end namespace llvm diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td index 6b8ee9e43f94..d07d086bd7da 100644 --- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td @@ -14,22 +14,45 @@ // LoongArch specific DAG Nodes. //===----------------------------------------------------------------------===// +// Target-independent type requirements, but with target-specific formats. +def SDT_CallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>, + SDTCisVT<1, i32>]>; +def SDT_CallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, + SDTCisVT<1, i32>]>; + // Target-dependent type requirements. +def SDT_LoongArchCall : SDTypeProfile<0, -1, [SDTCisVT<0, GRLenVT>]>; def SDT_LoongArchIntBinOpW : SDTypeProfile<1, 2, [ SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVT<0, i64> ]>; +def SDT_LoongArchBStrIns: SDTypeProfile<1, 4, [ + SDTCisInt<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<3>, + SDTCisSameAs<3, 4> +]>; + def SDT_LoongArchBStrPick: SDTypeProfile<1, 3, [ SDTCisInt<0>, SDTCisSameAs<0, 1>, SDTCisInt<2>, SDTCisSameAs<2, 3> ]>; // TODO: Add LoongArch specific DAG Nodes +// Target-independent nodes, but with target-specific formats. +def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_CallSeqStart, + [SDNPHasChain, SDNPOutGlue]>; +def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_CallSeqEnd, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; + // Target-dependent nodes. 
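// (Editor's note: SDT_LoongArchCall above reads as 0 results and a variadic operand list (the -1), with operand 0, the callee address, constrained to GRLenVT.)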
+def loongarch_call : SDNode<"LoongArchISD::CALL", SDT_LoongArchCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; def loongarch_ret : SDNode<"LoongArchISD::RET", SDTNone, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; def loongarch_sll_w : SDNode<"LoongArchISD::SLL_W", SDT_LoongArchIntBinOpW>; def loongarch_sra_w : SDNode<"LoongArchISD::SRA_W", SDT_LoongArchIntBinOpW>; def loongarch_srl_w : SDNode<"LoongArchISD::SRL_W", SDT_LoongArchIntBinOpW>; +def loongarch_bstrins + : SDNode<"LoongArchISD::BSTRINS", SDT_LoongArchBStrIns>; def loongarch_bstrpick : SDNode<"LoongArchISD::BSTRPICK", SDT_LoongArchBStrPick>; @@ -106,7 +129,14 @@ def simm16 : Operand<GRLenVT> { let DecoderMethod = "decodeSImmOperand<16>"; } -def simm16_lsl2 : Operand<GRLenVT> { +def simm16_lsl2 : Operand<GRLenVT>, + ImmLeaf<GRLenVT, [{return isInt<16>(Imm>>2);}]> { + let ParserMatchClass = SImmAsmOperand<16, "lsl2">; + let EncoderMethod = "getImmOpValueAsr2"; + let DecoderMethod = "decodeSImmOperand<16, 2>"; +} + +def simm16_lsl2_br : Operand<OtherVT> { let ParserMatchClass = SImmAsmOperand<16, "lsl2">; let EncoderMethod = "getImmOpValueAsr2"; let DecoderMethod = "decodeSImmOperand<16, 2>"; @@ -117,13 +147,13 @@ def simm20 : Operand<GRLenVT> { let DecoderMethod = "decodeSImmOperand<20>"; } -def simm21_lsl2 : Operand<GRLenVT> { +def simm21_lsl2 : Operand<OtherVT> { let ParserMatchClass = SImmAsmOperand<21, "lsl2">; let EncoderMethod = "getImmOpValueAsr2"; let DecoderMethod = "decodeSImmOperand<21, 2>"; } -def simm26_lsl2 : Operand<GRLenVT> { +def simm26_lsl2 : Operand<OtherVT> { let ParserMatchClass = SImmAsmOperand<26, "lsl2">; let EncoderMethod = "getImmOpValueAsr2"; let DecoderMethod = "decodeSImmOperand<26, 2>"; @@ -141,6 +171,24 @@ def NegImm : SDNodeXForm<imm, [{ N->getValueType(0)); }]>; +// FP immediate patterns. +def fpimm0 : PatLeaf<(fpimm), [{return N->isExactlyValue(+0.0);}]>; +def fpimm0neg : PatLeaf<(fpimm), [{return N->isExactlyValue(-0.0);}]>; +def fpimm1 : PatLeaf<(fpimm), [{return N->isExactlyValue(+1.0);}]>; + +def CallSymbol: AsmOperandClass { + let Name = "CallSymbol"; + let RenderMethod = "addImmOperands"; + let PredicateMethod = "isImm"; +} + +// A bare symbol used in call only. 
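// (Editor's note: call_symbol below is rendered as a plain immediate (addImmOperands) so that `bl sym` assembles before relocation modifiers are modelled; codegen instead reaches PseudoCALL through the tglobaladdr/texternalsym patterns further down.)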
+def call_symbol : Operand<iPTR> { + let ParserMatchClass = CallSymbol; +} + +def BaseAddr : ComplexPattern<iPTR, 1, "SelectBaseAddr">; + //===----------------------------------------------------------------------===// // Instruction Formats //===----------------------------------------------------------------------===// @@ -185,7 +233,7 @@ class RDTIME_2R<bits<22> op, string opstr> : Fmt2R<op, (outs GPR:$rd, GPR:$rj), (ins), opstr, "$rd, $rj">; class BrCC_2RI16<bits<6> op, string opstr> - : Fmt2RI16<op, (outs), (ins GPR:$rj, GPR:$rd, simm16_lsl2:$imm16), opstr, + : Fmt2RI16<op, (outs), (ins GPR:$rj, GPR:$rd, simm16_lsl2_br:$imm16), opstr, "$rj, $rd, $imm16"> { let isBranch = 1; let isTerminator = 1; @@ -274,10 +322,12 @@ def XORI : ALU_2RI12<0b0000001111, "xori", uimm12>; def MUL_W : ALU_3R<0b00000000000111000, "mul.w">; def MULH_W : ALU_3R<0b00000000000111001, "mulh.w">; def MULH_WU : ALU_3R<0b00000000000111010, "mulh.wu">; +let usesCustomInserter = true in { def DIV_W : ALU_3R<0b00000000001000000, "div.w">; def MOD_W : ALU_3R<0b00000000001000001, "mod.w">; def DIV_WU : ALU_3R<0b00000000001000010, "div.wu">; def MOD_WU : ALU_3R<0b00000000001000011, "mod.wu">; +} // usesCustomInserter = true // Bit-shift Instructions def SLL_W : ALU_3R<0b00000000000101110, "sll.w">; @@ -379,10 +429,12 @@ def MULH_D : ALU_3R<0b00000000000111100, "mulh.d">; def MULH_DU : ALU_3R<0b00000000000111101, "mulh.du">; def MULW_D_W : ALU_3R<0b00000000000111110, "mulw.d.w">; def MULW_D_WU : ALU_3R<0b00000000000111111, "mulw.d.wu">; +let usesCustomInserter = true in { def DIV_D : ALU_3R<0b00000000001000100, "div.d">; def MOD_D : ALU_3R<0b00000000001000101, "mod.d">; def DIV_DU : ALU_3R<0b00000000001000110, "div.du">; def MOD_DU : ALU_3R<0b00000000001000111, "mod.du">; +} // usesCustomInserter = true // Bit-shift Instructions for 64-bits def SLL_D : ALU_3R<0b00000000000110001, "sll.d">; @@ -545,6 +597,9 @@ def shiftMaskGRLen : ComplexPattern<GRLenVT, 1, "selectShiftMaskGRLen", [], [], 0>; def shiftMask32 : ComplexPattern<i64, 1, "selectShiftMask32", [], [], 0>; +def sexti32 : ComplexPattern<i64, 1, "selectSExti32">; +def zexti32 : ComplexPattern<i64, 1, "selectZExti32">; + class shiftop<SDPatternOperator operator> : PatFrag<(ops node:$val, node:$count), (operator node:$val, (GRLenVT (shiftMaskGRLen node:$count)))>; @@ -556,6 +611,13 @@ let Predicates = [IsLA32] in { def : PatGprGpr<add, ADD_W>; def : PatGprImm<add, ADDI_W, simm12>; def : PatGprGpr<sub, SUB_W>; +def : PatGprGpr<sdiv, DIV_W>; +def : PatGprGpr<udiv, DIV_WU>; +def : PatGprGpr<srem, MOD_W>; +def : PatGprGpr<urem, MOD_WU>; +def : PatGprGpr<mul, MUL_W>; +def : PatGprGpr<mulhs, MULH_W>; +def : PatGprGpr<mulhu, MULH_WU>; } // Predicates = [IsLA32] let Predicates = [IsLA64] in { @@ -565,6 +627,24 @@ def : PatGprImm<add, ADDI_D, simm12>; def : PatGprImm_32<add, ADDI_W, simm12>; def : PatGprGpr<sub, SUB_D>; def : PatGprGpr_32<sub, SUB_W>; +def : PatGprGpr<sdiv, DIV_D>; +def : PatGprGpr<udiv, DIV_DU>; +def : PatGprGpr<srem, MOD_D>; +def : PatGprGpr<urem, MOD_DU>; +// TODO: Select "_W[U]" instructions for i32xi32 if only lower 32 bits of the +// product are used. +def : PatGprGpr<mul, MUL_D>; +def : PatGprGpr<mulhs, MULH_D>; +def : PatGprGpr<mulhu, MULH_DU>; +// Select MULW_D_W for calculating the full 64 bits product of i32xi32 signed +// multiplication. +def : Pat<(i64 (mul (sext_inreg GPR:$rj, i32), (sext_inreg GPR:$rk, i32))), + (MULW_D_W GPR:$rj, GPR:$rk)>; +// Select MULW_D_WU for calculating the full 64 bits product of i32xi32 +// unsigned multiplication. 
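// (Editor's note: the multiplicands below appear as loongarch_bstrpick rather than a plain zero-extend because performANDCombine has already rewritten (and x, 0xffffffff) into BSTRPICK x, 31, 0 by the time these patterns are matched.)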
+def : Pat<(i64 (mul (loongarch_bstrpick GPR:$rj, (i64 31), (i64 0)), + (loongarch_bstrpick GPR:$rk, (i64 31), (i64 0)))), + (MULW_D_WU GPR:$rj, GPR:$rk)>; } // Predicates = [IsLA64] def : PatGprGpr<and, AND>; @@ -649,19 +729,143 @@ def : Pat<(select GPR:$cond, GPR:$t, GPR:$f), /// Branches and jumps +class BccPat<PatFrag CondOp, LAInst Inst> + : Pat<(brcond (GRLenVT (CondOp GPR:$rj, GPR:$rd)), bb:$imm16), + (Inst GPR:$rj, GPR:$rd, bb:$imm16)>; + +def : BccPat<seteq, BEQ>; +def : BccPat<setne, BNE>; +def : BccPat<setlt, BLT>; +def : BccPat<setge, BGE>; +def : BccPat<setult, BLTU>; +def : BccPat<setuge, BGEU>; + +class BccSwapPat<PatFrag CondOp, LAInst InstBcc> + : Pat<(brcond (GRLenVT (CondOp GPR:$rd, GPR:$rj)), bb:$imm16), + (InstBcc GPR:$rj, GPR:$rd, bb:$imm16)>; + +// Condition codes that don't have matching LoongArch branch instructions, but +// are trivially supported by swapping the two input operands. +def : BccSwapPat<setgt, BLT>; +def : BccSwapPat<setle, BGE>; +def : BccSwapPat<setugt, BLTU>; +def : BccSwapPat<setule, BGEU>; + +// An extra pattern is needed for a brcond without a setcc (i.e. where the +// condition was calculated elsewhere). +def : Pat<(brcond GPR:$rj, bb:$imm21), (BNEZ GPR:$rj, bb:$imm21)>; + +let isBarrier = 1, isBranch = 1, isTerminator = 1 in +def PseudoBR : Pseudo<(outs), (ins simm26_lsl2:$imm26), [(br bb:$imm26)]>, + PseudoInstExpansion<(B simm26_lsl2:$imm26)>; + +let isBarrier = 1, isBranch = 1, isIndirectBranch = 1, isTerminator = 1 in +def PseudoBRIND : Pseudo<(outs), (ins GPR:$rj, simm16_lsl2:$imm16), []>, + PseudoInstExpansion<(JIRL R0, GPR:$rj, simm16_lsl2:$imm16)>; + +def : Pat<(brind GPR:$rj), (PseudoBRIND GPR:$rj, 0)>; +def : Pat<(brind (add GPR:$rj, simm16_lsl2:$imm16)), + (PseudoBRIND GPR:$rj, simm16_lsl2:$imm16)>; + +let isCall = 1, Defs = [R1] in +def PseudoCALL : Pseudo<(outs), (ins call_symbol:$func), []> { + let AsmString = "bl\t$func"; +} + +def : Pat<(loongarch_call tglobaladdr:$func), (PseudoCALL tglobaladdr:$func)>; +def : Pat<(loongarch_call texternalsym:$func), (PseudoCALL texternalsym:$func)>; + +let isCall = 1, Defs = [R1] in +def PseudoCALLIndirect : Pseudo<(outs), (ins GPR:$rj), + [(loongarch_call GPR:$rj)]>, + PseudoInstExpansion<(JIRL R1, GPR:$rj, 0)>; + let isBarrier = 1, isReturn = 1, isTerminator = 1 in def PseudoRET : Pseudo<(outs), (ins), [(loongarch_ret)]>, PseudoInstExpansion<(JIRL R0, R1, 0)>; -/// BSTRPICK +/// BSTRINS and BSTRPICK -let Predicates = [IsLA32] in +let Predicates = [IsLA32] in { +def : Pat<(loongarch_bstrins GPR:$rd, GPR:$rj, uimm5:$msbd, uimm5:$lsbd), + (BSTRINS_W GPR:$rd, GPR:$rj, uimm5:$msbd, uimm5:$lsbd)>; def : Pat<(loongarch_bstrpick GPR:$rj, uimm5:$msbd, uimm5:$lsbd), (BSTRPICK_W GPR:$rj, uimm5:$msbd, uimm5:$lsbd)>; +} // Predicates = [IsLA32] -let Predicates = [IsLA64] in +let Predicates = [IsLA64] in { +def : Pat<(loongarch_bstrins GPR:$rd, GPR:$rj, uimm6:$msbd, uimm6:$lsbd), + (BSTRINS_D GPR:$rd, GPR:$rj, uimm6:$msbd, uimm6:$lsbd)>; def : Pat<(loongarch_bstrpick GPR:$rj, uimm6:$msbd, uimm6:$lsbd), (BSTRPICK_D GPR:$rj, uimm6:$msbd, uimm6:$lsbd)>; +} // Predicates = [IsLA64] + +/// Loads + +multiclass LdPat<PatFrag LoadOp, LAInst Inst, ValueType vt = GRLenVT> { + def : Pat<(vt (LoadOp BaseAddr:$rj)), (Inst BaseAddr:$rj, 0)>; + def : Pat<(vt (LoadOp (add BaseAddr:$rj, simm12:$imm12))), + (Inst BaseAddr:$rj, simm12:$imm12)>; +} + +defm : LdPat<sextloadi8, LD_B>; +defm : LdPat<extloadi8, LD_B>; +defm : LdPat<sextloadi16, LD_H>; +defm : LdPat<extloadi16, LD_H>; +defm : LdPat<load, LD_W>, 
Requires<[IsLA32]>; +defm : LdPat<zextloadi8, LD_BU>; +defm : LdPat<zextloadi16, LD_HU>; +let Predicates = [IsLA64] in { +defm : LdPat<sextloadi32, LD_W, i64>; +defm : LdPat<extloadi32, LD_W, i64>; +defm : LdPat<zextloadi32, LD_WU, i64>; +defm : LdPat<load, LD_D, i64>; +} // Predicates = [IsLA64] + +/// Stores + +multiclass StPat<PatFrag StoreOp, LAInst Inst, RegisterClass StTy, + ValueType vt> { + def : Pat<(StoreOp (vt StTy:$rd), BaseAddr:$rj), + (Inst StTy:$rd, BaseAddr:$rj, 0)>; + def : Pat<(StoreOp (vt StTy:$rd), (add BaseAddr:$rj, simm12:$imm12)), + (Inst StTy:$rd, BaseAddr:$rj, simm12:$imm12)>; +} + +defm : StPat<truncstorei8, ST_B, GPR, GRLenVT>; +defm : StPat<truncstorei16, ST_H, GPR, GRLenVT>; +defm : StPat<store, ST_W, GPR, i32>, Requires<[IsLA32]>; +let Predicates = [IsLA64] in { +defm : StPat<truncstorei32, ST_W, GPR, i64>; +defm : StPat<store, ST_D, GPR, i64>; +} // Predicates = [IsLA64] + +/// Atomic loads and stores + +def : Pat<(atomic_fence timm, timm), (DBAR 0)>; + +defm : LdPat<atomic_load_8, LD_B>; +defm : LdPat<atomic_load_16, LD_H>; +defm : LdPat<atomic_load_32, LD_W>; + +defm : StPat<atomic_store_8, ST_B, GPR, GRLenVT>; +defm : StPat<atomic_store_16, ST_H, GPR, GRLenVT>; +defm : StPat<atomic_store_32, ST_W, GPR, i32>, Requires<[IsLA32]>; +let Predicates = [IsLA64] in { +defm : LdPat<atomic_load_64, LD_D>; +defm : StPat<atomic_store_32, ST_W, GPR, i64>; +defm : StPat<atomic_store_64, ST_D, GPR, i64>; +} // Predicates = [IsLA64] + +/// Other pseudo-instructions + +// Pessimistically assume the stack pointer will be clobbered +let Defs = [R3], Uses = [R3] in { +def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), + [(callseq_start timm:$amt1, timm:$amt2)]>; +def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), + [(callseq_end timm:$amt1, timm:$amt2)]>; +} // Defs = [R3], Uses = [R3] //===----------------------------------------------------------------------===// // Assembler Pseudo Instructions diff --git a/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp b/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp index 7416c93b4d05..488c66f47863 100644 --- a/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp @@ -22,6 +22,22 @@ using namespace llvm; +static MCOperand lowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym, + const AsmPrinter &AP) { + MCContext &Ctx = AP.OutContext; + + // TODO: Processing target flags. 
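// A sketch of what this helper produces (assumed example, names not from
// the patch): a global `foo` whose MachineOperand carries offset 8 lowers
// to the MCExpr tree add(symref(foo), const(8)), printed as "foo+8".
// Jump-table and basic-block operands never carry a folded offset, hence
// the guard below.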
+ + const MCExpr *ME = + MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, Ctx); + + if (!MO.isJTI() && !MO.isMBB() && MO.getOffset()) + ME = MCBinaryExpr::createAdd( + ME, MCConstantExpr::create(MO.getOffset(), Ctx), Ctx); + + return MCOperand::createExpr(ME); +} + bool llvm::lowerLoongArchMachineOperandToMCOperand(const MachineOperand &MO, MCOperand &MCOp, const AsmPrinter &AP) { @@ -41,12 +57,21 @@ bool llvm::lowerLoongArchMachineOperandToMCOperand(const MachineOperand &MO, case MachineOperand::MO_Immediate: MCOp = MCOperand::createImm(MO.getImm()); break; - // TODO: lower special operands - case MachineOperand::MO_MachineBasicBlock: + case MachineOperand::MO_ConstantPoolIndex: + MCOp = lowerSymbolOperand(MO, AP.GetCPISymbol(MO.getIndex()), AP); + break; case MachineOperand::MO_GlobalAddress: - case MachineOperand::MO_BlockAddress: + MCOp = lowerSymbolOperand(MO, AP.getSymbolPreferLocal(*MO.getGlobal()), AP); + break; + case MachineOperand::MO_MachineBasicBlock: + MCOp = lowerSymbolOperand(MO, MO.getMBB()->getSymbol(), AP); + break; case MachineOperand::MO_ExternalSymbol: - case MachineOperand::MO_ConstantPoolIndex: + MCOp = lowerSymbolOperand( + MO, AP.GetExternalSymbolSymbol(MO.getSymbolName()), AP); + break; + // TODO: lower special operands + case MachineOperand::MO_BlockAddress: case MachineOperand::MO_JumpTableIndex: break; } diff --git a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp index b9bae8e56304..05902ebb7ba6 100644 --- a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp @@ -110,6 +110,28 @@ void LoongArchRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum, RegScavenger *RS) const { + // TODO: this implementation is a temporary placeholder which does just + // enough to allow other aspects of code generation to be tested. + assert(SPAdj == 0 && "Unexpected non-zero SPAdj value"); - // TODO: Implement this when we have function calls + + MachineInstr &MI = *II; + MachineFunction &MF = *MI.getParent()->getParent(); + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + DebugLoc DL = MI.getDebugLoc(); + + int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); + Register FrameReg; + StackOffset Offset = + TFI->getFrameIndexReference(MF, FrameIndex, FrameReg) + + StackOffset::getFixed(MI.getOperand(FIOperandNum + 1).getImm()); + + // Offsets must be encodable with a 12-bit immediate field. 
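// For reference, a signed 12-bit immediate spans [-2048, 2047]; a tiny
// sanity check of the guard below, using llvm::isInt from
// llvm/Support/MathExtras.h:
//   assert(isInt<12>(-2048) && isInt<12>(2047) && !isInt<12>(2048));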
+ if (!isInt<12>(Offset.getFixed())) {
+ report_fatal_error("Frame offsets outside of the signed 12-bit range are "
+ "not currently supported");
+ }
+
+ MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false);
+ MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset.getFixed());
}
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
index 3a1a46a9e624..468c4f43cb90 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
@@ -102,6 +102,7 @@ public:
return getTM<LoongArchTargetMachine>();
}
+ void addIRPasses() override;
bool addInstSelector() override;
};
} // namespace
@@ -111,6 +112,12 @@ LoongArchTargetMachine::createPassConfig(PassManagerBase &PM) {
return new LoongArchPassConfig(*this, PM);
}
+void LoongArchPassConfig::addIRPasses() {
+ addPass(createAtomicExpandPass());
+
+ TargetPassConfig::addIRPasses();
+}
+
bool LoongArchPassConfig::addInstSelector() {
addPass(createLoongArchISelDag(getLoongArchTargetMachine()));
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp
index c733c194e6a2..e50761ab1e27 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp
@@ -25,6 +25,7 @@
#include "llvm/Support/Compiler.h"
#define GET_INSTRINFO_MC_DESC
+#define ENABLE_INSTR_PREDICATE_VERIFIER
#include "LoongArchGenInstrInfo.inc"
#define GET_REGINFO_MC_DESC
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.h
index e576b9a49cd6..a606ccdbc47c 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.h
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.h
@@ -46,6 +46,7 @@ createLoongArchELFObjectWriter(uint8_t OSABI, bool Is64Bit);
// Defines symbolic names for LoongArch instructions.
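The same three-part change recurs for each target updated below: the MCTargetDesc implementation defines ENABLE_INSTR_PREDICATE_VERIFIER alongside GET_INSTRINFO_MC_DESC, the matching header requests the generated helper declarations (as in the #define block that follows), and the target's AsmPrinter calls the checker before lowering. A minimal sketch of the call-site shape, with Foo standing in for any target name:

  void FooAsmPrinter::emitInstruction(const MachineInstr *MI) {
    // Abort early if MI uses a feature the current subtarget lacks.
    Foo_MC::verifyInstructionPredicates(MI->getOpcode(),
                                        getSubtargetInfo().getFeatureBits());
    // ... lower MI to an MCInst and emit it as before ...
  }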
#define GET_INSTRINFO_ENUM +#define GET_INSTRINFO_MC_HELPER_DECLS #include "LoongArchGenInstrInfo.inc" #define GET_SUBTARGETINFO_ENUM diff --git a/llvm/lib/Target/M68k/M68kAsmPrinter.cpp b/llvm/lib/Target/M68k/M68kAsmPrinter.cpp index 3bcce9e3ba3b..4933d40f3388 100644 --- a/llvm/lib/Target/M68k/M68kAsmPrinter.cpp +++ b/llvm/lib/Target/M68k/M68kAsmPrinter.cpp @@ -77,6 +77,9 @@ bool M68kAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, } void M68kAsmPrinter::emitInstruction(const MachineInstr *MI) { + M68k_MC::verifyInstructionPredicates(MI->getOpcode(), + getSubtargetInfo().getFeatureBits()); + switch (MI->getOpcode()) { default: { if (MI->isPseudo()) { diff --git a/llvm/lib/Target/M68k/MCTargetDesc/M68kMCTargetDesc.cpp b/llvm/lib/Target/M68k/MCTargetDesc/M68kMCTargetDesc.cpp index 2606e22410fc..e6290d4cbec5 100644 --- a/llvm/lib/Target/M68k/MCTargetDesc/M68kMCTargetDesc.cpp +++ b/llvm/lib/Target/M68k/MCTargetDesc/M68kMCTargetDesc.cpp @@ -31,6 +31,7 @@ using namespace llvm; #define GET_INSTRINFO_MC_DESC +#define ENABLE_INSTR_PREDICATE_VERIFIER #include "M68kGenInstrInfo.inc" #define GET_SUBTARGETINFO_MC_DESC diff --git a/llvm/lib/Target/M68k/MCTargetDesc/M68kMCTargetDesc.h b/llvm/lib/Target/M68k/MCTargetDesc/M68kMCTargetDesc.h index 0dc601ad876b..2a1cc678016a 100644 --- a/llvm/lib/Target/M68k/MCTargetDesc/M68kMCTargetDesc.h +++ b/llvm/lib/Target/M68k/MCTargetDesc/M68kMCTargetDesc.h @@ -52,6 +52,7 @@ std::unique_ptr<MCObjectTargetWriter> createM68kELFObjectWriter(uint8_t OSABI); // Defines symbolic names for the M68k instructions. #define GET_INSTRINFO_ENUM +#define GET_INSTRINFO_MC_HELPER_DECLS #include "M68kGenInstrInfo.inc" #define GET_SUBTARGETINFO_ENUM diff --git a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp index 3f006056955d..13a880de68b5 100644 --- a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp +++ b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp @@ -22,6 +22,7 @@ using namespace llvm; #define GET_INSTRINFO_MC_DESC +#define ENABLE_INSTR_PREDICATE_VERIFIER #include "MSP430GenInstrInfo.inc" #define GET_SUBTARGETINFO_MC_DESC diff --git a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h index 24b0b3298592..e596c3f1ce46 100644 --- a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h +++ b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h @@ -53,6 +53,7 @@ createMSP430ELFObjectWriter(uint8_t OSABI); // Defines symbolic names for the MSP430 instructions. 
#define GET_INSTRINFO_ENUM
+#define GET_INSTRINFO_MC_HELPER_DECLS
#include "MSP430GenInstrInfo.inc"
#define GET_SUBTARGETINFO_ENUM
diff --git a/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp b/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp
index 85c59d5b14b5..9cd2cbe89e46 100644
--- a/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp
+++ b/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp
@@ -149,6 +149,9 @@ bool MSP430AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
//===----------------------------------------------------------------------===//
void MSP430AsmPrinter::emitInstruction(const MachineInstr *MI) {
+ MSP430_MC::verifyInstructionPredicates(MI->getOpcode(),
+ getSubtargetInfo().getFeatureBits());
+
MSP430MCInstLower MCInstLowering(OutContext, *this);
MCInst TmpInst;
diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
index 6fc8fcb482cd..40c807082fdc 100644
--- a/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
+++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
@@ -36,6 +36,7 @@ using namespace llvm;
#define GET_INSTRINFO_MC_DESC
+#define ENABLE_INSTR_PREDICATE_VERIFIER
#include "MipsGenInstrInfo.inc"
#define GET_SUBTARGETINFO_MC_DESC
diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h b/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
index 8531177ee924..d51f3b9abcfd 100644
--- a/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
+++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
@@ -55,6 +55,7 @@ StringRef selectMipsCPU(const Triple &TT, StringRef CPU);
// Defines symbolic names for the Mips instructions.
#define GET_INSTRINFO_ENUM
+#define GET_INSTRINFO_MC_HELPER_DECLS
#include "MipsGenInstrInfo.inc"
#define GET_SUBTARGETINFO_ENUM
diff --git a/llvm/lib/Target/Mips/MipsAsmPrinter.cpp b/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
index 9330a791a7cc..fcaf450cc511 100644
--- a/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -181,6 +181,10 @@ static void emitDirectiveRelocJalr(const MachineInstr &MI,
}
void MipsAsmPrinter::emitInstruction(const MachineInstr *MI) {
+ // FIXME: Enable feature predicate checks once all the tests pass.
+ // Mips_MC::verifyInstructionPredicates(MI->getOpcode(),
+ // getSubtargetInfo().getFeatureBits());
+
MipsTargetStreamer &TS = getTargetStreamer();
unsigned Opc = MI->getOpcode();
TS.forbidModuleDirective();
diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
index 856d03f0b210..0ba29fb48b05 100644
--- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
+++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
@@ -23,6 +23,7 @@ using namespace llvm;
#define GET_INSTRINFO_MC_DESC
+#define ENABLE_INSTR_PREDICATE_VERIFIER
#include "NVPTXGenInstrInfo.inc"
#define GET_SUBTARGETINFO_MC_DESC
diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h
index b394566edd0d..78f4e6745502 100644
--- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h
+++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h
@@ -21,6 +21,7 @@
// Defines symbolic names for the PTX instructions.
#define GET_INSTRINFO_ENUM +#define GET_INSTRINFO_MC_HELPER_DECLS #include "NVPTXGenInstrInfo.inc" #define GET_SUBTARGETINFO_ENUM diff --git a/llvm/lib/Target/NVPTX/NVPTX.h b/llvm/lib/Target/NVPTX/NVPTX.h index 41e9f375e536..8c92766faecb 100644 --- a/llvm/lib/Target/NVPTX/NVPTX.h +++ b/llvm/lib/Target/NVPTX/NVPTX.h @@ -183,6 +183,7 @@ enum CmpMode { // Defines symbolic names for the NVPTX instructions. #define GET_INSTRINFO_ENUM +#define GET_INSTRINFO_MC_HELPER_DECLS #include "NVPTXGenInstrInfo.inc" #endif diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index b1d842122060..9977d8ba0300 100644 --- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -139,6 +139,9 @@ VisitGlobalVariableForEmission(const GlobalVariable *GV, } void NVPTXAsmPrinter::emitInstruction(const MachineInstr *MI) { + NVPTX_MC::verifyInstructionPredicates(MI->getOpcode(), + getSubtargetInfo().getFeatureBits()); + MCInst Inst; lowerToMCInst(MI, Inst); EmitToStreamer(*OutStreamer, Inst); diff --git a/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp b/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp index 2201eb19c80f..b4f7a64f144a 100644 --- a/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp @@ -270,10 +270,6 @@ Value *GenericToNVVM::remapConstantExpr(Module *M, Function *F, ConstantExpr *C, // ShuffleVector return Builder.CreateShuffleVector(NewOperands[0], NewOperands[1], NewOperands[2]); - case Instruction::InsertValue: - // InsertValueConstantExpr - return Builder.CreateInsertValue(NewOperands[0], NewOperands[1], - C->getIndices()); case Instruction::GetElementPtr: // GetElementPtrConstantExpr return Builder.CreateGEP(cast<GEPOperator>(C)->getSourceElementType(), diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index 746f652bfa36..6ad016dfa0a7 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -1861,7 +1861,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, InFlag = Ret.getValue(2); if (ProxyRegTruncates[i]) { - Ret = DAG.getNode(ISD::TRUNCATE, dl, ProxyRegTruncates[i].getValue(), Ret); + Ret = DAG.getNode(ISD::TRUNCATE, dl, ProxyRegTruncates[i].value(), Ret); } InVals.push_back(Ret); diff --git a/llvm/lib/Target/NVPTX/NVPTXImageOptimizer.cpp b/llvm/lib/Target/NVPTX/NVPTXImageOptimizer.cpp index eeedce2d99cb..202134ed7035 100644 --- a/llvm/lib/Target/NVPTX/NVPTXImageOptimizer.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXImageOptimizer.cpp @@ -35,6 +35,8 @@ public: bool runOnFunction(Function &F) override; + StringRef getPassName() const override { return "NVPTX Image Optimizer"; } + private: bool replaceIsTypePSampler(Instruction &I); bool replaceIsTypePSurface(Instruction &I); diff --git a/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp b/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp index 16fbe1a65562..7929bd2e0df0 100644 --- a/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp @@ -36,6 +36,8 @@ public: bool runOnMachineFunction(MachineFunction &MF) override; + StringRef getPassName() const override { return "NVPTX Prolog Epilog Pass"; } + private: void calculateFrameObjectOffsets(MachineFunction &Fn); }; diff --git a/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp b/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp index 2d6d72777db2..4e41515b997d 100644 --- a/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp +++ 
b/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp @@ -18,7 +18,6 @@ #include "llvm/IR/InstIterator.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" -#include "llvm/Support/ManagedStatic.h" #include "llvm/Support/Mutex.h" #include <algorithm> #include <cstring> @@ -32,19 +31,27 @@ namespace llvm { namespace { typedef std::map<std::string, std::vector<unsigned> > key_val_pair_t; typedef std::map<const GlobalValue *, key_val_pair_t> global_val_annot_t; -typedef std::map<const Module *, global_val_annot_t> per_module_annot_t; -} // anonymous namespace -static ManagedStatic<per_module_annot_t> annotationCache; -static sys::Mutex Lock; +struct AnnotationCache { + sys::Mutex Lock; + std::map<const Module *, global_val_annot_t> Cache; +}; + +AnnotationCache &getAnnotationCache() { + static AnnotationCache AC; + return AC; +} +} // anonymous namespace void clearAnnotationCache(const Module *Mod) { - std::lock_guard<sys::Mutex> Guard(Lock); - annotationCache->erase(Mod); + auto &AC = getAnnotationCache(); + std::lock_guard<sys::Mutex> Guard(AC.Lock); + AC.Cache.erase(Mod); } static void cacheAnnotationFromMD(const MDNode *md, key_val_pair_t &retval) { - std::lock_guard<sys::Mutex> Guard(Lock); + auto &AC = getAnnotationCache(); + std::lock_guard<sys::Mutex> Guard(AC.Lock); assert(md && "Invalid mdnode for annotation"); assert((md->getNumOperands() % 2) == 1 && "Invalid number of operands"); // start index = 1, to skip the global variable key @@ -70,7 +77,8 @@ static void cacheAnnotationFromMD(const MDNode *md, key_val_pair_t &retval) { } static void cacheAnnotationFromMD(const Module *m, const GlobalValue *gv) { - std::lock_guard<sys::Mutex> Guard(Lock); + auto &AC = getAnnotationCache(); + std::lock_guard<sys::Mutex> Guard(AC.Lock); NamedMDNode *NMD = m->getNamedMetadata("nvvm.annotations"); if (!NMD) return; @@ -93,40 +101,42 @@ static void cacheAnnotationFromMD(const Module *m, const GlobalValue *gv) { if (tmp.empty()) // no annotations for this gv return; - if ((*annotationCache).find(m) != (*annotationCache).end()) - (*annotationCache)[m][gv] = std::move(tmp); + if (AC.Cache.find(m) != AC.Cache.end()) + AC.Cache[m][gv] = std::move(tmp); else { global_val_annot_t tmp1; tmp1[gv] = std::move(tmp); - (*annotationCache)[m] = std::move(tmp1); + AC.Cache[m] = std::move(tmp1); } } bool findOneNVVMAnnotation(const GlobalValue *gv, const std::string &prop, unsigned &retval) { - std::lock_guard<sys::Mutex> Guard(Lock); + auto &AC = getAnnotationCache(); + std::lock_guard<sys::Mutex> Guard(AC.Lock); const Module *m = gv->getParent(); - if ((*annotationCache).find(m) == (*annotationCache).end()) + if (AC.Cache.find(m) == AC.Cache.end()) cacheAnnotationFromMD(m, gv); - else if ((*annotationCache)[m].find(gv) == (*annotationCache)[m].end()) + else if (AC.Cache[m].find(gv) == AC.Cache[m].end()) cacheAnnotationFromMD(m, gv); - if ((*annotationCache)[m][gv].find(prop) == (*annotationCache)[m][gv].end()) + if (AC.Cache[m][gv].find(prop) == AC.Cache[m][gv].end()) return false; - retval = (*annotationCache)[m][gv][prop][0]; + retval = AC.Cache[m][gv][prop][0]; return true; } bool findAllNVVMAnnotation(const GlobalValue *gv, const std::string &prop, std::vector<unsigned> &retval) { - std::lock_guard<sys::Mutex> Guard(Lock); + auto &AC = getAnnotationCache(); + std::lock_guard<sys::Mutex> Guard(AC.Lock); const Module *m = gv->getParent(); - if ((*annotationCache).find(m) == (*annotationCache).end()) + if (AC.Cache.find(m) == AC.Cache.end()) cacheAnnotationFromMD(m, gv); - else if ((*annotationCache)[m].find(gv) 
== (*annotationCache)[m].end()) + else if (AC.Cache[m].find(gv) == AC.Cache[m].end()) cacheAnnotationFromMD(m, gv); - if ((*annotationCache)[m][gv].find(prop) == (*annotationCache)[m][gv].end()) + if (AC.Cache[m][gv].find(prop) == AC.Cache[m][gv].end()) return false; - retval = (*annotationCache)[m][gv][prop]; + retval = AC.Cache[m][gv][prop]; return true; } diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp index 46bbc44e1681..fa9e69f2e607 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -449,12 +449,9 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO, return MO.getImm(); } -void PPCMCCodeEmitter::encodeInstruction( - const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - verifyInstructionPredicates(MI, - computeAvailableFeatures(STI.getFeatureBits())); - +void PPCMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { uint64_t Bits = getBinaryCodeForInstr(MI, Fixups, STI); // Output the constant in big/little endian byte order. @@ -492,5 +489,4 @@ bool PPCMCCodeEmitter::isPrefixedInstruction(const MCInst &MI) const { return InstrInfo->isPrefixed(Opcode); } -#define ENABLE_INSTR_PREDICATE_VERIFIER #include "PPCGenMCCodeEmitter.inc" diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h index 39b2f1211f29..c4d4d35a6665 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h @@ -121,12 +121,6 @@ public: // Is this instruction a prefixed instruction. bool isPrefixedInstruction(const MCInst &MI) const; - -private: - FeatureBitset computeAvailableFeatures(const FeatureBitset &FB) const; - void - verifyInstructionPredicates(const MCInst &MI, - const FeatureBitset &AvailableFeatures) const; }; } // namespace llvm diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp index a651362f703b..1008dc63d064 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp @@ -48,6 +48,7 @@ using namespace llvm; #define GET_INSTRINFO_MC_DESC +#define ENABLE_INSTR_PREDICATE_VERIFIER #include "PPCGenInstrInfo.inc" #define GET_SUBTARGETINFO_MC_DESC diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h index acb860e16518..3ca6f394f60b 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h @@ -118,6 +118,7 @@ static inline bool isRunOfOnes64(uint64_t Val, unsigned &MB, unsigned &ME) { // #define GET_INSTRINFO_ENUM #define GET_INSTRINFO_SCHED_ENUM +#define GET_INSTRINFO_MC_HELPER_DECLS #include "PPCGenInstrInfo.inc" #define GET_SUBTARGETINFO_ENUM diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp index 22f35c8fa8d3..58a75baf8081 100644 --- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -230,6 +230,9 @@ private: void emitGlobalVariableHelper(const GlobalVariable *); + // Get the offset of an alias based on its AliaseeObject. 
+ uint64_t getAliasOffset(const Constant *C); + public: PPCAIXAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer) : PPCAsmPrinter(TM, std::move(Streamer)) { @@ -656,6 +659,9 @@ static MCSymbol *getMCSymbolForTOCPseudoMO(const MachineOperand &MO, /// the current output stream. /// void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) { + PPC_MC::verifyInstructionPredicates(MI->getOpcode(), + getSubtargetInfo().getFeatureBits()); + MCInst TmpInst; const bool IsPPC64 = Subtarget->isPPC64(); const bool IsAIX = Subtarget->isAIXABI(); @@ -2352,6 +2358,24 @@ static bool isSpecialLLVMGlobalArrayForStaticInit(const GlobalVariable *GV) { .Default(false); } +uint64_t PPCAIXAsmPrinter::getAliasOffset(const Constant *C) { + if (auto *GA = dyn_cast<GlobalAlias>(C)) + return getAliasOffset(GA->getAliasee()); + if (auto *CE = dyn_cast<ConstantExpr>(C)) { + const MCExpr *LowC = lowerConstant(CE); + const MCBinaryExpr *CBE = dyn_cast<MCBinaryExpr>(LowC); + if (!CBE) + return 0; + if (CBE->getOpcode() != MCBinaryExpr::Add) + report_fatal_error("Only adding an offset is supported now."); + auto *RHS = dyn_cast<MCConstantExpr>(CBE->getRHS()); + if (!RHS) + report_fatal_error("Unable to get the offset of alias."); + return RHS->getValue(); + } + return 0; +} + void PPCAIXAsmPrinter::emitGlobalVariable(const GlobalVariable *GV) { // Special LLVM global arrays have been handled at the initialization. if (isSpecialLLVMGlobalArrayToSkip(GV) || isSpecialLLVMGlobalArrayForStaticInit(GV)) @@ -2422,20 +2446,34 @@ void PPCAIXAsmPrinter::emitGlobalVariableHelper(const GlobalVariable *GV) { } MCSymbol *EmittedInitSym = GVSym; + + // Emit linkage for the global variable and its aliases. emitLinkage(GV, EmittedInitSym); + for (const GlobalAlias *GA : GOAliasMap[GV]) + emitLinkage(GA, getSymbol(GA)); + emitAlignment(getGVAlignment(GV, DL), GV); // When -fdata-sections is enabled, every GlobalVariable will // be put into its own csect; therefore, label is not necessary here. - if (!TM.getDataSections() || GV->hasSection()) { + if (!TM.getDataSections() || GV->hasSection()) OutStreamer->emitLabel(EmittedInitSym); + + // No alias to emit. + if (!GOAliasMap[GV].size()) { + emitGlobalConstant(GV->getParent()->getDataLayout(), GV->getInitializer()); + return; } - // Emit aliasing label for global variable. - for (const GlobalAlias *Alias : GOAliasMap[GV]) - OutStreamer->emitLabel(getSymbol(Alias)); + // Aliases with the same offset should be aligned. Record the list of aliases + // associated with the offset. + AliasMapTy AliasList; + for (const GlobalAlias *GA : GOAliasMap[GV]) + AliasList[getAliasOffset(GA->getAliasee())].push_back(GA); - emitGlobalConstant(GV->getParent()->getDataLayout(), GV->getInitializer()); + // Emit alias label and element value for global variable. 
+ emitGlobalConstant(GV->getParent()->getDataLayout(), GV->getInitializer(), + &AliasList); } void PPCAIXAsmPrinter::emitFunctionDescriptor() { diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 5b9d1e66b04e..3c461a627d61 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -392,8 +392,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, // MASS transformation for LLVM intrinsics with replicating fast-math flag // to be consistent to PPCGenScalarMASSEntries pass - if (TM.getOptLevel() == CodeGenOpt::Aggressive && - TM.Options.PPCGenScalarMASSEntries) { + if (TM.getOptLevel() == CodeGenOpt::Aggressive) { setOperationAction(ISD::FSIN , MVT::f64, Custom); setOperationAction(ISD::FCOS , MVT::f64, Custom); setOperationAction(ISD::FPOW , MVT::f64, Custom); @@ -17886,13 +17885,17 @@ bool PPCTargetLowering::isLowringToMASSSafe(SDValue Op) const { return Op.getNode()->getFlags().hasApproximateFuncs(); } +bool PPCTargetLowering::isScalarMASSConversionEnabled() const { + return getTargetMachine().Options.PPCGenScalarMASSEntries; +} + SDValue PPCTargetLowering::lowerLibCallBase(const char *LibCallDoubleName, const char *LibCallFloatName, const char *LibCallDoubleNameFinite, const char *LibCallFloatNameFinite, SDValue Op, SelectionDAG &DAG) const { - if (!isLowringToMASSSafe(Op)) + if (!isScalarMASSConversionEnabled() || !isLowringToMASSSafe(Op)) return SDValue(); if (!isLowringToMASSFiniteSafe(Op)) diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index f92a117fe27f..4a08cc42fa9d 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -1293,6 +1293,7 @@ namespace llvm { SelectionDAG &DAG) const; bool isLowringToMASSFiniteSafe(SDValue Op) const; bool isLowringToMASSSafe(SDValue Op) const; + bool isScalarMASSConversionEnabled() const; SDValue lowerLibCallBase(const char *LibCallDoubleName, const char *LibCallFloatName, const char *LibCallDoubleNameFinite, diff --git a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp index fbd487fbcfd5..59e8f3ff84a4 100644 --- a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp +++ b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp @@ -43,7 +43,6 @@ namespace { } const PPCInstrInfo *TII; - LiveIntervals *LIS; protected: bool processBlock(MachineBasicBlock &MBB) { @@ -83,11 +82,8 @@ protected: Register InReg = PPC::NoRegister; Register GPR3 = Is64Bit ? PPC::X3 : PPC::R3; Register GPR4 = Is64Bit ? PPC::X4 : PPC::R4; - SmallVector<Register, 3> OrigRegs = {OutReg, GPR3}; - if (!IsPCREL) { + if (!IsPCREL) InReg = MI.getOperand(1).getReg(); - OrigRegs.push_back(InReg); - } DebugLoc DL = MI.getDebugLoc(); unsigned Opc1, Opc2; @@ -139,11 +135,6 @@ protected: BuildMI(MBB, I, DL, TII->get(PPC::ADJCALLSTACKDOWN)).addImm(0) .addImm(0); - // The ADDItls* instruction is the first instruction in the - // repair range. - MachineBasicBlock::iterator First = I; - --First; - if (IsAIX) { // The variable offset and region handle are copied in r4 and r3. The // copies are followed by GETtlsADDR32AIX/GETtlsADDR64AIX. @@ -177,16 +168,10 @@ protected: BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), OutReg) .addReg(GPR3); - // The COPY is the last instruction in the repair range. - MachineBasicBlock::iterator Last = I; - --Last; - // Move past the original instruction and remove it. 
++I; MI.removeFromParent(); - // Repair the live intervals. - LIS->repairIntervalsInRange(&MBB, First, Last, OrigRegs); Changed = true; } @@ -204,7 +189,6 @@ public: bool runOnMachineFunction(MachineFunction &MF) override { TII = MF.getSubtarget<PPCSubtarget>().getInstrInfo(); - LIS = &getAnalysis<LiveIntervals>(); bool Changed = false; @@ -217,9 +201,7 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<LiveIntervals>(); - AU.addPreserved<LiveIntervals>(); AU.addRequired<SlotIndexes>(); - AU.addPreserved<SlotIndexes>(); MachineFunctionPass::getAnalysisUsage(AU); } }; diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp index 7c062387fecd..a335b2d23394 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp @@ -84,12 +84,6 @@ public: unsigned getVMaskReg(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const; - -private: - FeatureBitset computeAvailableFeatures(const FeatureBitset &FB) const; - void - verifyInstructionPredicates(const MCInst &MI, - const FeatureBitset &AvailableFeatures) const; }; } // end anonymous namespace @@ -188,9 +182,6 @@ void RISCVMCCodeEmitter::expandAddTPRel(const MCInst &MI, raw_ostream &OS, void RISCVMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const { - verifyInstructionPredicates(MI, - computeAvailableFeatures(STI.getFeatureBits())); - const MCInstrDesc &Desc = MCII.get(MI.getOpcode()); // Get byte count of instruction. unsigned Size = Desc.getSize(); @@ -403,5 +394,4 @@ unsigned RISCVMCCodeEmitter::getVMaskReg(const MCInst &MI, unsigned OpNo, } } -#define ENABLE_INSTR_PREDICATE_VERIFIER #include "RISCVGenMCCodeEmitter.inc" diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp index 917d93479f18..c63e0c8e737d 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp @@ -33,6 +33,7 @@ #include "llvm/Support/ErrorHandling.h" #define GET_INSTRINFO_MC_DESC +#define ENABLE_INSTR_PREDICATE_VERIFIER #include "RISCVGenInstrInfo.inc" #define GET_REGINFO_MC_DESC diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h index 276fc9efb6c0..d157257d976c 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h @@ -45,6 +45,7 @@ std::unique_ptr<MCObjectTargetWriter> createRISCVELFObjectWriter(uint8_t OSABI, // Defines symbolic names for RISC-V instructions. #define GET_INSTRINFO_ENUM +#define GET_INSTRINFO_MC_HELPER_DECLS #include "RISCVGenInstrInfo.inc" #define GET_SUBTARGETINFO_ENUM diff --git a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp index 5b2a247ebda0..edd39f6547ed 100644 --- a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp +++ b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp @@ -91,6 +91,9 @@ void RISCVAsmPrinter::EmitToStreamer(MCStreamer &S, const MCInst &Inst) { #include "RISCVGenMCPseudoLowering.inc" void RISCVAsmPrinter::emitInstruction(const MachineInstr *MI) { + RISCV_MC::verifyInstructionPredicates(MI->getOpcode(), + getSubtargetInfo().getFeatureBits()); + // Do any auto-generated pseudo lowerings. 
if (emitPseudoExpansionLowering(*OutStreamer, MI)) return; diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp index 57d8ba6f0161..a7286b2963c2 100644 --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -899,7 +899,8 @@ void RISCVFrameLowering::determineCalleeSaves(MachineFunction &MF, } std::pair<int64_t, Align> -RISCVFrameLowering::assignRVVStackObjectOffsets(MachineFrameInfo &MFI) const { +RISCVFrameLowering::assignRVVStackObjectOffsets(MachineFunction &MF) const { + MachineFrameInfo &MFI = MF.getFrameInfo(); // Create a buffer of RVV objects to allocate. SmallVector<int, 8> ObjectsToAllocate; for (int I = 0, E = MFI.getObjectIndexEnd(); I != E; ++I) { @@ -912,10 +913,18 @@ RISCVFrameLowering::assignRVVStackObjectOffsets(MachineFrameInfo &MFI) const { ObjectsToAllocate.push_back(I); } - // Allocate all RVV locals and spills - int64_t Offset = 0; // The minimum alignment is 16 bytes. Align RVVStackAlign(16); + const auto &ST = MF.getSubtarget<RISCVSubtarget>(); + + if (!ST.hasVInstructions()) { + assert(ObjectsToAllocate.empty() && + "Can't allocate scalable-vector objects without V instructions"); + return std::make_pair(0, RVVStackAlign); + } + + // Allocate all RVV locals and spills + int64_t Offset = 0; for (int FI : ObjectsToAllocate) { // ObjectSize in bytes. int64_t ObjectSize = MFI.getObjectSize(FI); @@ -997,7 +1006,7 @@ void RISCVFrameLowering::processFunctionBeforeFrameFinalized( int64_t RVVStackSize; Align RVVStackAlign; - std::tie(RVVStackSize, RVVStackAlign) = assignRVVStackObjectOffsets(MFI); + std::tie(RVVStackSize, RVVStackAlign) = assignRVVStackObjectOffsets(MF); RVFI->setRVVStackSize(RVVStackSize); RVFI->setRVVStackAlign(RVVStackAlign); diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.h b/llvm/lib/Target/RISCV/RISCVFrameLowering.h index 466cd059b749..a5cf68a6ea94 100644 --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.h +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.h @@ -84,7 +84,7 @@ private: MachineBasicBlock::iterator MBBI, const DebugLoc &DL, int64_t Amount, MachineInstr::MIFlag Flag) const; std::pair<int64_t, Align> - assignRVVStackObjectOffsets(MachineFrameInfo &MFI) const; + assignRVVStackObjectOffsets(MachineFunction &MF) const; }; -} +} // namespace llvm #endif diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index cfaafc7b53d2..5b823af1e9b8 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -43,92 +43,95 @@ namespace RISCV { } // namespace llvm void RISCVDAGToDAGISel::PreprocessISelDAG() { - for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), - E = CurDAG->allnodes_end(); - I != E;) { - SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues. - - // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point - // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden. - if (N->getOpcode() == ISD::SPLAT_VECTOR) { + SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); + + bool MadeChange = false; + while (Position != CurDAG->allnodes_begin()) { + SDNode *N = &*--Position; + if (N->use_empty()) + continue; + + SDValue Result; + switch (N->getOpcode()) { + case ISD::SPLAT_VECTOR: { + // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point + // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden. MVT VT = N->getSimpleValueType(0); unsigned Opc = VT.isInteger() ? 
RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL; SDLoc DL(N); SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT()); - SDValue Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT), - N->getOperand(0), VL); + Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT), + N->getOperand(0), VL); + break; + } + case RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL: { + // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector + // load. Done after lowering and combining so that we have a chance to + // optimize this to VMV_V_X_VL when the upper bits aren't needed. + assert(N->getNumOperands() == 4 && "Unexpected number of operands"); + MVT VT = N->getSimpleValueType(0); + SDValue Passthru = N->getOperand(0); + SDValue Lo = N->getOperand(1); + SDValue Hi = N->getOperand(2); + SDValue VL = N->getOperand(3); + assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() && + Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 && + "Unexpected VTs!"); + MachineFunction &MF = CurDAG->getMachineFunction(); + RISCVMachineFunctionInfo *FuncInfo = + MF.getInfo<RISCVMachineFunctionInfo>(); + SDLoc DL(N); - --I; - CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result); - ++I; - CurDAG->DeleteNode(N); - continue; + // We use the same frame index we use for moving two i32s into 64-bit FPR. + // This is an analogous operation. + int FI = FuncInfo->getMoveF64FrameIndex(MF); + MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI); + const TargetLowering &TLI = CurDAG->getTargetLoweringInfo(); + SDValue StackSlot = + CurDAG->getFrameIndex(FI, TLI.getPointerTy(CurDAG->getDataLayout())); + + SDValue Chain = CurDAG->getEntryNode(); + Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8)); + + SDValue OffsetSlot = + CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::Fixed(4), DL); + Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4), + Align(8)); + + Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); + + SDVTList VTs = CurDAG->getVTList({VT, MVT::Other}); + SDValue IntID = + CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64); + SDValue Ops[] = {Chain, + IntID, + Passthru, + StackSlot, + CurDAG->getRegister(RISCV::X0, MVT::i64), + VL}; + + Result = CurDAG->getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, + MVT::i64, MPI, Align(8), + MachineMemOperand::MOLoad); + break; + } } - // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector - // load. Done after lowering and combining so that we have a chance to - // optimize this to VMV_V_X_VL when the upper bits aren't needed. - if (N->getOpcode() != RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) - continue; + if (Result) { + LLVM_DEBUG(dbgs() << "RISCV DAG preprocessing replacing:\nOld: "); + LLVM_DEBUG(N->dump(CurDAG)); + LLVM_DEBUG(dbgs() << "\nNew: "); + LLVM_DEBUG(Result->dump(CurDAG)); + LLVM_DEBUG(dbgs() << "\n"); - assert(N->getNumOperands() == 4 && "Unexpected number of operands"); - MVT VT = N->getSimpleValueType(0); - SDValue Passthru = N->getOperand(0); - SDValue Lo = N->getOperand(1); - SDValue Hi = N->getOperand(2); - SDValue VL = N->getOperand(3); - assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() && - Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 && - "Unexpected VTs!"); - MachineFunction &MF = CurDAG->getMachineFunction(); - RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>(); - SDLoc DL(N); - - // We use the same frame index we use for moving two i32s into 64-bit FPR. 
- // This is an analogous operation. - int FI = FuncInfo->getMoveF64FrameIndex(MF); - MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI); - const TargetLowering &TLI = CurDAG->getTargetLoweringInfo(); - SDValue StackSlot = - CurDAG->getFrameIndex(FI, TLI.getPointerTy(CurDAG->getDataLayout())); - - SDValue Chain = CurDAG->getEntryNode(); - Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8)); - - SDValue OffsetSlot = - CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::Fixed(4), DL); - Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4), - Align(8)); - - Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); - - SDVTList VTs = CurDAG->getVTList({VT, MVT::Other}); - SDValue IntID = - CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64); - SDValue Ops[] = {Chain, - IntID, - Passthru, - StackSlot, - CurDAG->getRegister(RISCV::X0, MVT::i64), - VL}; - - SDValue Result = CurDAG->getMemIntrinsicNode( - ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MVT::i64, MPI, Align(8), - MachineMemOperand::MOLoad); - - // We're about to replace all uses of the SPLAT_VECTOR_SPLIT_I64 with the - // vlse we created. This will cause general havok on the dag because - // anything below the conversion could be folded into other existing nodes. - // To avoid invalidating 'I', back it up to the convert node. - --I; - CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result); - - // Now that we did that, the node is dead. Increment the iterator to the - // next node to process, then delete N. - ++I; - CurDAG->DeleteNode(N); + CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result); + MadeChange = true; + } } + + if (MadeChange) + CurDAG->RemoveDeadNodes(); } void RISCVDAGToDAGISel::PostprocessISelDAG() { @@ -143,7 +146,6 @@ void RISCVDAGToDAGISel::PostprocessISelDAG() { continue; MadeChange |= doPeepholeSExtW(N); - MadeChange |= doPeepholeLoadStoreADDI(N); MadeChange |= doPeepholeMaskedRVV(N); } @@ -153,40 +155,6 @@ void RISCVDAGToDAGISel::PostprocessISelDAG() { CurDAG->RemoveDeadNodes(); } -// Returns true if N is a MachineSDNode that has a reg and simm12 memory -// operand. The indices of the base pointer and offset are returned in BaseOpIdx -// and OffsetOpIdx. -static bool hasMemOffset(SDNode *N, unsigned &BaseOpIdx, - unsigned &OffsetOpIdx) { - switch (N->getMachineOpcode()) { - case RISCV::LB: - case RISCV::LH: - case RISCV::LW: - case RISCV::LBU: - case RISCV::LHU: - case RISCV::LWU: - case RISCV::LD: - case RISCV::FLH: - case RISCV::FLW: - case RISCV::FLD: - BaseOpIdx = 0; - OffsetOpIdx = 1; - return true; - case RISCV::SB: - case RISCV::SH: - case RISCV::SW: - case RISCV::SD: - case RISCV::FSH: - case RISCV::FSW: - case RISCV::FSD: - BaseOpIdx = 1; - OffsetOpIdx = 2; - return true; - } - - return false; -} - static SDNode *selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, RISCVMatInt::InstSeq &Seq) { SDNode *Result = nullptr; @@ -285,9 +253,7 @@ void RISCVDAGToDAGISel::addVectorLoadStoreOperands( SDValue Chain = Node->getOperand(0); SDValue Glue; - SDValue Base; - SelectBaseAddr(Node->getOperand(CurOp++), Base); - Operands.push_back(Base); // Base pointer. + Operands.push_back(Node->getOperand(CurOp++)); // Base pointer. if (IsStridedOrIndexed) { Operands.push_back(Node->getOperand(CurOp++)); // Index. 
@@ -651,83 +617,6 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget)); return; } - case ISD::ADD: { - // Try to select ADD + immediate used as memory addresses to - // (ADDI (ADD X, Imm-Lo12), Lo12) if it will allow the ADDI to be removed by - // doPeepholeLoadStoreADDI. - - // LHS should be an immediate. - auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); - if (!N1C) - break; - - int64_t Offset = N1C->getSExtValue(); - int64_t Lo12 = SignExtend64<12>(Offset); - - // Don't do this if the lower 12 bits are 0 or we could use ADDI directly. - if (Lo12 == 0 || isInt<12>(Offset)) - break; - - // Don't do this if we can use a pair of ADDIs. - if (isInt<12>(Offset / 2) && isInt<12>(Offset - Offset / 2)) - break; - - RISCVMatInt::InstSeq Seq = - RISCVMatInt::generateInstSeq(Offset, Subtarget->getFeatureBits()); - - Offset -= Lo12; - // Restore sign bits for RV32. - if (!Subtarget->is64Bit()) - Offset = SignExtend64<32>(Offset); - - // We can fold if the last operation is an ADDI or its an ADDIW that could - // be treated as an ADDI. - if (Seq.back().Opc != RISCV::ADDI && - !(Seq.back().Opc == RISCV::ADDIW && isInt<32>(Offset))) - break; - assert(Seq.back().Imm == Lo12 && "Expected immediate to match Lo12"); - // Drop the last operation. - Seq.pop_back(); - assert(!Seq.empty() && "Expected more instructions in sequence"); - - bool AllPointerUses = true; - for (auto UI = Node->use_begin(), UE = Node->use_end(); UI != UE; ++UI) { - SDNode *User = *UI; - - // Is this user a memory instruction that uses a register and immediate - // that has this ADD as its pointer. - unsigned BaseOpIdx, OffsetOpIdx; - if (!User->isMachineOpcode() || - !hasMemOffset(User, BaseOpIdx, OffsetOpIdx) || - UI.getOperandNo() != BaseOpIdx) { - AllPointerUses = false; - break; - } - - // If the memory instruction already has an offset, make sure the combined - // offset is foldable. - int64_t MemOffs = - cast<ConstantSDNode>(User->getOperand(OffsetOpIdx))->getSExtValue(); - MemOffs += Lo12; - if (!isInt<12>(MemOffs)) { - AllPointerUses = false; - break; - } - } - - if (!AllPointerUses) - break; - - // Emit (ADDI (ADD X, Hi), Lo) - SDNode *Imm = selectImmSeq(CurDAG, DL, VT, Seq); - SDNode *ADD = CurDAG->getMachineNode(RISCV::ADD, DL, VT, - Node->getOperand(0), SDValue(Imm, 0)); - SDNode *ADDI = - CurDAG->getMachineNode(RISCV::ADDI, DL, VT, SDValue(ADD, 0), - CurDAG->getTargetConstant(Lo12, DL, VT)); - ReplaceNode(Node, ADDI); - return; - } case ISD::SHL: { auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); if (!N1C) @@ -856,10 +745,9 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); if (!C) break; - uint64_t C2 = C->getZExtValue(); + unsigned C2 = C->getZExtValue(); unsigned XLen = Subtarget->getXLen(); - if (!C2 || C2 >= XLen) - break; + assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!"); uint64_t C1 = N1C->getZExtValue(); @@ -885,10 +773,10 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { // Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask // with c3 leading zeros. if (!LeftShift && isMask_64(C1)) { - uint64_t C3 = XLen - (64 - countLeadingZeros(C1)); - if (C2 < C3) { + unsigned Leading = XLen - (64 - countLeadingZeros(C1)); + if (C2 < Leading) { // If the number of leading zeros is C2+32 this can be SRLIW. 
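// Worked example with assumed values: XLen=64, C2=2, C1=0x3FFFFFFF gives
// Leading = 34 = C2+32. Then (and (srl x, 2), 0x3FFFFFFF) keeps bits [31:2]
// of x, which is exactly what srliw x, 2 produces: the shifted 32-bit value
// has bit 31 clear, so its sign-extension is a zero-extension.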
- if (C2 + 32 == C3) { + if (C2 + 32 == Leading) { SDNode *SRLIW = CurDAG->getMachineNode( RISCV::SRLIW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT)); ReplaceNode(Node, SRLIW); @@ -900,7 +788,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { // // This pattern occurs when (i32 (srl (sra 31), c3 - 32)) is type // legalized and goes through DAG combine. - if (C2 >= 32 && (C3 - C2) == 1 && N0.hasOneUse() && + if (C2 >= 32 && (Leading - C2) == 1 && N0.hasOneUse() && X.getOpcode() == ISD::SIGN_EXTEND_INREG && cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32) { SDNode *SRAIW = @@ -908,25 +796,25 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { CurDAG->getTargetConstant(31, DL, VT)); SDNode *SRLIW = CurDAG->getMachineNode( RISCV::SRLIW, DL, VT, SDValue(SRAIW, 0), - CurDAG->getTargetConstant(C3 - 32, DL, VT)); + CurDAG->getTargetConstant(Leading - 32, DL, VT)); ReplaceNode(Node, SRLIW); return; } // (srli (slli x, c3-c2), c3). // Skip if we could use (zext.w (sraiw X, C2)). - bool Skip = Subtarget->hasStdExtZba() && C3 == 32 && + bool Skip = Subtarget->hasStdExtZba() && Leading == 32 && X.getOpcode() == ISD::SIGN_EXTEND_INREG && cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32; // Also Skip if we can use bexti. - Skip |= Subtarget->hasStdExtZbs() && C3 == XLen - 1; + Skip |= Subtarget->hasStdExtZbs() && Leading == XLen - 1; if (OneUseOrZExtW && !Skip) { SDNode *SLLI = CurDAG->getMachineNode( RISCV::SLLI, DL, VT, X, - CurDAG->getTargetConstant(C3 - C2, DL, VT)); - SDNode *SRLI = - CurDAG->getMachineNode(RISCV::SRLI, DL, VT, SDValue(SLLI, 0), - CurDAG->getTargetConstant(C3, DL, VT)); + CurDAG->getTargetConstant(Leading - C2, DL, VT)); + SDNode *SRLI = CurDAG->getMachineNode( + RISCV::SRLI, DL, VT, SDValue(SLLI, 0), + CurDAG->getTargetConstant(Leading, DL, VT)); ReplaceNode(Node, SRLI); return; } @@ -936,12 +824,12 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { // Turn (and (shl x, c2), c1) -> (srli (slli c2+c3), c3) if c1 is a mask // shifted by c2 bits with c3 leading zeros. if (LeftShift && isShiftedMask_64(C1)) { - uint64_t C3 = XLen - (64 - countLeadingZeros(C1)); + unsigned Leading = XLen - (64 - countLeadingZeros(C1)); - if (C2 + C3 < XLen && - C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + C3)) << C2)) { + if (C2 + Leading < XLen && + C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + Leading)) << C2)) { // Use slli.uw when possible. - if ((XLen - (C2 + C3)) == 32 && Subtarget->hasStdExtZba()) { + if ((XLen - (C2 + Leading)) == 32 && Subtarget->hasStdExtZba()) { SDNode *SLLI_UW = CurDAG->getMachineNode( RISCV::SLLI_UW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT)); ReplaceNode(Node, SLLI_UW); @@ -952,10 +840,10 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { if (OneUseOrZExtW && !IsCANDI) { SDNode *SLLI = CurDAG->getMachineNode( RISCV::SLLI, DL, VT, X, - CurDAG->getTargetConstant(C2 + C3, DL, VT)); - SDNode *SRLI = - CurDAG->getMachineNode(RISCV::SRLI, DL, VT, SDValue(SLLI, 0), - CurDAG->getTargetConstant(C3, DL, VT)); + CurDAG->getTargetConstant(C2 + Leading, DL, VT)); + SDNode *SRLI = CurDAG->getMachineNode( + RISCV::SRLI, DL, VT, SDValue(SLLI, 0), + CurDAG->getTargetConstant(Leading, DL, VT)); ReplaceNode(Node, SRLI); return; } @@ -965,9 +853,9 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { // Turn (and (shr x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a // shifted mask with c2 leading zeros and c3 trailing zeros. 
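A concrete instance of this rewrite, with values chosen for illustration (XLen=64, c2=4, c1=0x0FFFFFFFFFFFFF00, so c3=8 trailing zeros):

  // Both forms place bits [63:12] of x at positions [59:8] and zero the
  // rest, but the shift pair avoids materializing the 64-bit mask constant.
  uint64_t masked  = (x >> 4) & 0x0FFFFFFFFFFFFF00ULL; // (and (shr x, 4), c1)
  uint64_t shifted = (x >> 12) << 8;                   // srli x,12 ; slli 8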
if (!LeftShift && isShiftedMask_64(C1)) { - uint64_t Leading = XLen - (64 - countLeadingZeros(C1)); - uint64_t C3 = countTrailingZeros(C1); - if (Leading == C2 && C2 + C3 < XLen && OneUseOrZExtW && !IsCANDI) { + unsigned Leading = XLen - (64 - countLeadingZeros(C1)); + unsigned Trailing = countTrailingZeros(C1); + if (Leading == C2 && C2 + Trailing < XLen && OneUseOrZExtW && !IsCANDI) { unsigned SrliOpc = RISCV::SRLI; // If the input is zexti32 we should use SRLIW. if (X.getOpcode() == ISD::AND && isa<ConstantSDNode>(X.getOperand(1)) && @@ -976,22 +864,23 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { X = X.getOperand(0); } SDNode *SRLI = CurDAG->getMachineNode( - SrliOpc, DL, VT, X, CurDAG->getTargetConstant(C2 + C3, DL, VT)); + SrliOpc, DL, VT, X, + CurDAG->getTargetConstant(C2 + Trailing, DL, VT)); SDNode *SLLI = CurDAG->getMachineNode(RISCV::SLLI, DL, VT, SDValue(SRLI, 0), - CurDAG->getTargetConstant(C3, DL, VT)); + CurDAG->getTargetConstant(Trailing, DL, VT)); ReplaceNode(Node, SLLI); return; } // If the leading zero count is C2+32, we can use SRLIW instead of SRLI. - if (Leading > 32 && (Leading - 32) == C2 && C2 + C3 < 32 && + if (Leading > 32 && (Leading - 32) == C2 && C2 + Trailing < 32 && OneUseOrZExtW && !IsCANDI) { - SDNode *SRLIW = - CurDAG->getMachineNode(RISCV::SRLIW, DL, VT, X, - CurDAG->getTargetConstant(C2 + C3, DL, VT)); + SDNode *SRLIW = CurDAG->getMachineNode( + RISCV::SRLIW, DL, VT, X, + CurDAG->getTargetConstant(C2 + Trailing, DL, VT)); SDNode *SLLI = CurDAG->getMachineNode(RISCV::SLLI, DL, VT, SDValue(SRLIW, 0), - CurDAG->getTargetConstant(C3, DL, VT)); + CurDAG->getTargetConstant(Trailing, DL, VT)); ReplaceNode(Node, SLLI); return; } @@ -1000,25 +889,26 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { // Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a // shifted mask with no leading zeros and c3 trailing zeros. if (LeftShift && isShiftedMask_64(C1)) { - uint64_t Leading = XLen - (64 - countLeadingZeros(C1)); - uint64_t C3 = countTrailingZeros(C1); - if (Leading == 0 && C2 < C3 && OneUseOrZExtW && !IsCANDI) { + unsigned Leading = XLen - (64 - countLeadingZeros(C1)); + unsigned Trailing = countTrailingZeros(C1); + if (Leading == 0 && C2 < Trailing && OneUseOrZExtW && !IsCANDI) { SDNode *SRLI = CurDAG->getMachineNode( - RISCV::SRLI, DL, VT, X, CurDAG->getTargetConstant(C3 - C2, DL, VT)); + RISCV::SRLI, DL, VT, X, + CurDAG->getTargetConstant(Trailing - C2, DL, VT)); SDNode *SLLI = CurDAG->getMachineNode(RISCV::SLLI, DL, VT, SDValue(SRLI, 0), - CurDAG->getTargetConstant(C3, DL, VT)); + CurDAG->getTargetConstant(Trailing, DL, VT)); ReplaceNode(Node, SLLI); return; } // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI. - if (C2 < C3 && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) { - SDNode *SRLIW = - CurDAG->getMachineNode(RISCV::SRLIW, DL, VT, X, - CurDAG->getTargetConstant(C3 - C2, DL, VT)); + if (C2 < Trailing && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) { + SDNode *SRLIW = CurDAG->getMachineNode( + RISCV::SRLIW, DL, VT, X, + CurDAG->getTargetConstant(Trailing - C2, DL, VT)); SDNode *SLLI = CurDAG->getMachineNode(RISCV::SLLI, DL, VT, SDValue(SRLIW, 0), - CurDAG->getTargetConstant(C3, DL, VT)); + CurDAG->getTargetConstant(Trailing, DL, VT)); ReplaceNode(Node, SLLI); return; } @@ -1885,13 +1775,74 @@ bool RISCVDAGToDAGISel::SelectFrameAddrRegImm(SDValue Addr, SDValue &Base, return false; } -bool RISCVDAGToDAGISel::SelectBaseAddr(SDValue Addr, SDValue &Base) { - // If this is FrameIndex, select it directly. 
Otherwise just let it get - // selected to a register independently. - if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT()); - else - Base = Addr; +// Fold constant addresses. +static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL, + const MVT VT, const RISCVSubtarget *Subtarget, + SDValue Addr, SDValue &Base, SDValue &Offset) { + if (!isa<ConstantSDNode>(Addr)) + return false; + + int64_t CVal = cast<ConstantSDNode>(Addr)->getSExtValue(); + + // If the constant is a simm12, we can fold the whole constant and use X0 as + // the base. If the constant can be materialized with LUI+simm12, use LUI as + // the base. We can't use generateInstSeq because it favors LUI+ADDIW. + int64_t Lo12 = SignExtend64<12>(CVal); + int64_t Hi = (uint64_t)CVal - (uint64_t)Lo12; + if (!Subtarget->is64Bit() || isInt<32>(Hi)) { + if (Hi) { + int64_t Hi20 = (Hi >> 12) & 0xfffff; + Base = SDValue( + CurDAG->getMachineNode(RISCV::LUI, DL, VT, + CurDAG->getTargetConstant(Hi20, DL, VT)), + 0); + } else { + Base = CurDAG->getRegister(RISCV::X0, VT); + } + Offset = CurDAG->getTargetConstant(Lo12, DL, VT); + return true; + } + + // Ask how constant materialization would handle this constant. + RISCVMatInt::InstSeq Seq = + RISCVMatInt::generateInstSeq(CVal, Subtarget->getFeatureBits()); + + // If the last instruction would be an ADDI, we can fold its immediate and + // emit the rest of the sequence as the base. + if (Seq.back().Opc != RISCV::ADDI) + return false; + Lo12 = Seq.back().Imm; + + // Drop the last instruction. + Seq.pop_back(); + assert(!Seq.empty() && "Expected more instructions in sequence"); + + Base = SDValue(selectImmSeq(CurDAG, DL, VT, Seq), 0); + Offset = CurDAG->getTargetConstant(Lo12, DL, VT); + return true; +} + +// Is this ADD instruction only used as the base pointer of scalar loads and +// stores? +static bool isWorthFoldingAdd(SDValue Add) { + for (auto Use : Add->uses()) { + if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE && + Use->getOpcode() != ISD::ATOMIC_LOAD && + Use->getOpcode() != ISD::ATOMIC_STORE) + return false; + EVT VT = cast<MemSDNode>(Use)->getMemoryVT(); + if (!VT.isScalarInteger() && VT != MVT::f16 && VT != MVT::f32 && + VT != MVT::f64) + return false; + // Don't allow stores of the value. It must be used as the address. + if (Use->getOpcode() == ISD::STORE && + cast<StoreSDNode>(Use)->getValue() == Add) + return false; + if (Use->getOpcode() == ISD::ATOMIC_STORE && + cast<AtomicSDNode>(Use)->getVal() == Add) + return false; + } + return true; } @@ -1947,9 +1898,10 @@ bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base, int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue(); assert(!isInt<12>(CVal) && "simm12 not already handled?"); + // Handle immediates in the range [-4096,-2049] or [2048, 4094]. We can use + // an ADDI for part of the offset and fold the rest into the load/store. + // This mirrors the AddiPair PatFrag in RISCVInstrInfo.td. if (isInt<12>(CVal / 2) && isInt<12>(CVal - CVal / 2)) { - // We can use an ADDI for part of the offset and fold the rest into the - // load/store. This mirrors the AddiPair PatFrag in RISCVInstrInfo.td. int64_t Adj = CVal < 0 ? 
-2048 : 2047; Base = SDValue( CurDAG->getMachineNode(RISCV::ADDI, DL, VT, Addr.getOperand(0), @@ -1958,8 +1910,27 @@ bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base, Offset = CurDAG->getTargetConstant(CVal - Adj, DL, VT); return true; } + + // For larger immediates, we might be able to save one instruction from + // constant materialization by folding the Lo12 bits of the immediate into + // the address. We should only do this if the ADD is only used by loads and + // stores that can fold the lo12 bits. Otherwise, the ADD will get iseled + // separately with the full materialized immediate creating extra + // instructions. + if (isWorthFoldingAdd(Addr) && + selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base, + Offset)) { + // Insert an ADD instruction with the materialized Hi52 bits. + Base = SDValue( + CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base), + 0); + return true; + } } + if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset)) + return true; + Base = Addr; Offset = CurDAG->getTargetConstant(0, DL, VT); return true; @@ -2044,6 +2015,101 @@ bool RISCVDAGToDAGISel::selectZExti32(SDValue N, SDValue &Val) { return false; } +/// Look for various patterns that can be done with a SHL that can be folded +/// into a SHXADD. \p ShAmt contains 1, 2, or 3 and is set based on which +/// SHXADD we are trying to match. +bool RISCVDAGToDAGISel::selectSHXADDOp(SDValue N, unsigned ShAmt, + SDValue &Val) { + if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) { + SDValue N0 = N.getOperand(0); + + bool LeftShift = N0.getOpcode() == ISD::SHL; + if ((LeftShift || N0.getOpcode() == ISD::SRL) && + isa<ConstantSDNode>(N0.getOperand(1))) { + uint64_t Mask = N.getConstantOperandVal(1); + unsigned C2 = N0.getConstantOperandVal(1); + + unsigned XLen = Subtarget->getXLen(); + if (LeftShift) + Mask &= maskTrailingZeros<uint64_t>(C2); + else + Mask &= maskTrailingOnes<uint64_t>(XLen - C2); + + // Look for (and (shl y, c2), c1) where c1 is a shifted mask with no + // leading zeros and c3 trailing zeros. We can use an SRLI by c3-c2 + // followed by a SHXADD with c3 for the X amount. + if (isShiftedMask_64(Mask)) { + unsigned Leading = XLen - (64 - countLeadingZeros(Mask)); + unsigned Trailing = countTrailingZeros(Mask); + if (LeftShift && Leading == 0 && C2 < Trailing && Trailing == ShAmt) { + SDLoc DL(N); + EVT VT = N.getValueType(); + Val = SDValue(CurDAG->getMachineNode( + RISCV::SRLI, DL, VT, N0.getOperand(0), + CurDAG->getTargetConstant(Trailing - C2, DL, VT)), + 0); + return true; + } + // Look for (and (shr y, c2), c1) where c1 is a shifted mask with c2 + // leading zeros and c3 trailing zeros. We can use an SRLI by c2+c3 + // followed by a SHXADD using c3 for the X amount. 
+ if (!LeftShift && Leading == C2 && Trailing == ShAmt) { + SDLoc DL(N); + EVT VT = N.getValueType(); + Val = SDValue( + CurDAG->getMachineNode( + RISCV::SRLI, DL, VT, N0.getOperand(0), + CurDAG->getTargetConstant(Leading + Trailing, DL, VT)), + 0); + return true; + } + } + } + } + + bool LeftShift = N.getOpcode() == ISD::SHL; + if ((LeftShift || N.getOpcode() == ISD::SRL) && + isa<ConstantSDNode>(N.getOperand(1))) { + SDValue N0 = N.getOperand(0); + if (N0.getOpcode() == ISD::AND && N0.hasOneUse() && + isa<ConstantSDNode>(N0.getOperand(1))) { + uint64_t Mask = N0.getConstantOperandVal(1); + if (isShiftedMask_64(Mask)) { + unsigned C1 = N.getConstantOperandVal(1); + unsigned XLen = Subtarget->getXLen(); + unsigned Leading = XLen - (64 - countLeadingZeros(Mask)); + unsigned Trailing = countTrailingZeros(Mask); + // Look for (shl (and X, Mask), C1) where Mask has 32 leading zeros and + // C3 trailing zeros. If C1+C3==ShAmt we can use SRLIW+SHXADD. + if (LeftShift && Leading == 32 && Trailing > 0 && + (Trailing + C1) == ShAmt) { + SDLoc DL(N); + EVT VT = N.getValueType(); + Val = SDValue(CurDAG->getMachineNode( + RISCV::SRLIW, DL, VT, N0.getOperand(0), + CurDAG->getTargetConstant(Trailing, DL, VT)), + 0); + return true; + } + // Look for (srl (and X, Mask), C1) where Mask has 32 leading zeros and + // C3 trailing zeros. If C3-C1==ShAmt we can use SRLIW+SHXADD. + if (!LeftShift && Leading == 32 && Trailing > C1 && + (Trailing - C1) == ShAmt) { + SDLoc DL(N); + EVT VT = N.getValueType(); + Val = SDValue(CurDAG->getMachineNode( + RISCV::SRLIW, DL, VT, N0.getOperand(0), + CurDAG->getTargetConstant(Trailing, DL, VT)), + 0); + return true; + } + } + } + } + + return false; +} + // Return true if all users of this SDNode* only consume the lower \p Bits. // This can be used to form W instructions for add/sub/mul/shl even when the // root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if @@ -2271,102 +2337,6 @@ bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width, return false; } -// Merge an ADDI into the offset of a load/store instruction where possible. -// (load (addi base, off1), off2) -> (load base, off1+off2) -// (store val, (addi base, off1), off2) -> (store val, base, off1+off2) -// (load (add base, (addi src, off1)), off2) -// -> (load (add base, src), off1+off2) -// (store val, (add base, (addi src, off1)), off2) -// -> (store val, (add base, src), off1+off2) -// This is possible when off1+off2 fits a 12-bit immediate. -bool RISCVDAGToDAGISel::doPeepholeLoadStoreADDI(SDNode *N) { - unsigned OffsetOpIdx, BaseOpIdx; - if (!hasMemOffset(N, BaseOpIdx, OffsetOpIdx)) - return false; - - if (!isa<ConstantSDNode>(N->getOperand(OffsetOpIdx))) - return false; - - SDValue Base = N->getOperand(BaseOpIdx); - - if (!Base.isMachineOpcode()) - return false; - - if (Base.getMachineOpcode() == RISCV::ADDI) { - // If the base is an ADDI, we can merge it in to the load/store. - } else if (Base.getMachineOpcode() == RISCV::ADDIW && - isa<ConstantSDNode>(Base.getOperand(1)) && - Base.getOperand(0).isMachineOpcode() && - Base.getOperand(0).getMachineOpcode() == RISCV::LUI && - isa<ConstantSDNode>(Base.getOperand(0).getOperand(0))) { - // ADDIW can be merged if it's part of LUI+ADDIW constant materialization - // and LUI+ADDI would have produced the same result. This is true for all - // simm32 values except 0x7ffff800-0x7fffffff. 
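The LUI+ADDI split that comment relies on can be checked in isolation. A minimal sketch (not from the patch; the constant is illustrative): Lo12 is the sign-extended low 12 bits, Hi20 compensates for it, and LUI's sign extension of its 32-bit result on RV64 is exactly why 0x7ffff800-0x7fffffff is the exception range.

#include <cassert>
#include <cstdint>

int main() {
  int64_t CVal = 0x12345678; // any simm32 outside the exception range
  int64_t Low = CVal & 0xfff;
  int64_t Lo12 = Low >= 0x800 ? Low - 0x1000 : Low; // sign-extended simm12
  int64_t Hi20 = ((CVal - Lo12) >> 12) & 0xfffff;
  int64_t Lui = (int32_t)(Hi20 << 12); // LUI sign-extends on RV64
  assert(Lui + Lo12 == CVal);
  // For CVal = 0x7ffff800, Lo12 = -2048 and Hi20 = 0x80000, so LUI produces
  // a negative value and LUI+ADDI no longer reconstructs CVal; ADDIW's
  // 32-bit truncation is what rescues that range.
  return 0;
}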
- int64_t Offset = - SignExtend64<32>(Base.getOperand(0).getConstantOperandVal(0) << 12); - Offset += cast<ConstantSDNode>(Base.getOperand(1))->getSExtValue(); - if (!isInt<32>(Offset)) - return false; - } else - return false; - - SDValue ImmOperand = Base.getOperand(1); - uint64_t Offset2 = N->getConstantOperandVal(OffsetOpIdx); - - if (auto *Const = dyn_cast<ConstantSDNode>(ImmOperand)) { - int64_t Offset1 = Const->getSExtValue(); - int64_t CombinedOffset = Offset1 + Offset2; - if (!isInt<12>(CombinedOffset)) - return false; - ImmOperand = CurDAG->getTargetConstant(CombinedOffset, SDLoc(ImmOperand), - ImmOperand.getValueType()); - } else if (auto *GA = dyn_cast<GlobalAddressSDNode>(ImmOperand)) { - // If the off1 in (addi base, off1) is a global variable's address (its - // low part, really), then we can rely on the alignment of that variable - // to provide a margin of safety before off1 can overflow the 12 bits. - // Check if off2 falls within that margin; if so off1+off2 can't overflow. - const DataLayout &DL = CurDAG->getDataLayout(); - Align Alignment = commonAlignment(GA->getGlobal()->getPointerAlignment(DL), - GA->getOffset()); - if (Offset2 != 0 && Alignment <= Offset2) - return false; - int64_t Offset1 = GA->getOffset(); - int64_t CombinedOffset = Offset1 + Offset2; - ImmOperand = CurDAG->getTargetGlobalAddress( - GA->getGlobal(), SDLoc(ImmOperand), ImmOperand.getValueType(), - CombinedOffset, GA->getTargetFlags()); - } else if (auto *CP = dyn_cast<ConstantPoolSDNode>(ImmOperand)) { - // Ditto. - Align Alignment = commonAlignment(CP->getAlign(), CP->getOffset()); - if (Offset2 != 0 && Alignment <= Offset2) - return false; - int64_t Offset1 = CP->getOffset(); - int64_t CombinedOffset = Offset1 + Offset2; - ImmOperand = CurDAG->getTargetConstantPool( - CP->getConstVal(), ImmOperand.getValueType(), CP->getAlign(), - CombinedOffset, CP->getTargetFlags()); - } else { - return false; - } - - LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: "); - LLVM_DEBUG(Base->dump(CurDAG)); - LLVM_DEBUG(dbgs() << "\nN: "); - LLVM_DEBUG(N->dump(CurDAG)); - LLVM_DEBUG(dbgs() << "\n"); - - // Modify the offset operand of the load/store. - if (BaseOpIdx == 0) { // Load - N = CurDAG->UpdateNodeOperands(N, Base.getOperand(0), ImmOperand, - N->getOperand(2)); - } else { // Store - N = CurDAG->UpdateNodeOperands(N, N->getOperand(0), Base.getOperand(0), - ImmOperand, N->getOperand(3)); - } - - return true; -} - // Try to remove sext.w if the input is a W instruction or can be made into // a W instruction cheaply. 
bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) { diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h index b50927cfcca5..ef46204c00ac 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h @@ -47,7 +47,6 @@ public: bool SelectAddrFrameIndex(SDValue Addr, SDValue &Base, SDValue &Offset); bool SelectFrameAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset); - bool SelectBaseAddr(SDValue Addr, SDValue &Base); bool SelectAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset); bool selectShiftMask(SDValue N, unsigned ShiftWidth, SDValue &ShAmt); @@ -61,6 +60,17 @@ public: bool selectSExti32(SDValue N, SDValue &Val); bool selectZExti32(SDValue N, SDValue &Val); + bool selectSHXADDOp(SDValue N, unsigned ShAmt, SDValue &Val); + bool selectSH1ADDOp(SDValue N, SDValue &Val) { + return selectSHXADDOp(N, 1, Val); + } + bool selectSH2ADDOp(SDValue N, SDValue &Val) { + return selectSHXADDOp(N, 2, Val); + } + bool selectSH3ADDOp(SDValue N, SDValue &Val) { + return selectSHXADDOp(N, 3, Val); + } + bool hasAllNBitUsers(SDNode *Node, unsigned Bits) const; bool hasAllHUsers(SDNode *Node) const { return hasAllNBitUsers(Node, 16); } bool hasAllWUsers(SDNode *Node) const { return hasAllNBitUsers(Node, 32); } @@ -118,7 +128,6 @@ public: #include "RISCVGenDAGISel.inc" private: - bool doPeepholeLoadStoreADDI(SDNode *Node); bool doPeepholeSExtW(SDNode *Node); bool doPeepholeMaskedRVV(SDNode *Node); }; diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index ff645dea4e7a..658865703079 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -526,6 +526,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, {ISD::VP_FPTOSI, ISD::VP_FPTOUI, ISD::VP_TRUNCATE, ISD::VP_SETCC}, VT, Custom); setOperationAction(ISD::VECTOR_REVERSE, VT, Custom); + + setOperationPromotedToType( + ISD::VECTOR_SPLICE, VT, + MVT::getVectorVT(MVT::i8, VT.getVectorElementCount())); } for (MVT VT : IntVecVTs) { @@ -1157,6 +1161,37 @@ bool RISCVTargetLowering::hasBitTest(SDValue X, SDValue Y) const { return C && C->getAPIntValue().ule(10); } +bool RISCVTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, + Type *Ty) const { + assert(Ty->isIntegerTy()); + + unsigned BitSize = Ty->getIntegerBitWidth(); + if (BitSize > Subtarget.getXLen()) + return false; + + // Fast path, assume 32-bit immediates are cheap. + int64_t Val = Imm.getSExtValue(); + if (isInt<32>(Val)) + return true; + + // A constant pool entry may be more aligned than the load we're trying to + // replace. If we don't support unaligned scalar mem, prefer the constant + // pool. + // TODO: Can the caller pass down the alignment? + if (!Subtarget.enableUnalignedScalarMem()) + return true; + + // Prefer to keep the load if it would require many instructions. + // This uses the same threshold we use for constant pools but doesn't + // check useConstantPoolForLargeInts. + // TODO: Should we keep the load only when we're definitely going to emit a + // constant pool? 
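The hook above reduces to a simple policy: simm32 values are always converted, and larger values only when materialization fits the same budget used for constant pools. A simplified standalone decision function (the chunk-counting cost is a stand-in for RISCVMatInt::generateInstSeq().size(), not the real sequence generator):

#include <cassert>
#include <cstdint>

// Stand-in cost model: count the 32-bit chunks a naive materialization
// would touch. The real code asks RISCVMatInt for the exact count.
static unsigned matCost(int64_t Val) {
  unsigned Cost = 1;
  for (Val >>= 32; Val != 0 && Val != -1; Val >>= 32)
    ++Cost;
  return Cost;
}

static bool convertLoadToImm(int64_t Val, unsigned MaxBuildIntsCost) {
  if (Val == (int32_t)Val)
    return true; // fast path: 32-bit immediates are cheap
  return matCost(Val) <= MaxBuildIntsCost;
}

int main() {
  assert(convertLoadToImm(42, 2));                  // simm32: always convert
  assert(!convertLoadToImm(0x123456789abcdef0, 1)); // too costly: keep load
  return 0;
}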
+ + RISCVMatInt::InstSeq Seq = + RISCVMatInt::generateInstSeq(Val, Subtarget.getFeatureBits()); + return Seq.size() <= Subtarget.getMaxBuildIntsCost(); +} + bool RISCVTargetLowering:: shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd( SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, @@ -1659,7 +1694,7 @@ static SDValue convertFromScalableVector(EVT VT, SDValue V, SelectionDAG &DAG, /// Return the type of the mask type suitable for masking the provided /// vector type. This is simply an i1 element type vector of the same /// (possibly scalable) length. -static MVT getMaskTypeFor(EVT VecVT) { +static MVT getMaskTypeFor(MVT VecVT) { assert(VecVT.isVector()); ElementCount EC = VecVT.getVectorElementCount(); return MVT::getVectorVT(MVT::i1, EC); @@ -5748,8 +5783,7 @@ SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op, DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VecVT, DAG.getUNDEF(VecVT), V1, DownOffset, TrueMask, UpOffset); return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VecVT, SlideDown, V2, UpOffset, - TrueMask, - DAG.getTargetConstant(RISCV::VLMaxSentinel, DL, XLenVT)); + TrueMask, DAG.getRegister(RISCV::X0, XLenVT)); } SDValue @@ -8530,12 +8564,6 @@ static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) { return Opcode; } -// Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C) -// FIXME: Should this be a generic combine? There's a similar combine on X86. -// -// Also try these folds where an add or sub is in the middle. -// (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1), C) -// (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X), C) static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget) { assert(N->getOpcode() == ISD::SRA && "Unexpected opcode"); @@ -8543,12 +8571,40 @@ static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, if (N->getValueType(0) != MVT::i64 || !Subtarget.is64Bit()) return SDValue(); - auto *ShAmtC = dyn_cast<ConstantSDNode>(N->getOperand(1)); - if (!ShAmtC || ShAmtC->getZExtValue() > 32) + if (!isa<ConstantSDNode>(N->getOperand(1))) + return SDValue(); + uint64_t ShAmt = N->getConstantOperandVal(1); + if (ShAmt > 32) return SDValue(); SDValue N0 = N->getOperand(0); + // Combine (sra (sext_inreg (shl X, C1), i32), C2) -> + // (sra (shl X, C1+32), C2+32) so it gets selected as SLLI+SRAI instead of + // SLLIW+SRAIW. SLLI+SRAI have compressed forms. + if (ShAmt < 32 && + N0.getOpcode() == ISD::SIGN_EXTEND_INREG && N0.hasOneUse() && + cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32 && + N0.getOperand(0).getOpcode() == ISD::SHL && N0.getOperand(0).hasOneUse() && + isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) { + uint64_t LShAmt = N0.getOperand(0).getConstantOperandVal(1); + if (LShAmt < 32) { + SDLoc ShlDL(N0.getOperand(0)); + SDValue Shl = DAG.getNode(ISD::SHL, ShlDL, MVT::i64, + N0.getOperand(0).getOperand(0), + DAG.getConstant(LShAmt + 32, ShlDL, MVT::i64)); + SDLoc DL(N); + return DAG.getNode(ISD::SRA, DL, MVT::i64, Shl, + DAG.getConstant(ShAmt + 32, DL, MVT::i64)); + } + } + + // Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C) + // FIXME: Should this be a generic combine? There's a similar combine on X86. + // + // Also try these folds where an add or sub is in the middle. 
+ // (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1), C) + // (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X), C) SDValue Shl; ConstantSDNode *AddC = nullptr; @@ -8594,12 +8650,12 @@ static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, In, DAG.getValueType(MVT::i32)); - if (ShAmtC->getZExtValue() == 32) + if (ShAmt == 32) return SExt; return DAG.getNode( ISD::SHL, DL, MVT::i64, SExt, - DAG.getConstant(32 - ShAmtC->getZExtValue(), DL, MVT::i64)); + DAG.getConstant(32 - ShAmt, DL, MVT::i64)); } SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, @@ -9152,10 +9208,10 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, // FIXME: Support FP. if (Val.getOpcode() == RISCVISD::VMV_X_S) { SDValue Src = Val.getOperand(0); - EVT VecVT = Src.getValueType(); + MVT VecVT = Src.getSimpleValueType(); EVT MemVT = Store->getMemoryVT(); // The memory VT and the element type must match. - if (VecVT.getVectorElementType() == MemVT) { + if (MemVT == VecVT.getVectorElementType()) { SDLoc DL(N); MVT MaskVT = getMaskTypeFor(VecVT); return DAG.getStoreVP( @@ -9864,7 +9920,7 @@ EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second, Register FLHS = First.getOperand(1).getReg(); Register FRHS = First.getOperand(2).getReg(); // Insert appropriate branch. - BuildMI(ThisMBB, DL, TII.getBrCond(FirstCC)) + BuildMI(FirstMBB, DL, TII.getBrCond(FirstCC)) .addReg(FLHS) .addReg(FRHS) .addMBB(SinkMBB); @@ -9876,7 +9932,7 @@ EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second, auto SecondCC = static_cast<RISCVCC::CondCode>(Second.getOperand(3).getImm()); // Insert appropriate branch. - BuildMI(FirstMBB, DL, TII.getBrCond(SecondCC)) + BuildMI(ThisMBB, DL, TII.getBrCond(SecondCC)) .addReg(SLHS) .addReg(SRHS) .addMBB(SinkMBB); @@ -9884,9 +9940,9 @@ EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second, Register DestReg = Second.getOperand(0).getReg(); Register Op2Reg4 = Second.getOperand(4).getReg(); BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII.get(RISCV::PHI), DestReg) - .addReg(Op1Reg4) - .addMBB(ThisMBB) .addReg(Op2Reg4) + .addMBB(ThisMBB) + .addReg(Op1Reg4) .addMBB(FirstMBB) .addReg(Op1Reg5) .addMBB(SecondMBB); @@ -12096,6 +12152,17 @@ const MCExpr *RISCVTargetLowering::LowerCustomJumpTableEntry( return MCSymbolRefExpr::create(MBB->getSymbol(), Ctx); } +bool RISCVTargetLowering::isVScaleKnownToBeAPowerOfTwo() const { + // We define vscale to be VLEN/RVVBitsPerBlock. VLEN is always a power + // of two >= 64, and RVVBitsPerBlock is 64. Thus, vscale must be + // a power of two as well. + // FIXME: This doesn't work for zve32, but that's already broken + // elsewhere for the same reason. 
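The power-of-two claim in that comment is easy to sanity-check on its own (a standalone sketch; the VLEN range is illustrative, not from the patch):

#include <cassert>
#include <cstdint>

static bool isPow2(uint64_t V) { return V != 0 && (V & (V - 1)) == 0; }

int main() {
  // vscale = VLEN / RVVBitsPerBlock with RVVBitsPerBlock == 64; for every
  // power-of-two VLEN >= 64, the quotient is itself a power of two.
  for (uint64_t VLen = 64; VLen <= 65536; VLen *= 2)
    assert(isPow2(VLen / 64));
  return 0;
}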
+ assert(Subtarget.getRealMinVLen() >= 64 && "zve32* unsupported"); + assert(RISCV::RVVBitsPerBlock == 64 && "RVVBitsPerBlock changed, audit needed"); + return true; +} + bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const { VT = VT.getScalarType(); diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index eb013d4b6682..5e15176de59c 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -520,9 +520,7 @@ public: SmallVectorImpl<SDValue> &InVals) const override; bool shouldConvertConstantLoadToIntImm(const APInt &Imm, - Type *Ty) const override { - return true; - } + Type *Ty) const override; bool mayBeEmittedAsTailCall(const CallInst *CI) const override; bool shouldConsiderGEPOffsetSplit() const override { return true; } @@ -599,6 +597,8 @@ public: unsigned uid, MCContext &Ctx) const override; + bool isVScaleKnownToBeAPowerOfTwo() const override; + private: /// RISCVCCAssignFn - This target-specific function extends the default /// CCValAssign with additional information used to lower RISC-V calling diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td index ee4c026af8f4..06a90438838e 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -384,7 +384,6 @@ def uimm6gt32 : ImmLeaf<XLenVT, [{ // Necessary because a frameindex can't be matched directly in a pattern. def FrameAddrRegImm : ComplexPattern<iPTR, 2, "SelectFrameAddrRegImm", [frameindex, or, add]>; -def BaseAddr : ComplexPattern<iPTR, 1, "SelectBaseAddr">; def AddrRegImm : ComplexPattern<iPTR, 2, "SelectAddrRegImm">; // Return the negation of an immediate value. diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td index f8bc241039f8..1ad634344c09 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td @@ -115,6 +115,35 @@ class VSXSched<int n, string o> : class VLFSched<int n> : Sched <[!cast<SchedReadWrite>("WriteVLDFF" # n), ReadVLDX, ReadVMask]>; +// Unit-Stride Segment Loads and Stores +class VLSEGSched<int nf, int eew> : Sched<[ + !cast<SchedReadWrite>("WriteVLSEG" #nf #"e" #eew), ReadVLDX, ReadVMask]>; +class VSSEGSched<int nf, int eew> : Sched<[ + !cast<SchedReadWrite>("WriteVSSEG" #nf #"e" #eew), + !cast<SchedReadWrite>("ReadVSTE" #eew #"V"), ReadVSTX, ReadVMask]>; +class VLSEGFFSched<int nf, int eew> : Sched<[ + !cast<SchedReadWrite>("WriteVLSEGFF" #nf #"e" #eew), ReadVLDX, ReadVMask]>; +// Strided Segment Loads and Stores +class VLSSEGSched<int nf, int eew> : Sched<[ + !cast<SchedReadWrite>("WriteVLSSEG" #nf #"e" #eew), ReadVLDX, ReadVLDSX, + ReadVMask]>; +class VSSSEGSched<int nf, int eew> : Sched<[ + !cast<SchedReadWrite>("WriteVSSSEG" #nf #"e" #eew), + !cast<SchedReadWrite>("ReadVSTS" #eew #"V"), ReadVSTX, ReadVSTSX, ReadVMask]>; +// Indexed Segment Loads and Stores +class VLUXSEGSched<int nf, int eew> : Sched<[ + !cast<SchedReadWrite>("WriteVLUXSEG" #nf #"e" #eew), ReadVLDX, ReadVLDUXV, + ReadVMask]>; +class VLOXSEGSched<int nf, int eew> : Sched<[ + !cast<SchedReadWrite>("WriteVLOXSEG" #nf #"e" #eew), ReadVLDX, ReadVLDOXV, + ReadVMask]>; +class VSUXSEGSched<int nf, int eew> : Sched<[ + !cast<SchedReadWrite>("WriteVSUXSEG" #nf #"e" #eew), + !cast<SchedReadWrite>("ReadVSTUX" #eew), ReadVSTX, ReadVSTUXV, ReadVMask]>; +class VSOXSEGSched<int nf, int eew> : Sched<[ + !cast<SchedReadWrite>("WriteVSOXSEG" #nf #"e" 
#eew), + !cast<SchedReadWrite>("ReadVSTOX" #eew), ReadVSTX, ReadVSTOXV, ReadVMask]>; + //===----------------------------------------------------------------------===// // Instruction class templates //===----------------------------------------------------------------------===// @@ -1476,14 +1505,9 @@ defm VCOMPRESS_V : VCPR_MV_Mask<"vcompress", 0b010111>; let hasSideEffects = 0, mayLoad = 0, mayStore = 0, RVVConstraint = NoConstraint in { -def VMV1R_V : RVInstV<0b100111, 0, OPIVI, (outs VR:$vd), (ins VR:$vs2), - "vmv1r.v", "$vd, $vs2">, VMVRSched<1> { - let Uses = []; - let vm = 1; -} // A future extension may relax the vector register alignment restrictions. -foreach n = [2, 4, 8] in { - defvar vrc = !cast<VReg>("VRM"#n); +foreach n = [1, 2, 4, 8] in { + defvar vrc = !cast<VReg>(!if(!eq(n, 1), "VR", "VRM"#n)); def VMV#n#R_V : RVInstV<0b100111, !add(n, -1), OPIVI, (outs vrc:$vd), (ins vrc:$vs2), "vmv" # n # "r.v", "$vd, $vs2">, VMVRSched<n> { @@ -1500,31 +1524,35 @@ let Predicates = [HasVInstructions] in { defvar w = !cast<RISCVWidth>("LSWidth"#eew); def VLSEG#nf#E#eew#_V : - VUnitStrideSegmentLoad<!add(nf, -1), w, "vlseg"#nf#"e"#eew#".v">; + VUnitStrideSegmentLoad<!add(nf, -1), w, "vlseg"#nf#"e"#eew#".v">, + VLSEGSched<nf, eew>; def VLSEG#nf#E#eew#FF_V : - VUnitStrideSegmentLoadFF<!add(nf, -1), w, "vlseg"#nf#"e"#eew#"ff.v">; + VUnitStrideSegmentLoadFF<!add(nf, -1), w, "vlseg"#nf#"e"#eew#"ff.v">, + VLSEGFFSched<nf, eew>; def VSSEG#nf#E#eew#_V : - VUnitStrideSegmentStore<!add(nf, -1), w, "vsseg"#nf#"e"#eew#".v">; - + VUnitStrideSegmentStore<!add(nf, -1), w, "vsseg"#nf#"e"#eew#".v">, + VSSEGSched<nf, eew>; // Vector Strided Instructions def VLSSEG#nf#E#eew#_V : - VStridedSegmentLoad<!add(nf, -1), w, "vlsseg"#nf#"e"#eew#".v">; + VStridedSegmentLoad<!add(nf, -1), w, "vlsseg"#nf#"e"#eew#".v">, + VLSSEGSched<nf, eew>; def VSSSEG#nf#E#eew#_V : - VStridedSegmentStore<!add(nf, -1), w, "vssseg"#nf#"e"#eew#".v">; + VStridedSegmentStore<!add(nf, -1), w, "vssseg"#nf#"e"#eew#".v">, + VSSSEGSched<nf, eew>; // Vector Indexed Instructions def VLUXSEG#nf#EI#eew#_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedUnord, w, - "vluxseg"#nf#"ei"#eew#".v">; + "vluxseg"#nf#"ei"#eew#".v">, VLUXSEGSched<nf, eew>; def VLOXSEG#nf#EI#eew#_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedOrder, w, - "vloxseg"#nf#"ei"#eew#".v">; + "vloxseg"#nf#"ei"#eew#".v">, VLOXSEGSched<nf, eew>; def VSUXSEG#nf#EI#eew#_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedUnord, w, - "vsuxseg"#nf#"ei"#eew#".v">; + "vsuxseg"#nf#"ei"#eew#".v">, VSUXSEGSched<nf, eew>; def VSOXSEG#nf#EI#eew#_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedOrder, w, - "vsoxseg"#nf#"ei"#eew#".v">; + "vsoxseg"#nf#"ei"#eew#".v">, VSOXSEGSched<nf, eew>; } } } // Predicates = [HasVInstructions] @@ -1533,17 +1561,22 @@ let Predicates = [HasVInstructionsI64] in { foreach nf=2-8 in { // Vector Unit-strided Segment Instructions def VLSEG#nf#E64_V : - VUnitStrideSegmentLoad<!add(nf, -1), LSWidth64, "vlseg"#nf#"e64.v">; + VUnitStrideSegmentLoad<!add(nf, -1), LSWidth64, "vlseg"#nf#"e64.v">, + VLSEGSched<nf, 64>; def VLSEG#nf#E64FF_V : - VUnitStrideSegmentLoadFF<!add(nf, -1), LSWidth64, "vlseg"#nf#"e64ff.v">; + VUnitStrideSegmentLoadFF<!add(nf, -1), LSWidth64, "vlseg"#nf#"e64ff.v">, + VLSEGFFSched<nf, 64>; def VSSEG#nf#E64_V : - VUnitStrideSegmentStore<!add(nf, -1), LSWidth64, "vsseg"#nf#"e64.v">; + VUnitStrideSegmentStore<!add(nf, -1), LSWidth64, "vsseg"#nf#"e64.v">, + VSSEGSched<nf, 64>; // Vector Strided Segment Instructions def VLSSEG#nf#E64_V : - 
VStridedSegmentLoad<!add(nf, -1), LSWidth64, "vlsseg"#nf#"e64.v">; + VStridedSegmentLoad<!add(nf, -1), LSWidth64, "vlsseg"#nf#"e64.v">, + VLSSEGSched<nf, 64>; def VSSSEG#nf#E64_V : - VStridedSegmentStore<!add(nf, -1), LSWidth64, "vssseg"#nf#"e64.v">; + VStridedSegmentStore<!add(nf, -1), LSWidth64, "vssseg"#nf#"e64.v">, + VSSSEGSched<nf, 64>; } } // Predicates = [HasVInstructionsI64] let Predicates = [HasVInstructionsI64, IsRV64] in { @@ -1551,16 +1584,16 @@ let Predicates = [HasVInstructionsI64, IsRV64] in { // Vector Indexed Segment Instructions def VLUXSEG#nf#EI64_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedUnord, LSWidth64, - "vluxseg"#nf#"ei64.v">; + "vluxseg"#nf#"ei64.v">, VLUXSEGSched<nf, 64>; def VLOXSEG#nf#EI64_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedOrder, LSWidth64, - "vloxseg"#nf#"ei64.v">; + "vloxseg"#nf#"ei64.v">, VLOXSEGSched<nf, 64>; def VSUXSEG#nf#EI64_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedUnord, LSWidth64, - "vsuxseg"#nf#"ei64.v">; + "vsuxseg"#nf#"ei64.v">, VSUXSEGSched<nf, 64>; def VSOXSEG#nf#EI64_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedOrder, LSWidth64, - "vsoxseg"#nf#"ei64.v">; + "vsoxseg"#nf#"ei64.v">, VSOXSEGSched<nf, 64>; } } // Predicates = [HasVInstructionsI64, IsRV64] diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td index 06d4c4d0a9e6..b7b25643e397 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td @@ -34,11 +34,11 @@ multiclass VPatUSLoadStoreSDNode<ValueType type, defvar load_instr = !cast<Instruction>("PseudoVLE"#sew#"_V_"#vlmul.MX); defvar store_instr = !cast<Instruction>("PseudoVSE"#sew#"_V_"#vlmul.MX); // Load - def : Pat<(type (load BaseAddr:$rs1)), - (load_instr BaseAddr:$rs1, avl, log2sew)>; + def : Pat<(type (load GPR:$rs1)), + (load_instr GPR:$rs1, avl, log2sew)>; // Store - def : Pat<(store type:$rs2, BaseAddr:$rs1), - (store_instr reg_class:$rs2, BaseAddr:$rs1, avl, log2sew)>; + def : Pat<(store type:$rs2, GPR:$rs1), + (store_instr reg_class:$rs2, GPR:$rs1, avl, log2sew)>; } multiclass VPatUSLoadStoreWholeVRSDNode<ValueType type, @@ -53,11 +53,11 @@ multiclass VPatUSLoadStoreWholeVRSDNode<ValueType type, !cast<Instruction>("VS"#!substr(vlmul.MX, 1)#"R_V"); // Load - def : Pat<(type (load BaseAddr:$rs1)), - (load_instr BaseAddr:$rs1)>; + def : Pat<(type (load GPR:$rs1)), + (load_instr GPR:$rs1)>; // Store - def : Pat<(store type:$rs2, BaseAddr:$rs1), - (store_instr reg_class:$rs2, BaseAddr:$rs1)>; + def : Pat<(store type:$rs2, GPR:$rs1), + (store_instr reg_class:$rs2, GPR:$rs1)>; } multiclass VPatUSLoadStoreMaskSDNode<MTypeInfo m> @@ -65,11 +65,11 @@ multiclass VPatUSLoadStoreMaskSDNode<MTypeInfo m> defvar load_instr = !cast<Instruction>("PseudoVLM_V_"#m.BX); defvar store_instr = !cast<Instruction>("PseudoVSM_V_"#m.BX); // Load - def : Pat<(m.Mask (load BaseAddr:$rs1)), - (load_instr BaseAddr:$rs1, m.AVL, m.Log2SEW)>; + def : Pat<(m.Mask (load GPR:$rs1)), + (load_instr GPR:$rs1, m.AVL, m.Log2SEW)>; // Store - def : Pat<(store m.Mask:$rs2, BaseAddr:$rs1), - (store_instr VR:$rs2, BaseAddr:$rs1, m.AVL, m.Log2SEW)>; + def : Pat<(store m.Mask:$rs2, GPR:$rs1), + (store_instr VR:$rs2, GPR:$rs1, m.AVL, m.Log2SEW)>; } class VPatBinarySDNode_VV<SDNode vop, @@ -1038,10 +1038,14 @@ let Predicates = [HasVInstructionsAnyF] in foreach vti = AllFloatVectors in { // Fold store of vmv.f.s to a vse with VL=1. 
defvar store_instr = !cast<Instruction>("PseudoVSE"#vti.SEW#"_V_"#vti.LMul.MX); - def : Pat<(store (vti.Scalar (int_riscv_vfmv_f_s (vti.Vector vti.RegClass:$rs2))), BaseAddr:$rs1), - (store_instr vti.RegClass:$rs2, BaseAddr:$rs1, 1, vti.Log2SEW)>; - def : Pat<(store (extractelt (vti.Vector vti.RegClass:$rs2), 0), BaseAddr:$rs1), - (store_instr vti.RegClass:$rs2, BaseAddr:$rs1, 1, vti.Log2SEW)>; + + let AddedComplexity = 2 in { + // Add complexity to increase the priority of this pattern being matched. + def : Pat<(store (vti.Scalar (int_riscv_vfmv_f_s (vti.Vector vti.RegClass:$rs2))), GPR:$rs1), + (store_instr vti.RegClass:$rs2, GPR:$rs1, 1, vti.Log2SEW)>; + def : Pat<(store (extractelt (vti.Vector vti.RegClass:$rs2), 0), GPR:$rs1), + (store_instr vti.RegClass:$rs2, GPR:$rs1, 1, vti.Log2SEW)>; + } defvar vmv_f_s_inst = !cast<Instruction>(!strconcat("PseudoVFMV_", vti.ScalarSuffix, diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td index 081f61617d59..49306bb0f4e2 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td @@ -76,13 +76,13 @@ def riscv_urem_vl : SDNode<"RISCVISD::UREM_VL", SDT_RISCVIntBinOp_VL>; def riscv_shl_vl : SDNode<"RISCVISD::SHL_VL", SDT_RISCVIntBinOp_VL>; def riscv_sra_vl : SDNode<"RISCVISD::SRA_VL", SDT_RISCVIntBinOp_VL>; def riscv_srl_vl : SDNode<"RISCVISD::SRL_VL", SDT_RISCVIntBinOp_VL>; -def riscv_smin_vl : SDNode<"RISCVISD::SMIN_VL", SDT_RISCVIntBinOp_VL>; -def riscv_smax_vl : SDNode<"RISCVISD::SMAX_VL", SDT_RISCVIntBinOp_VL>; -def riscv_umin_vl : SDNode<"RISCVISD::UMIN_VL", SDT_RISCVIntBinOp_VL>; -def riscv_umax_vl : SDNode<"RISCVISD::UMAX_VL", SDT_RISCVIntBinOp_VL>; +def riscv_smin_vl : SDNode<"RISCVISD::SMIN_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>; +def riscv_smax_vl : SDNode<"RISCVISD::SMAX_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>; +def riscv_umin_vl : SDNode<"RISCVISD::UMIN_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>; +def riscv_umax_vl : SDNode<"RISCVISD::UMAX_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>; -def riscv_saddsat_vl : SDNode<"RISCVISD::SADDSAT_VL", SDT_RISCVIntBinOp_VL>; -def riscv_uaddsat_vl : SDNode<"RISCVISD::UADDSAT_VL", SDT_RISCVIntBinOp_VL>; +def riscv_saddsat_vl : SDNode<"RISCVISD::SADDSAT_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>; +def riscv_uaddsat_vl : SDNode<"RISCVISD::UADDSAT_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>; def riscv_ssubsat_vl : SDNode<"RISCVISD::SSUBSAT_VL", SDT_RISCVIntBinOp_VL>; def riscv_usubsat_vl : SDNode<"RISCVISD::USUBSAT_VL", SDT_RISCVIntBinOp_VL>; @@ -94,8 +94,8 @@ def riscv_fneg_vl : SDNode<"RISCVISD::FNEG_VL", SDT_RISCVFPUnOp_VL>; def riscv_fabs_vl : SDNode<"RISCVISD::FABS_VL", SDT_RISCVFPUnOp_VL>; def riscv_fsqrt_vl : SDNode<"RISCVISD::FSQRT_VL", SDT_RISCVFPUnOp_VL>; def riscv_fcopysign_vl : SDNode<"RISCVISD::FCOPYSIGN_VL", SDT_RISCVFPBinOp_VL>; -def riscv_fminnum_vl : SDNode<"RISCVISD::FMINNUM_VL", SDT_RISCVFPBinOp_VL>; -def riscv_fmaxnum_vl : SDNode<"RISCVISD::FMAXNUM_VL", SDT_RISCVFPBinOp_VL>; +def riscv_fminnum_vl : SDNode<"RISCVISD::FMINNUM_VL", SDT_RISCVFPBinOp_VL, [SDNPCommutative]>; +def riscv_fmaxnum_vl : SDNode<"RISCVISD::FMAXNUM_VL", SDT_RISCVFPBinOp_VL, [SDNPCommutative]>; def SDT_RISCVVecFMA_VL : SDTypeProfile<1, 5, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td index 9532d1dd3dd2..02ae4f88d56a 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td +++ 
b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td @@ -83,13 +83,13 @@ def shfl_uimm : Operand<XLenVT>, ImmLeaf<XLenVT, [{ def BCLRXForm : SDNodeXForm<imm, [{ // Find the lowest 0. - return CurDAG->getTargetConstant(N->getAPIntValue().countTrailingOnes(), + return CurDAG->getTargetConstant(countTrailingOnes(N->getZExtValue()), SDLoc(N), N->getValueType(0)); }]>; def BSETINVXForm : SDNodeXForm<imm, [{ // Find the lowest 1. - return CurDAG->getTargetConstant(N->getAPIntValue().countTrailingZeros(), + return CurDAG->getTargetConstant(countTrailingZeros(N->getZExtValue()), SDLoc(N), N->getValueType(0)); }]>; @@ -239,6 +239,10 @@ def non_imm12 : PatLeaf<(XLenVT GPR:$a), [{ return !C || !isInt<12>(C->getSExtValue()); }]>; +def sh1add_op : ComplexPattern<XLenVT, 1, "selectSH1ADDOp", [], [], 6>; +def sh2add_op : ComplexPattern<XLenVT, 1, "selectSH2ADDOp", [], [], 6>; +def sh3add_op : ComplexPattern<XLenVT, 1, "selectSH3ADDOp", [], [], 6>; + //===----------------------------------------------------------------------===// // Instruction class templates //===----------------------------------------------------------------------===// @@ -1095,6 +1099,14 @@ def : Pat<(add (shl GPR:$rs1, (XLenVT 2)), non_imm12:$rs2), def : Pat<(add (shl GPR:$rs1, (XLenVT 3)), non_imm12:$rs2), (SH3ADD GPR:$rs1, GPR:$rs2)>; +// More complex cases use a ComplexPattern. +def : Pat<(add sh1add_op:$rs1, non_imm12:$rs2), + (SH1ADD sh1add_op:$rs1, GPR:$rs2)>; +def : Pat<(add sh2add_op:$rs1, non_imm12:$rs2), + (SH2ADD sh2add_op:$rs1, GPR:$rs2)>; +def : Pat<(add sh3add_op:$rs1, non_imm12:$rs2), + (SH3ADD sh3add_op:$rs1, GPR:$rs2)>; + def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 6)), GPR:$rs2), (SH1ADD (SH1ADD GPR:$rs1, GPR:$rs1), GPR:$rs2)>; def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 10)), GPR:$rs2), @@ -1190,18 +1202,6 @@ def : Pat<(i64 (add (and GPR:$rs1, 0x3FFFFFFFC), non_imm12:$rs2)), (SH2ADD_UW (SRLI GPR:$rs1, 2), GPR:$rs2)>; def : Pat<(i64 (add (and GPR:$rs1, 0x7FFFFFFF8), non_imm12:$rs2)), (SH3ADD_UW (SRLI GPR:$rs1, 3), GPR:$rs2)>; - -// Use SRLIW to shift out the LSBs and zero the upper 32-bits. Use SHXADD to -// shift zeros into the LSBs the addition shl amount. -def : Pat<(i64 (add (shl (binop_oneuse<and> GPR:$rs1, 0xFFFFFFFE), (i64 1)), - non_imm12:$rs2)), - (SH2ADD (SRLIW GPR:$rs1, 1), GPR:$rs2)>; -def : Pat<(i64 (add (shl (binop_oneuse<and> GPR:$rs1, 0xFFFFFFFE), (i64 2)), - non_imm12:$rs2)), - (SH3ADD (SRLIW GPR:$rs1, 1), GPR:$rs2)>; -def : Pat<(i64 (add (shl (binop_oneuse<and> GPR:$rs1, 0xFFFFFFFC), (i64 1)), - non_imm12:$rs2)), - (SH3ADD (SRLIW GPR:$rs1, 2), GPR:$rs2)>; } // Predicates = [HasStdExtZba, IsRV64] let Predicates = [HasStdExtZbcOrZbkc] in { diff --git a/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp b/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp index 1fc424411c12..dad0aa476471 100644 --- a/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp +++ b/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp @@ -293,8 +293,16 @@ static void updateOperands(MachineInstr &MI, RegImmPair OldRegImm, assert((isCompressibleLoad(MI) || isCompressibleStore(MI)) && "Unsupported instruction for this optimization."); + int SkipN = 0; + + // Skip the first (value) operand to a store instruction (except if the store + // offset is zero) in order to avoid an incorrect transformation. + // e.g. 
sd a0, 808(a0) to addi a2, a0, 768; sd a2, 40(a2) + if (isCompressibleStore(MI) && OldRegImm.Imm != 0) + SkipN = 1; + // Update registers - for (MachineOperand &MO : MI.operands()) + for (MachineOperand &MO : drop_begin(MI.operands(), SkipN)) if (MO.isReg() && MO.getReg() == OldRegImm.Reg) { // Do not update operands that define the old register. // diff --git a/llvm/lib/Target/RISCV/RISCVScheduleV.td b/llvm/lib/Target/RISCV/RISCVScheduleV.td index 43af1802d706..bafcf47b82e4 100644 --- a/llvm/lib/Target/RISCV/RISCVScheduleV.td +++ b/llvm/lib/Target/RISCV/RISCVScheduleV.td @@ -53,6 +53,20 @@ def WriteVLDFF8 : SchedWrite; def WriteVLDFF16 : SchedWrite; def WriteVLDFF32 : SchedWrite; def WriteVLDFF64 : SchedWrite; +// 7.8. Vector Segment Instructions +foreach nf=2-8 in { + foreach eew = [8, 16, 32, 64] in { + def WriteVLSEG # nf # e # eew : SchedWrite; + def WriteVSSEG # nf # e # eew : SchedWrite; + def WriteVLSEGFF # nf # e # eew : SchedWrite; + def WriteVLSSEG # nf # e # eew : SchedWrite; + def WriteVSSSEG # nf # e # eew : SchedWrite; + def WriteVLUXSEG # nf # e # eew : SchedWrite; + def WriteVLOXSEG # nf # e # eew : SchedWrite; + def WriteVSUXSEG # nf # e # eew : SchedWrite; + def WriteVSOXSEG # nf # e # eew : SchedWrite; + } +} // 7.9. Vector Whole Register Instructions def WriteVLD1R8 : SchedWrite; def WriteVLD1R16 : SchedWrite; @@ -538,6 +552,20 @@ def : WriteRes<WriteVST1R, []>; def : WriteRes<WriteVST2R, []>; def : WriteRes<WriteVST4R, []>; def : WriteRes<WriteVST8R, []>; +// Vector Segment Loads and Stores +foreach nf=2-8 in { + foreach eew = [8, 16, 32, 64] in { + def : WriteRes <!cast<SchedWrite>("WriteVLSEG" # nf # "e" # eew), []>; + def : WriteRes <!cast<SchedWrite>("WriteVLSEGFF" # nf # "e" # eew), []>; + def : WriteRes <!cast<SchedWrite>("WriteVSSEG" # nf # "e" # eew), []>; + def : WriteRes <!cast<SchedWrite>("WriteVLSSEG" # nf # "e" # eew), []>; + def : WriteRes <!cast<SchedWrite>("WriteVSSSEG" # nf # "e" # eew), []>; + def : WriteRes <!cast<SchedWrite>("WriteVLUXSEG" # nf # "e" # eew), []>; + def : WriteRes <!cast<SchedWrite>("WriteVLOXSEG" # nf # "e" # eew), []>; + def : WriteRes <!cast<SchedWrite>("WriteVSUXSEG" # nf # "e" # eew), []>; + def : WriteRes <!cast<SchedWrite>("WriteVSOXSEG" # nf # "e" # eew), []>; + } +} // 12. Vector Integer Arithmetic Instructions def : WriteRes<WriteVIALUV, []>; diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h index 7caf0fedb2ca..96c46fb7554f 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -57,6 +57,10 @@ public: bool shouldExpandReduction(const IntrinsicInst *II) const; bool supportsScalableVectors() const { return ST->hasVInstructions(); } + PredicationStyle emitGetActiveLaneMask() const { + return ST->hasVInstructions() ? 
PredicationStyle::Data + : PredicationStyle::None; + } Optional<unsigned> getMaxVScale() const; Optional<unsigned> getVScaleForTuning() const; diff --git a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCCodeEmitter.cpp b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCCodeEmitter.cpp index d953bc590473..f726f42c9bcb 100644 --- a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCCodeEmitter.cpp +++ b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCCodeEmitter.cpp @@ -46,12 +46,6 @@ public: void encodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const override; - -private: - FeatureBitset computeAvailableFeatures(const FeatureBitset &FB) const; - void - verifyInstructionPredicates(const MCInst &MI, - const FeatureBitset &AvailableFeatures) const; }; } // end anonymous namespace @@ -110,9 +104,6 @@ static void emitUntypedInstrOperands(const MCInst &MI, EndianWriter &OSE) { void SPIRVMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const { - auto Features = computeAvailableFeatures(STI.getFeatureBits()); - verifyInstructionPredicates(MI, Features); - EndianWriter OSE(OS, support::little); // Encode the first 32 SPIR-V bytes with the number of args and the opcode. @@ -128,5 +119,4 @@ void SPIRVMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, emitUntypedInstrOperands(MI, OSE); } -#define ENABLE_INSTR_PREDICATE_VERIFIER #include "SPIRVGenMCCodeEmitter.inc" diff --git a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCTargetDesc.cpp b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCTargetDesc.cpp index 6b8b4a73af92..62ce15550ae7 100644 --- a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCTargetDesc.cpp +++ b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCTargetDesc.cpp @@ -22,6 +22,7 @@ #include "llvm/MC/TargetRegistry.h" #define GET_INSTRINFO_MC_DESC +#define ENABLE_INSTR_PREDICATE_VERIFIER #include "SPIRVGenInstrInfo.inc" #define GET_SUBTARGETINFO_MC_DESC diff --git a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCTargetDesc.h b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCTargetDesc.h index 4009fa96aa68..abc8df34be0a 100644 --- a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCTargetDesc.h +++ b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCTargetDesc.h @@ -44,6 +44,7 @@ std::unique_ptr<MCObjectTargetWriter> createSPIRVObjectTargetWriter(); // Defines symbolic names for the SPIR-V instructions. 
#define GET_INSTRINFO_ENUM +#define GET_INSTRINFO_MC_HELPER_DECLS #include "SPIRVGenInstrInfo.inc" #define GET_SUBTARGETINFO_ENUM diff --git a/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp b/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp index 0de232651377..605bf949187f 100644 --- a/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp @@ -215,6 +215,9 @@ void SPIRVAsmPrinter::outputInstruction(const MachineInstr *MI) { } void SPIRVAsmPrinter::emitInstruction(const MachineInstr *MI) { + SPIRV_MC::verifyInstructionPredicates(MI->getOpcode(), + getSubtargetInfo().getFeatureBits()); + if (!MAI->getSkipEmission(MI)) outputInstruction(MI); diff --git a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp index df07a126eeea..5b6b82aebf30 100644 --- a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp @@ -68,6 +68,7 @@ bool SPIRVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, ArrayRef<ArrayRef<Register>> VRegs, FunctionLoweringInfo &FLI) const { assert(GR && "Must initialize the SPIRV type registry before lowering args."); + GR->setCurrentFunc(MIRBuilder.getMF()); // Assign types and names to all args, and store their types for later. SmallVector<Register, 4> ArgTypeVRegs; @@ -114,6 +115,8 @@ bool SPIRVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, auto MRI = MIRBuilder.getMRI(); Register FuncVReg = MRI->createGenericVirtualRegister(LLT::scalar(32)); MRI->setRegClass(FuncVReg, &SPIRV::IDRegClass); + if (F.isDeclaration()) + GR->add(&F, &MIRBuilder.getMF(), FuncVReg); auto *FTy = F.getFunctionType(); auto FuncTy = GR->assignTypeToVReg(FTy, FuncVReg, MIRBuilder); @@ -136,6 +139,8 @@ bool SPIRVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, MIRBuilder.buildInstr(SPIRV::OpFunctionParameter) .addDef(VRegs[i][0]) .addUse(ArgTypeVRegs[i]); + if (F.isDeclaration()) + GR->add(F.getArg(i), &MIRBuilder.getMF(), VRegs[i][0]); } // Name the function. if (F.hasName()) @@ -165,6 +170,7 @@ bool SPIRVCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, if (Info.OrigRet.Regs.size() > 1) return false; + GR->setCurrentFunc(MIRBuilder.getMF()); Register ResVReg = Info.OrigRet.Regs.empty() ? Register(0) : Info.OrigRet.Regs[0]; // Emit a regular OpFunctionCall. If it's an externally declared function, diff --git a/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.cpp b/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.cpp new file mode 100644 index 000000000000..57cd4bafd351 --- /dev/null +++ b/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.cpp @@ -0,0 +1,95 @@ +//===-- SPIRVDuplicatesTracker.cpp - SPIR-V Duplicates Tracker --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// General infrastructure for keeping track of the values that according to +// the SPIR-V binary layout should be global to the whole module. 
+// +//===----------------------------------------------------------------------===// + +#include "SPIRVDuplicatesTracker.h" + +using namespace llvm; + +template <typename T> +void SPIRVGeneralDuplicatesTracker::prebuildReg2Entry( + SPIRVDuplicatesTracker<T> &DT, SPIRVReg2EntryTy &Reg2Entry) { + for (auto &TPair : DT.getAllUses()) { + for (auto &RegPair : TPair.second) { + const MachineFunction *MF = RegPair.first; + Register R = RegPair.second; + MachineInstr *MI = MF->getRegInfo().getUniqueVRegDef(R); + if (!MI) + continue; + Reg2Entry[&MI->getOperand(0)] = &TPair.second; + } + } +} + +void SPIRVGeneralDuplicatesTracker::buildDepsGraph( + std::vector<SPIRV::DTSortableEntry *> &Graph, + MachineModuleInfo *MMI = nullptr) { + SPIRVReg2EntryTy Reg2Entry; + prebuildReg2Entry(TT, Reg2Entry); + prebuildReg2Entry(CT, Reg2Entry); + prebuildReg2Entry(GT, Reg2Entry); + prebuildReg2Entry(FT, Reg2Entry); + prebuildReg2Entry(AT, Reg2Entry); + + for (auto &Op2E : Reg2Entry) { + SPIRV::DTSortableEntry *E = Op2E.second; + Graph.push_back(E); + for (auto &U : *E) { + const MachineRegisterInfo &MRI = U.first->getRegInfo(); + MachineInstr *MI = MRI.getUniqueVRegDef(U.second); + if (!MI) + continue; + assert(MI && MI->getParent() && "No MachineInstr created yet"); + for (auto i = MI->getNumDefs(); i < MI->getNumOperands(); i++) { + MachineOperand &Op = MI->getOperand(i); + if (!Op.isReg()) + continue; + MachineOperand *RegOp = &MRI.getVRegDef(Op.getReg())->getOperand(0); + assert((MI->getOpcode() == SPIRV::OpVariable && i == 3) || + Reg2Entry.count(RegOp)); + if (Reg2Entry.count(RegOp)) + E->addDep(Reg2Entry[RegOp]); + } + + if (E->getIsFunc()) { + MachineInstr *Next = MI->getNextNode(); + if (Next && (Next->getOpcode() == SPIRV::OpFunction || + Next->getOpcode() == SPIRV::OpFunctionParameter)) { + E->addDep(Reg2Entry[&Next->getOperand(0)]); + } + } + } + } + + if (MMI) { + const Module *M = MMI->getModule(); + for (auto F = M->begin(), E = M->end(); F != E; ++F) { + const MachineFunction *MF = MMI->getMachineFunction(*F); + if (!MF) + continue; + for (const MachineBasicBlock &MBB : *MF) { + for (const MachineInstr &CMI : MBB) { + MachineInstr &MI = const_cast<MachineInstr &>(CMI); + MI.dump(); + if (MI.getNumExplicitDefs() > 0 && + Reg2Entry.count(&MI.getOperand(0))) { + dbgs() << "\t["; + for (SPIRV::DTSortableEntry *D : + Reg2Entry.lookup(&MI.getOperand(0))->getDeps()) + dbgs() << Register::virtReg2Index(D->lookup(MF)) << ", "; + dbgs() << "]\n"; + } + } + } + } + } +}
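For orientation, a much smaller self-contained analogue of the structure buildDepsGraph assembles (illustrative only, not the real SPIRV::DTSortableEntry): every node lists the entries that define the operands it uses, so module-level emission can order them afterwards.

#include <cassert>
#include <vector>

struct Entry {
  std::vector<Entry *> Deps; // entries this one must be emitted after
  void addDep(Entry *E) { Deps.push_back(E); }
};

int main() {
  Entry TypeInt, ConstOne, GlobalVar;
  ConstOne.addDep(&TypeInt);   // a constant depends on its type
  GlobalVar.addDep(&TypeInt);  // a global depends on its type...
  GlobalVar.addDep(&ConstOne); // ...and on its initializer
  std::vector<Entry *> Graph = {&TypeInt, &ConstOne, &GlobalVar};
  assert(Graph.back()->Deps.size() == 2);
  return 0;
}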
\ No newline at end of file diff --git a/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.h b/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.h new file mode 100644 index 000000000000..58ae1f86ce42 --- /dev/null +++ b/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.h @@ -0,0 +1,174 @@ +//===-- SPIRVDuplicatesTracker.h - SPIR-V Duplicates Tracker ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// General infrastructure for keeping track of the values that according to +// the SPIR-V binary layout should be global to the whole module. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SPIRV_SPIRVDUPLICATESTRACKER_H +#define LLVM_LIB_TARGET_SPIRV_SPIRVDUPLICATESTRACKER_H + +#include "MCTargetDesc/SPIRVBaseInfo.h" +#include "MCTargetDesc/SPIRVMCTargetDesc.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +

#include <type_traits> +

namespace llvm { +namespace SPIRV { +// NOTE: using MapVector instead of DenseMap because it helps getting +// everything ordered in a stable manner for a price of extra (NumKeys)*PtrSize +// memory and expensive removals which do not happen anyway. +class DTSortableEntry : public MapVector<const MachineFunction *, Register> { + SmallVector<DTSortableEntry *, 2> Deps; + + struct FlagsTy { + unsigned IsFunc : 1; + unsigned IsGV : 1; + // NOTE: bit-field default init is a C++20 feature. + FlagsTy() : IsFunc(0), IsGV(0) {} + }; + FlagsTy Flags; +
+public: + // The common hoisting utility doesn't support functions, because hoisting + // them requires hoisting their params as well. + bool getIsFunc() const { return Flags.IsFunc; } + bool getIsGV() const { return Flags.IsGV; } + void setIsFunc(bool V) { Flags.IsFunc = V; } + void setIsGV(bool V) { Flags.IsGV = V; } + + const SmallVector<DTSortableEntry *, 2> &getDeps() const { return Deps; } + void addDep(DTSortableEntry *E) { Deps.push_back(E); } +}; +} // namespace SPIRV + +template <typename KeyTy> class SPIRVDuplicatesTrackerBase { +public: + // NOTE: using MapVector instead of DenseMap helps getting everything ordered + // in a stable manner for a price of extra (NumKeys)*PtrSize memory and + // expensive removals which don't happen anyway. 
+ using StorageTy = MapVector<KeyTy, SPIRV::DTSortableEntry>; + +private: + StorageTy Storage; + +public: + void add(KeyTy V, const MachineFunction *MF, Register R) { + if (find(V, MF).isValid()) + return; + + Storage[V][MF] = R; + if (std::is_same<Function, + typename std::remove_const< + typename std::remove_pointer<KeyTy>::type>::type>() || + std::is_same<Argument, + typename std::remove_const< + typename std::remove_pointer<KeyTy>::type>::type>()) + Storage[V].setIsFunc(true); + if (std::is_same<GlobalVariable, + typename std::remove_const< + typename std::remove_pointer<KeyTy>::type>::type>()) + Storage[V].setIsGV(true); + } + + Register find(KeyTy V, const MachineFunction *MF) const { + auto iter = Storage.find(V); + if (iter != Storage.end()) { + auto Map = iter->second; + auto iter2 = Map.find(MF); + if (iter2 != Map.end()) + return iter2->second; + } + return Register(); + } + + const StorageTy &getAllUses() const { return Storage; } + +private: + StorageTy &getAllUses() { return Storage; } + + // The friend class needs to have access to the internal storage + // to be able to build dependency graph, can't declare only one + // function a 'friend' due to the incomplete declaration at this point + // and mutual dependency problems. + friend class SPIRVGeneralDuplicatesTracker; +}; + +template <typename T> +class SPIRVDuplicatesTracker : public SPIRVDuplicatesTrackerBase<const T *> {}; + +class SPIRVGeneralDuplicatesTracker { + SPIRVDuplicatesTracker<Type> TT; + SPIRVDuplicatesTracker<Constant> CT; + SPIRVDuplicatesTracker<GlobalVariable> GT; + SPIRVDuplicatesTracker<Function> FT; + SPIRVDuplicatesTracker<Argument> AT; + + // NOTE: using MOs instead of regs to get rid of MF dependency to be able + // to use flat data structure. + // NOTE: replacing DenseMap with MapVector doesn't affect overall correctness + // but makes LITs more stable, should prefer DenseMap still due to + // significant perf difference. + using SPIRVReg2EntryTy = + MapVector<MachineOperand *, SPIRV::DTSortableEntry *>; + + template <typename T> + void prebuildReg2Entry(SPIRVDuplicatesTracker<T> &DT, + SPIRVReg2EntryTy &Reg2Entry); + +public: + void buildDepsGraph(std::vector<SPIRV::DTSortableEntry *> &Graph, + MachineModuleInfo *MMI); + + void add(const Type *T, const MachineFunction *MF, Register R) { + TT.add(T, MF, R); + } + + void add(const Constant *C, const MachineFunction *MF, Register R) { + CT.add(C, MF, R); + } + + void add(const GlobalVariable *GV, const MachineFunction *MF, Register R) { + GT.add(GV, MF, R); + } + + void add(const Function *F, const MachineFunction *MF, Register R) { + FT.add(F, MF, R); + } + + void add(const Argument *Arg, const MachineFunction *MF, Register R) { + AT.add(Arg, MF, R); + } + + Register find(const Type *T, const MachineFunction *MF) { + return TT.find(const_cast<Type *>(T), MF); + } + + Register find(const Constant *C, const MachineFunction *MF) { + return CT.find(const_cast<Constant *>(C), MF); + } + + Register find(const GlobalVariable *GV, const MachineFunction *MF) { + return GT.find(const_cast<GlobalVariable *>(GV), MF); + } + + Register find(const Function *F, const MachineFunction *MF) { + return FT.find(const_cast<Function *>(F), MF); + } + + Register find(const Argument *Arg, const MachineFunction *MF) { + return AT.find(const_cast<Argument *>(Arg), MF); + } +}; +} // namespace llvm +#endif
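The add/find contract defined above fits in a few lines when reduced to a self-contained analogue (illustrative only: std::map instead of MapVector, a plain pointer standing in for MachineFunction, and 0 playing the role of an invalid Register): one register per (value, function) pair, the first registration wins, and find returns the invalid register when nothing matches.

#include <cassert>
#include <map>
#include <utility>

using Register = unsigned; // 0 acts as the invalid register here

template <typename KeyTy> class DuplicatesTracker {
  std::map<std::pair<KeyTy, const void *>, Register> Storage;

public:
  void add(KeyTy V, const void *MF, Register R) {
    Storage.emplace(std::make_pair(V, MF), R); // no-op if already present
  }
  Register find(KeyTy V, const void *MF) const {
    auto It = Storage.find(std::make_pair(V, MF));
    return It == Storage.end() ? 0 : It->second;
  }
};

int main() {
  DuplicatesTracker<int> DT;
  int MFDummy;
  DT.add(42, &MFDummy, 5);
  DT.add(42, &MFDummy, 7);            // duplicate registration is ignored
  assert(DT.find(42, &MFDummy) == 5); // the first mapping survives
  assert(DT.find(7, &MFDummy) == 0);  // nothing registered for this key
  return 0;
}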
\ No newline at end of file diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp index 02a6905a1abc..5f890c003cbc 100644 --- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp @@ -101,7 +101,6 @@ Register SPIRVGlobalRegistry::buildConstantInt(uint64_t Val, SPIRVType *SpvType, bool EmitIR) { auto &MF = MIRBuilder.getMF(); - Register Res; const IntegerType *LLVMIntTy; if (SpvType) LLVMIntTy = cast<IntegerType>(getTypeForSPIRVType(SpvType)); @@ -110,15 +109,18 @@ Register SPIRVGlobalRegistry::buildConstantInt(uint64_t Val, // Find a constant in DT or build a new one. const auto ConstInt = ConstantInt::get(const_cast<IntegerType *>(LLVMIntTy), Val); - unsigned BitWidth = SpvType ? getScalarOrVectorBitWidth(SpvType) : 32; - Res = MF.getRegInfo().createGenericVirtualRegister(LLT::scalar(BitWidth)); - assignTypeToVReg(LLVMIntTy, Res, MIRBuilder); - if (EmitIR) - MIRBuilder.buildConstant(Res, *ConstInt); - else - MIRBuilder.buildInstr(SPIRV::OpConstantI) - .addDef(Res) - .addImm(ConstInt->getSExtValue()); + Register Res = DT.find(ConstInt, &MF); + if (!Res.isValid()) { + unsigned BitWidth = SpvType ? getScalarOrVectorBitWidth(SpvType) : 32; + Res = MF.getRegInfo().createGenericVirtualRegister(LLT::scalar(BitWidth)); + assignTypeToVReg(LLVMIntTy, Res, MIRBuilder); + if (EmitIR) + MIRBuilder.buildConstant(Res, *ConstInt); + else + MIRBuilder.buildInstr(SPIRV::OpConstantI) + .addDef(Res) + .addImm(ConstInt->getSExtValue()); + } return Res; } @@ -126,7 +128,6 @@ Register SPIRVGlobalRegistry::buildConstantFP(APFloat Val, MachineIRBuilder &MIRBuilder, SPIRVType *SpvType) { auto &MF = MIRBuilder.getMF(); - Register Res; const Type *LLVMFPTy; if (SpvType) { LLVMFPTy = getTypeForSPIRVType(SpvType); @@ -136,10 +137,13 @@ Register SPIRVGlobalRegistry::buildConstantFP(APFloat Val, } // Find a constant in DT or build a new one. const auto ConstFP = ConstantFP::get(LLVMFPTy->getContext(), Val); - unsigned BitWidth = SpvType ? getScalarOrVectorBitWidth(SpvType) : 32; - Res = MF.getRegInfo().createGenericVirtualRegister(LLT::scalar(BitWidth)); - assignTypeToVReg(LLVMFPTy, Res, MIRBuilder); - MIRBuilder.buildFConstant(Res, *ConstFP); + Register Res = DT.find(ConstFP, &MF); + if (!Res.isValid()) { + unsigned BitWidth = SpvType ? getScalarOrVectorBitWidth(SpvType) : 32; + Res = MF.getRegInfo().createGenericVirtualRegister(LLT::scalar(BitWidth)); + assignTypeToVReg(LLVMFPTy, Res, MIRBuilder); + MIRBuilder.buildFConstant(Res, *ConstFP); + } return Res; } @@ -184,6 +188,7 @@ Register SPIRVGlobalRegistry::buildGlobalVariable( *Subtarget.getRegBankInfo()); } Reg = MIB->getOperand(0).getReg(); + DT.add(GVar, &MIRBuilder.getMF(), Reg); // Set to Reg the same type as ResVReg has. 
auto MRI = MIRBuilder.getMRI(); @@ -318,10 +323,11 @@ SPIRVType *SPIRVGlobalRegistry::getSPIRVTypeForVReg(Register VReg) const { SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVType( const Type *Type, MachineIRBuilder &MIRBuilder, SPIRV::AccessQualifier AccessQual, bool EmitIR) { + Register Reg = DT.find(Type, &MIRBuilder.getMF()); + if (Reg.isValid()) + return getSPIRVTypeForVReg(Reg); SPIRVType *SpirvType = createSPIRVType(Type, MIRBuilder, AccessQual, EmitIR); - VRegToTypeMap[&MIRBuilder.getMF()][getSPIRVTypeID(SpirvType)] = SpirvType; - SPIRVToLLVMType[SpirvType] = Type; - return SpirvType; + return restOfCreateSPIRVType(Type, SpirvType); } bool SPIRVGlobalRegistry::isScalarOfType(Register VReg, @@ -387,17 +393,21 @@ SPIRVGlobalRegistry::getOrCreateSPIRVIntegerType(unsigned BitWidth, MIRBuilder); } -SPIRVType *SPIRVGlobalRegistry::restOfCreateSPIRVType(Type *LLVMTy, - MachineInstrBuilder MIB) { - SPIRVType *SpirvType = MIB; +SPIRVType *SPIRVGlobalRegistry::restOfCreateSPIRVType(const Type *LLVMTy, + SPIRVType *SpirvType) { + assert(CurMF == SpirvType->getMF()); VRegToTypeMap[CurMF][getSPIRVTypeID(SpirvType)] = SpirvType; SPIRVToLLVMType[SpirvType] = LLVMTy; + DT.add(LLVMTy, CurMF, getSPIRVTypeID(SpirvType)); return SpirvType; } SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVIntegerType( unsigned BitWidth, MachineInstr &I, const SPIRVInstrInfo &TII) { Type *LLVMTy = IntegerType::get(CurMF->getFunction().getContext(), BitWidth); + Register Reg = DT.find(LLVMTy, CurMF); + if (Reg.isValid()) + return getSPIRVTypeForVReg(Reg); MachineBasicBlock &BB = *I.getParent(); auto MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpTypeInt)) .addDef(createTypeVReg(CurMF->getRegInfo())) diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h index 952ab4c13e29..13dcc20a3e0a 100644 --- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h +++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h @@ -17,6 +17,7 @@ #define LLVM_LIB_TARGET_SPIRV_SPIRVTYPEMANAGER_H #include "MCTargetDesc/SPIRVBaseInfo.h" +#include "SPIRVDuplicatesTracker.h" #include "SPIRVInstrInfo.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" @@ -30,7 +31,10 @@ class SPIRVGlobalRegistry { // where Reg = OpType... // while VRegToTypeMap tracks SPIR-V type assigned to other regs (i.e. 
not // type-declaring ones) - DenseMap<MachineFunction *, DenseMap<Register, SPIRVType *>> VRegToTypeMap; + DenseMap<const MachineFunction *, DenseMap<Register, SPIRVType *>> + VRegToTypeMap; + + SPIRVGeneralDuplicatesTracker DT; DenseMap<SPIRVType *, const Type *> SPIRVToLLVMType; @@ -48,6 +52,39 @@ public: MachineFunction *CurMF; + void add(const Constant *C, MachineFunction *MF, Register R) { + DT.add(C, MF, R); + } + + void add(const GlobalVariable *GV, MachineFunction *MF, Register R) { + DT.add(GV, MF, R); + } + + void add(const Function *F, MachineFunction *MF, Register R) { + DT.add(F, MF, R); + } + + void add(const Argument *Arg, MachineFunction *MF, Register R) { + DT.add(Arg, MF, R); + } + + Register find(const Constant *C, MachineFunction *MF) { + return DT.find(C, MF); + } + + Register find(const GlobalVariable *GV, MachineFunction *MF) { + return DT.find(GV, MF); + } + + Register find(const Function *F, MachineFunction *MF) { + return DT.find(F, MF); + } + + void buildDepsGraph(std::vector<SPIRV::DTSortableEntry *> &Graph, + MachineModuleInfo *MMI = nullptr) { + DT.buildDepsGraph(Graph, MMI); + } + // Get or create a SPIR-V type corresponding the given LLVM IR type, // and map it to the given VReg by creating an ASSIGN_TYPE instruction. SPIRVType *assignTypeToVReg( @@ -136,7 +173,7 @@ private: SPIRVType *getOpTypeFunction(SPIRVType *RetType, const SmallVectorImpl<SPIRVType *> &ArgTypes, MachineIRBuilder &MIRBuilder); - SPIRVType *restOfCreateSPIRVType(Type *LLVMTy, MachineInstrBuilder MIB); + SPIRVType *restOfCreateSPIRVType(const Type *LLVMTy, SPIRVType *SpirvType); public: Register buildConstantInt(uint64_t Val, MachineIRBuilder &MIRBuilder, diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index 9294a60506a8..90b921a06f21 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -807,23 +807,29 @@ void SPIRVInstructionSelector::renderImm32(MachineInstrBuilder &MIB, Register SPIRVInstructionSelector::buildI32Constant(uint32_t Val, MachineInstr &I, const SPIRVType *ResType) const { + Type *LLVMTy = IntegerType::get(GR.CurMF->getFunction().getContext(), 32); const SPIRVType *SpvI32Ty = ResType ? ResType : GR.getOrCreateSPIRVIntegerType(32, I, TII); - Register NewReg; - NewReg = MRI->createGenericVirtualRegister(LLT::scalar(32)); - MachineInstr *MI; - MachineBasicBlock &BB = *I.getParent(); - if (Val == 0) { - MI = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpConstantNull)) - .addDef(NewReg) - .addUse(GR.getSPIRVTypeID(SpvI32Ty)); - } else { - MI = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpConstantI)) - .addDef(NewReg) - .addUse(GR.getSPIRVTypeID(SpvI32Ty)) - .addImm(APInt(32, Val).getZExtValue()); + // Find a constant in DT or build a new one. 
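// (Why the uniqued pointer works as the DT key: within a single LLVMContext,
//  ConstantInt::get returns the same object for the same type/value pair, so
//    Constant *A = ConstantInt::get(I32Ty, 42);
//    Constant *B = ConstantInt::get(I32Ty, 42);
//    assert(A == B);
//  holds, and repeated requests below hit the same tracker entry. I32Ty is
//  an illustrative stand-in for any integer type; this note is a sketch, not
//  part of the patch.)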
+  auto ConstInt = ConstantInt::get(LLVMTy, Val);
+  Register NewReg = GR.find(ConstInt, GR.CurMF);
+  if (!NewReg.isValid()) {
+    NewReg = MRI->createGenericVirtualRegister(LLT::scalar(32));
+    GR.add(ConstInt, GR.CurMF, NewReg);
+    MachineInstr *MI;
+    MachineBasicBlock &BB = *I.getParent();
+    if (Val == 0) {
+      MI = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpConstantNull))
+               .addDef(NewReg)
+               .addUse(GR.getSPIRVTypeID(SpvI32Ty));
+    } else {
+      MI = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpConstantI))
+               .addDef(NewReg)
+               .addUse(GR.getSPIRVTypeID(SpvI32Ty))
+               .addImm(APInt(32, Val).getZExtValue());
+    }
+    constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
  }
-  constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
   return NewReg;
 }
diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
index fa78dd7942c6..a39df5234935 100644
--- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
@@ -28,6 +28,11 @@ using namespace llvm;
 #define DEBUG_TYPE "spirv-module-analysis"
 
+static cl::opt<bool>
+    SPVDumpDeps("spv-dump-deps",
+                cl::desc("Dump MIR with SPIR-V dependencies info"),
+                cl::Optional, cl::init(false));
+
 char llvm::SPIRVModuleAnalysis::ID = 0;
 
 namespace llvm {
@@ -113,6 +118,83 @@ static bool findSameInstrInMS(const MachineInstr &A,
   return false;
 }
 
+// Collect the MI that defines the register in the given machine function.
+static void collectDefInstr(Register Reg, const MachineFunction *MF,
+                            SPIRV::ModuleAnalysisInfo *MAI,
+                            SPIRV::ModuleSectionType MSType,
+                            bool DoInsert = true) {
+  assert(MAI->hasRegisterAlias(MF, Reg) && "Cannot find register alias");
+  MachineInstr *MI = MF->getRegInfo().getUniqueVRegDef(Reg);
+  assert(MI && "There should be an instruction that defines the register");
+  MAI->setSkipEmission(MI);
+  if (DoInsert)
+    MAI->MS[MSType].push_back(MI);
+}
+
+void SPIRVModuleAnalysis::collectGlobalEntities(
+    const std::vector<SPIRV::DTSortableEntry *> &DepsGraph,
+    SPIRV::ModuleSectionType MSType,
+    std::function<bool(const SPIRV::DTSortableEntry *)> Pred,
+    bool UsePreOrder) {
+  DenseSet<const SPIRV::DTSortableEntry *> Visited;
+  for (const auto *E : DepsGraph) {
+    std::function<void(const SPIRV::DTSortableEntry *)> RecHoistUtil;
+    // NOTE: we prefer a recursive approach over an iterative one here
+    // because we don't expect dependency chains long enough to cause a
+    // stack overflow.
+    RecHoistUtil = [MSType, UsePreOrder, &Visited, &Pred,
+                    &RecHoistUtil](const SPIRV::DTSortableEntry *E) {
+      if (Visited.count(E) || !Pred(E))
+        return;
+      Visited.insert(E);
+
+      // Traversing the deps graph in post-order lets us avoid preprocessing
+      // of register aliases, but pre-order is required to correctly process
+      // function declarations and their arguments.
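// (Stripped of bookkeeping, the lambda below is a plain depth-first
//  traversal. A sketch of its shape, with Entry, Visited and emit() as
//  illustrative stand-ins:
//    void visit(const Entry *E) {
//      if (Visited.count(E)) return;
//      Visited.insert(E);
//      if (!UsePreOrder)                      // post-order: deps first, so
//        for (const Entry *D : E->getDeps())  // every ID an entry refers to
//          visit(D);                          // already has a global alias
//      emit(E);
//      if (UsePreOrder)                       // pre-order: entry first, as
//        for (const Entry *D : E->getDeps())  // function declarations must
//          visit(D);                          // precede their arguments
//    }
//  )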
+      if (!UsePreOrder)
+        for (auto *S : E->getDeps())
+          RecHoistUtil(S);
+
+      Register GlobalReg = Register::index2VirtReg(MAI.getNextID());
+      bool IsFirst = true;
+      for (auto &U : *E) {
+        const MachineFunction *MF = U.first;
+        Register Reg = U.second;
+        MAI.setRegisterAlias(MF, Reg, GlobalReg);
+        if (!MF->getRegInfo().getUniqueVRegDef(Reg))
+          continue;
+        collectDefInstr(Reg, MF, &MAI, MSType, IsFirst);
+        IsFirst = false;
+        if (E->getIsGV())
+          MAI.GlobalVarList.push_back(MF->getRegInfo().getUniqueVRegDef(Reg));
+      }
+
+      if (UsePreOrder)
+        for (auto *S : E->getDeps())
+          RecHoistUtil(S);
+    };
+    RecHoistUtil(E);
+  }
+}
+
+// This function initializes the global register alias table for types,
+// consts, global vars and func decls, and collects these instructions for
+// output at the module level. It also collects explicit
+// OpExtension/OpCapability instructions.
+void SPIRVModuleAnalysis::processDefInstrs(const Module &M) {
+  std::vector<SPIRV::DTSortableEntry *> DepsGraph;
+
+  GR->buildDepsGraph(DepsGraph, SPVDumpDeps ? MMI : nullptr);
+
+  collectGlobalEntities(
+      DepsGraph, SPIRV::MB_TypeConstVars,
+      [](const SPIRV::DTSortableEntry *E) { return !E->getIsFunc(); }, false);
+
+  collectGlobalEntities(
+      DepsGraph, SPIRV::MB_ExtFuncDecls,
+      [](const SPIRV::DTSortableEntry *E) { return E->getIsFunc(); }, true);
+}
+
 // Look for IDs declared with Import linkage, and map the imported name string
 // to the register defining that variable (which will usually be the result of
 // an OpFunction). This lets us call externally imported functions using
@@ -146,10 +228,9 @@ void SPIRVModuleAnalysis::collectFuncNames(MachineInstr &MI,
 // numbering has already occurred by this point. We can directly compare reg
 // arguments when detecting duplicates.
 static void collectOtherInstr(MachineInstr &MI, SPIRV::ModuleAnalysisInfo &MAI,
-                              SPIRV::ModuleSectionType MSType,
-                              bool IsConstOrType = false) {
+                              SPIRV::ModuleSectionType MSType) {
   MAI.setSkipEmission(&MI);
-  if (findSameInstrInMS(MI, MSType, MAI, IsConstOrType, IsConstOrType ? 1 : 0))
+  if (findSameInstrInMS(MI, MSType, MAI, false))
     return; // Found a duplicate, so don't add it.
   // No duplicates, so add it.
   MAI.MS[MSType].push_back(&MI);
@@ -163,18 +244,11 @@ void SPIRVModuleAnalysis::processOtherInstrs(const Module &M) {
       continue;
     MachineFunction *MF = MMI->getMachineFunction(*F);
     assert(MF);
-    unsigned FCounter = 0;
     for (MachineBasicBlock &MBB : *MF)
       for (MachineInstr &MI : MBB) {
-        if (MI.getOpcode() == SPIRV::OpFunction)
-          FCounter++;
        if (MAI.getSkipEmission(&MI))
          continue;
        const unsigned OpCode = MI.getOpcode();
-        const bool IsFuncOrParm =
-            OpCode == SPIRV::OpFunction || OpCode == SPIRV::OpFunctionParameter;
-        const bool IsConstOrType =
-            TII->isConstantInstr(MI) || TII->isTypeDeclInstr(MI);
        if (OpCode == SPIRV::OpName || OpCode == SPIRV::OpMemberName) {
          collectOtherInstr(MI, MAI, SPIRV::MB_DebugNames);
        } else if (OpCode == SPIRV::OpEntryPoint) {
@@ -182,12 +256,6 @@
        } else if (TII->isDecorationInstr(MI)) {
          collectOtherInstr(MI, MAI, SPIRV::MB_Annotations);
          collectFuncNames(MI, *F);
-        } else if (IsConstOrType || (FCounter > 1 && IsFuncOrParm)) {
-          // Now OpSpecConstant*s are not in DT,
-          // but they need to be collected anyway.
-          enum SPIRV::ModuleSectionType Type =
-              IsFuncOrParm ?
SPIRV::MB_ExtFuncDecls : SPIRV::MB_TypeConstVars; - collectOtherInstr(MI, MAI, Type, IsConstOrType); } else if (OpCode == SPIRV::OpFunction) { collectFuncNames(MI, *F); } @@ -239,6 +307,7 @@ bool SPIRVModuleAnalysis::runOnModule(Module &M) { // TODO: Process type/const/global var/func decl instructions, number their // destination registers from 0 to N, collect Extensions and Capabilities. + processDefInstrs(M); // Number rest of registers from N+1 onwards. numberRegistersGlobally(M); diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h index 1bef13d458c1..585868909d28 100644 --- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h +++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h @@ -15,6 +15,7 @@ #define LLVM_LIB_TARGET_SPIRV_SPIRVMODULEANALYSIS_H #include "MCTargetDesc/SPIRVBaseInfo.h" +#include "SPIRVDuplicatesTracker.h" #include "SPIRVSubtarget.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" @@ -123,6 +124,11 @@ public: private: void setBaseInfo(const Module &M); template <typename T> void collectTypesConstsVars(); + void collectGlobalEntities( + const std::vector<SPIRV::DTSortableEntry *> &DepsGraph, + SPIRV::ModuleSectionType MSType, + std::function<bool(const SPIRV::DTSortableEntry *)> Pred, + bool UsePreOrder); void processDefInstrs(const Module &M); void collectFuncNames(MachineInstr &MI, const Function &F); void processOtherInstrs(const Module &M); diff --git a/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp b/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp index d75d41b35838..ee460002fc58 100644 --- a/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp +++ b/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp @@ -44,12 +44,11 @@ STATISTIC(MCNumEmitted, "Number of MC instructions emitted"); namespace { class SparcMCCodeEmitter : public MCCodeEmitter { - const MCInstrInfo &MCII; MCContext &Ctx; public: - SparcMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx) - : MCII(mcii), Ctx(ctx) {} + SparcMCCodeEmitter(const MCInstrInfo &, MCContext &ctx) + : Ctx(ctx) {} SparcMCCodeEmitter(const SparcMCCodeEmitter &) = delete; SparcMCCodeEmitter &operator=(const SparcMCCodeEmitter &) = delete; ~SparcMCCodeEmitter() override = default; @@ -84,12 +83,6 @@ public: unsigned getBranchOnRegTargetOpValue(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const; - -private: - FeatureBitset computeAvailableFeatures(const FeatureBitset &FB) const; - void - verifyInstructionPredicates(const MCInst &MI, - const FeatureBitset &AvailableFeatures) const; }; } // end anonymous namespace @@ -97,9 +90,6 @@ private: void SparcMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const { - verifyInstructionPredicates(MI, - computeAvailableFeatures(STI.getFeatureBits())); - unsigned Bits = getBinaryCodeForInstr(MI, Fixups, STI); support::endian::write(OS, Bits, Ctx.getAsmInfo()->isLittleEndian() ? 
support::little @@ -253,7 +243,6 @@ getBranchOnRegTargetOpValue(const MCInst &MI, unsigned OpNo, return 0; } -#define ENABLE_INSTR_PREDICATE_VERIFIER #include "SparcGenMCCodeEmitter.inc" MCCodeEmitter *llvm::createSparcMCCodeEmitter(const MCInstrInfo &MCII, diff --git a/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp b/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp index 49b75b7e0bd1..b11c786e7856 100644 --- a/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp +++ b/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp @@ -24,6 +24,7 @@ using namespace llvm; #define GET_INSTRINFO_MC_DESC +#define ENABLE_INSTR_PREDICATE_VERIFIER #include "SparcGenInstrInfo.inc" #define GET_SUBTARGETINFO_MC_DESC diff --git a/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h b/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h index 7ef043d9df40..8e6a9ebdb2dd 100644 --- a/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h +++ b/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h @@ -46,6 +46,7 @@ std::unique_ptr<MCObjectTargetWriter> createSparcELFObjectWriter(bool Is64Bit, // Defines symbolic names for the Sparc instructions. // #define GET_INSTRINFO_ENUM +#define GET_INSTRINFO_MC_HELPER_DECLS #include "SparcGenInstrInfo.inc" #define GET_SUBTARGETINFO_ENUM diff --git a/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp b/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp index f6f9c0a1de81..c8961d507c72 100644 --- a/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp +++ b/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp @@ -250,6 +250,8 @@ void SparcAsmPrinter::LowerGETPCXAndEmitMCInsts(const MachineInstr *MI, } void SparcAsmPrinter::emitInstruction(const MachineInstr *MI) { + Sparc_MC::verifyInstructionPredicates(MI->getOpcode(), + getSubtargetInfo().getFeatureBits()); switch (MI->getOpcode()) { default: break; diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp index 242f566da2c9..1a71ff28424f 100644 --- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp +++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp @@ -150,23 +150,13 @@ private: return getPCRelEncoding(MI, OpNum, Fixups, SystemZ::FK_390_PC24DBL, 3, false); } - -private: - FeatureBitset computeAvailableFeatures(const FeatureBitset &FB) const; - void - verifyInstructionPredicates(const MCInst &MI, - const FeatureBitset &AvailableFeatures) const; }; } // end anonymous namespace -void SystemZMCCodeEmitter:: -encodeInstruction(const MCInst &MI, raw_ostream &OS, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - verifyInstructionPredicates(MI, - computeAvailableFeatures(STI.getFeatureBits())); - +void SystemZMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { MemOpsEmitted = 0; uint64_t Bits = getBinaryCodeForInstr(MI, Fixups, STI); unsigned Size = MCII.get(MI.getOpcode()).getSize(); @@ -329,7 +319,6 @@ SystemZMCCodeEmitter::getPCRelEncoding(const MCInst &MI, unsigned OpNum, return 0; } -#define ENABLE_INSTR_PREDICATE_VERIFIER #include "SystemZGenMCCodeEmitter.inc" MCCodeEmitter *llvm::createSystemZMCCodeEmitter(const MCInstrInfo &MCII, diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp index 03141ecf551d..08886507fdb7 100644 --- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp +++ 
b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp @@ -23,6 +23,7 @@ using namespace llvm; #define GET_INSTRINFO_MC_DESC +#define ENABLE_INSTR_PREDICATE_VERIFIER #include "SystemZGenInstrInfo.inc" #define GET_SUBTARGETINFO_MC_DESC diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h index db4485423416..f2bfc9ac48e5 100644 --- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h +++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h @@ -95,6 +95,7 @@ std::unique_ptr<MCObjectTargetWriter> createSystemZObjectWriter(uint8_t OSABI); // Defines symbolic names for the SystemZ instructions. #define GET_INSTRINFO_ENUM +#define GET_INSTRINFO_MC_HELPER_DECLS #include "SystemZGenInstrInfo.inc" #define GET_SUBTARGETINFO_ENUM diff --git a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp index 6fb080607f51..1d55bf9a5804 100644 --- a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp +++ b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp @@ -143,6 +143,9 @@ void SystemZAsmPrinter::emitCallInformation(CallType CT) { } void SystemZAsmPrinter::emitInstruction(const MachineInstr *MI) { + SystemZ_MC::verifyInstructionPredicates(MI->getOpcode(), + getSubtargetInfo().getFeatureBits()); + SystemZMCInstLower Lower(MF->getContext(), *this); MCInst LoweredMI; switch (MI->getOpcode()) { diff --git a/llvm/lib/Target/SystemZ/SystemZCallingConv.td b/llvm/lib/Target/SystemZ/SystemZCallingConv.td index a7ea5e1e4bf8..fdd82a01f211 100644 --- a/llvm/lib/Target/SystemZ/SystemZCallingConv.td +++ b/llvm/lib/Target/SystemZ/SystemZCallingConv.td @@ -162,11 +162,7 @@ def CSR_SystemZ_NoRegs : CalleeSavedRegs<(add)>; //===----------------------------------------------------------------------===// // z/OS XPLINK64 callee-saved registers //===----------------------------------------------------------------------===// -// %R7D is volatile by the spec, but it must be saved in the prologue by -// any non-leaf function and restored in the epilogue for use by the -// return instruction so it functions exactly like a callee-saved register. -def CSR_SystemZ_XPLINK64 : CalleeSavedRegs<(add (sequence "R%dD", 7, 15), - (sequence "R%dD", 4, 4), +def CSR_SystemZ_XPLINK64 : CalleeSavedRegs<(add (sequence "R%dD", 8, 15), (sequence "F%dD", 15, 8))>; def CSR_SystemZ_XPLINK64_Vector : CalleeSavedRegs<(add CSR_SystemZ_XPLINK64, diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp index 43bc7426cfa8..975eb8862e82 100644 --- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp @@ -918,72 +918,74 @@ bool SystemZXPLINKFrameLowering::assignCalleeSavedSpillSlots( SystemZMachineFunctionInfo *MFI = MF.getInfo<SystemZMachineFunctionInfo>(); const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>(); auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>(); + auto &GRRegClass = SystemZ::GR64BitRegClass; + + // For non-leaf functions: + // - the address of callee (entry point) register R6 must be saved + CSI.push_back(CalleeSavedInfo(Regs.getAddressOfCalleeRegister())); + CSI.back().setRestored(false); + + // The return address register R7 must be saved and restored. + CSI.push_back(CalleeSavedInfo(Regs.getReturnFunctionAddressRegister())); + + // If the function needs a frame pointer, or if the backchain pointer should + // be stored, then save the stack pointer register R4. 
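// (In short, the XPLINK64 convention encoded here: R6 carries the callee's
//  entry point and is saved but not restored; R7 carries the return address
//  and is saved and restored for use by the return instruction; R4, the
//  stack pointer, is saved only when a frame pointer or a backchain is
//  required.)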
+ if (hasFP(MF) || MF.getFunction().hasFnAttribute("backchain")) + CSI.push_back(CalleeSavedInfo(Regs.getStackPointerRegister())); // Scan the call-saved GPRs and find the bounds of the register spill area. - unsigned LowGPR = 0; - int LowOffset = INT32_MAX; - unsigned HighGPR = LowGPR; + Register LowRestoreGPR = 0; + int LowRestoreOffset = INT32_MAX; + Register LowSpillGPR = 0; + int LowSpillOffset = INT32_MAX; + Register HighGPR = 0; int HighOffset = -1; - unsigned RegSP = Regs.getStackPointerRegister(); - auto &GRRegClass = SystemZ::GR64BitRegClass; - const unsigned RegSize = 8; + for (auto &CS : CSI) { + Register Reg = CS.getReg(); + int Offset = RegSpillOffsets[Reg]; + if (Offset >= 0) { + if (GRRegClass.contains(Reg)) { + if (LowSpillOffset > Offset) { + LowSpillOffset = Offset; + LowSpillGPR = Reg; + } + if (CS.isRestored() && LowRestoreOffset > Offset) { + LowRestoreOffset = Offset; + LowRestoreGPR = Reg; + } - auto ProcessCSI = [&](std::vector<CalleeSavedInfo> &CSIList) { - for (auto &CS : CSIList) { - Register Reg = CS.getReg(); - int Offset = RegSpillOffsets[Reg]; - if (Offset >= 0) { - if (GRRegClass.contains(Reg)) { - if (LowOffset > Offset) { - LowOffset = Offset; - LowGPR = Reg; - } - - if (Offset > HighOffset) { - HighOffset = Offset; - HighGPR = Reg; - } + if (Offset > HighOffset) { + HighOffset = Offset; + HighGPR = Reg; } + // Non-volatile GPRs are saved in the dedicated register save area at + // the bottom of the stack and are not truly part of the "normal" stack + // frame. Mark the frame index as NoAlloc to indicate it as such. + unsigned RegSize = 8; int FrameIdx = MFFrame.CreateFixedSpillStackObject(RegSize, Offset); CS.setFrameIdx(FrameIdx); - } else - CS.setFrameIdx(INT32_MAX); + MFFrame.setStackID(FrameIdx, TargetStackID::NoAlloc); + } + } else { + Register Reg = CS.getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + Align Alignment = TRI->getSpillAlign(*RC); + unsigned Size = TRI->getSpillSize(*RC); + Alignment = std::min(Alignment, getStackAlign()); + int FrameIdx = MFFrame.CreateStackObject(Size, Alignment, true); + CS.setFrameIdx(FrameIdx); } - }; - - std::vector<CalleeSavedInfo> Spills; - - // For non-leaf functions: - // - the address of callee (entry point) register R6 must be saved - Spills.push_back(CalleeSavedInfo(Regs.getAddressOfCalleeRegister())); - - // If the function needs a frame pointer, or if the backchain pointer should - // be stored, then save the stack pointer register R4. - if (hasFP(MF) || MF.getFunction().hasFnAttribute("backchain")) - Spills.push_back(CalleeSavedInfo(RegSP)); + } // Save the range of call-saved registers, for use by the // prologue/epilogue inserters. - ProcessCSI(CSI); - MFI->setRestoreGPRRegs(LowGPR, HighGPR, LowOffset); + if (LowRestoreGPR) + MFI->setRestoreGPRRegs(LowRestoreGPR, HighGPR, LowRestoreOffset); // Save the range of call-saved registers, for use by the epilogue inserter. - ProcessCSI(Spills); - MFI->setSpillGPRRegs(LowGPR, HighGPR, LowOffset); - - // Create spill slots for the remaining registers. 
- for (auto &CS : CSI) { - if (CS.getFrameIdx() != INT32_MAX) - continue; - Register Reg = CS.getReg(); - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - Align Alignment = TRI->getSpillAlign(*RC); - unsigned Size = TRI->getSpillSize(*RC); - Alignment = std::min(Alignment, getStackAlign()); - int FrameIdx = MFFrame.CreateStackObject(Size, Alignment, true); - CS.setFrameIdx(FrameIdx); - } + assert(LowSpillGPR && "Expected registers to spill"); + MFI->setSpillGPRRegs(LowSpillGPR, HighGPR, LowSpillOffset); return true; } @@ -1001,13 +1003,6 @@ void SystemZXPLINKFrameLowering::determineCalleeSaves(MachineFunction &MF, // frame pointer will be clobbered. if (HasFP) SavedRegs.set(Regs.getFramePointerRegister()); - - // If the function is not an XPLeaf function, we need to save the - // return address register. We also always use that register for - // the return instruction, so it needs to be restored in the - // epilogue even though that register is considered to be volatile. - // #TODO: Implement leaf detection. - SavedRegs.set(Regs.getReturnFunctionAddressRegister()); } bool SystemZXPLINKFrameLowering::spillCalleeSavedRegisters( diff --git a/llvm/lib/Target/TargetLoweringObjectFile.cpp b/llvm/lib/Target/TargetLoweringObjectFile.cpp index 8f633adbb9ef..29cc2840310d 100644 --- a/llvm/lib/Target/TargetLoweringObjectFile.cpp +++ b/llvm/lib/Target/TargetLoweringObjectFile.cpp @@ -240,6 +240,13 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalObject *GO, return SectionKind::getBSS(); } + // Global variables with '!exclude' should get the exclude section kind if + // they have an explicit section and no other metadata. + if (GVar->hasSection()) + if (MDNode *MD = GVar->getMetadata(LLVMContext::MD_exclude)) + if (!MD->getNumOperands()) + return SectionKind::getExclude(); + // If the global is marked constant, we can put it into a mergable section, // a mergable string section, or general .data if it contains relocations. 
if (GVar->isConstant()) { diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEMCCodeEmitter.cpp b/llvm/lib/Target/VE/MCTargetDesc/VEMCCodeEmitter.cpp index 3eb246f73679..45facd34f84e 100644 --- a/llvm/lib/Target/VE/MCTargetDesc/VEMCCodeEmitter.cpp +++ b/llvm/lib/Target/VE/MCTargetDesc/VEMCCodeEmitter.cpp @@ -39,12 +39,11 @@ STATISTIC(MCNumEmitted, "Number of MC instructions emitted"); namespace { class VEMCCodeEmitter : public MCCodeEmitter { - const MCInstrInfo &MCII; MCContext &Ctx; public: - VEMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx) - : MCII(mcii), Ctx(ctx) {} + VEMCCodeEmitter(const MCInstrInfo &, MCContext &ctx) + : Ctx(ctx) {} VEMCCodeEmitter(const VEMCCodeEmitter &) = delete; VEMCCodeEmitter &operator=(const VEMCCodeEmitter &) = delete; ~VEMCCodeEmitter() override = default; @@ -74,12 +73,6 @@ public: uint64_t getRDOpValue(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const; - -private: - FeatureBitset computeAvailableFeatures(const FeatureBitset &FB) const; - void - verifyInstructionPredicates(const MCInst &MI, - const FeatureBitset &AvailableFeatures) const; }; } // end anonymous namespace @@ -87,9 +80,6 @@ private: void VEMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const { - verifyInstructionPredicates(MI, - computeAvailableFeatures(STI.getFeatureBits())); - uint64_t Bits = getBinaryCodeForInstr(MI, Fixups, STI); support::endian::write<uint64_t>(OS, Bits, support::little); @@ -155,7 +145,6 @@ uint64_t VEMCCodeEmitter::getRDOpValue(const MCInst &MI, unsigned OpNo, return 0; } -#define ENABLE_INSTR_PREDICATE_VERIFIER #include "VEGenMCCodeEmitter.inc" MCCodeEmitter *llvm::createVEMCCodeEmitter(const MCInstrInfo &MCII, diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp b/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp index f4fbf763e59c..5a562d77f941 100644 --- a/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp +++ b/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp @@ -24,6 +24,7 @@ using namespace llvm; #define GET_INSTRINFO_MC_DESC +#define ENABLE_INSTR_PREDICATE_VERIFIER #include "VEGenInstrInfo.inc" #define GET_SUBTARGETINFO_MC_DESC diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.h b/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.h index d8f9d0634c24..935a0bfc0c4c 100644 --- a/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.h +++ b/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.h @@ -44,6 +44,7 @@ std::unique_ptr<MCObjectTargetWriter> createVEELFObjectWriter(uint8_t OSABI); // Defines symbolic names for the VE instructions. 
// #define GET_INSTRINFO_ENUM +#define GET_INSTRINFO_MC_HELPER_DECLS #include "VEGenInstrInfo.inc" #define GET_SUBTARGETINFO_ENUM diff --git a/llvm/lib/Target/VE/VEAsmPrinter.cpp b/llvm/lib/Target/VE/VEAsmPrinter.cpp index af69d04a17ca..5553087d6f47 100644 --- a/llvm/lib/Target/VE/VEAsmPrinter.cpp +++ b/llvm/lib/Target/VE/VEAsmPrinter.cpp @@ -325,6 +325,8 @@ void VEAsmPrinter::lowerGETTLSAddrAndEmitMCInsts(const MachineInstr *MI, } void VEAsmPrinter::emitInstruction(const MachineInstr *MI) { + VE_MC::verifyInstructionPredicates(MI->getOpcode(), + getSubtargetInfo().getFeatureBits()); switch (MI->getOpcode()) { default: diff --git a/llvm/lib/Target/VE/VEInstrInfo.td b/llvm/lib/Target/VE/VEInstrInfo.td index 85285749b4fa..e54453b31354 100644 --- a/llvm/lib/Target/VE/VEInstrInfo.td +++ b/llvm/lib/Target/VE/VEInstrInfo.td @@ -325,22 +325,22 @@ def VEMEMziiAsmOperand : AsmOperandClass { // ASX format uses single assembly instruction format. def MEMrri : Operand<iPTR> { let PrintMethod = "printMemASXOperand"; - let MIOperandInfo = (ops ptr_rc, ptr_rc, i32imm); + let MIOperandInfo = (ops ptr_rc, ptr_rc, i64imm); let ParserMatchClass = VEMEMrriAsmOperand; } def MEMrii : Operand<iPTR> { let PrintMethod = "printMemASXOperand"; - let MIOperandInfo = (ops ptr_rc, i32imm, i32imm); + let MIOperandInfo = (ops ptr_rc, i32imm, i64imm); let ParserMatchClass = VEMEMriiAsmOperand; } def MEMzri : Operand<iPTR> { let PrintMethod = "printMemASXOperand"; - let MIOperandInfo = (ops i32imm /* = 0 */, ptr_rc, i32imm); + let MIOperandInfo = (ops i32imm /* = 0 */, ptr_rc, i64imm); let ParserMatchClass = VEMEMzriAsmOperand; } def MEMzii : Operand<iPTR> { let PrintMethod = "printMemASXOperand"; - let MIOperandInfo = (ops i32imm /* = 0 */, i32imm, i32imm); + let MIOperandInfo = (ops i32imm /* = 0 */, i32imm, i64imm); let ParserMatchClass = VEMEMziiAsmOperand; } diff --git a/llvm/lib/Target/VE/VERegisterInfo.cpp b/llvm/lib/Target/VE/VERegisterInfo.cpp index d175ad26c742..f334af128162 100644 --- a/llvm/lib/Target/VE/VERegisterInfo.cpp +++ b/llvm/lib/Target/VE/VERegisterInfo.cpp @@ -27,6 +27,8 @@ using namespace llvm; +#define DEBUG_TYPE "ve-register-info" + #define GET_REGINFO_TARGET_DESC #include "VEGenRegisterInfo.inc" @@ -133,66 +135,179 @@ static unsigned offsetToDisp(MachineInstr &MI) { return OffDisp; } -static void replaceFI(MachineFunction &MF, MachineBasicBlock::iterator II, - MachineInstr &MI, const DebugLoc &dl, - unsigned FIOperandNum, int Offset, Register FrameReg) { - // Replace frame index with a frame pointer reference directly. - // VE has 32 bit offset field, so no need to expand a target instruction. - // Directly encode it. +class EliminateFrameIndex { + const TargetInstrInfo &TII; + const TargetRegisterInfo &TRI; + const DebugLoc &DL; + MachineBasicBlock &MBB; + MachineBasicBlock::iterator II; + Register clobber; + + // Some helper functions for the ease of instruction building. 
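// (For example, build(VE::LEAzii, Reg) below expands to
//  BuildMI(MBB, II, DL, TII.get(VE::LEAzii), Reg); shown as a sketch of how
//  these wrappers read.)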
+  MachineFunction &getFunc() const { return *MBB.getParent(); }
+  inline MCRegister getSubReg(MCRegister Reg, unsigned Idx) const {
+    return TRI.getSubReg(Reg, Idx);
+  }
+  inline const MCInstrDesc &get(unsigned Opcode) const {
+    return TII.get(Opcode);
+  }
+  inline MachineInstrBuilder build(const MCInstrDesc &MCID, Register DestReg) {
+    return BuildMI(MBB, II, DL, MCID, DestReg);
+  }
+  inline MachineInstrBuilder build(unsigned InstOpc, Register DestReg) {
+    return build(get(InstOpc), DestReg);
+  }
+  inline MachineInstrBuilder build(const MCInstrDesc &MCID) {
+    return BuildMI(MBB, II, DL, MCID);
+  }
+  inline MachineInstrBuilder build(unsigned InstOpc) {
+    return build(get(InstOpc));
+  }
+
+  // Calculate the address of a frame index from a frame register and a given
+  // offset if the offset doesn't fit in the immediate field. Use a clobber
+  // register to hold the calculated address.
+  void prepareReplaceFI(MachineInstr &MI, Register &FrameReg, int64_t &Offset,
+                        int64_t Bytes = 0);
+  // Replace the frame index in \p MI with a frame register and a given offset
+  // if it fits in the immediate field. Otherwise, use the pre-calculated
+  // address in a clobber register.
+  void replaceFI(MachineInstr &MI, Register FrameReg, int64_t Offset,
+                 int FIOperandNum);
+
+  // Expand and eliminate the frame index of the pseudo STQrii and LDQrii.
+  void processSTQ(MachineInstr &MI, Register FrameReg, int64_t Offset,
+                  int FIOperandNum);
+  void processLDQ(MachineInstr &MI, Register FrameReg, int64_t Offset,
+                  int FIOperandNum);
+
+public:
+  EliminateFrameIndex(const TargetInstrInfo &TII, const TargetRegisterInfo &TRI,
+                      const DebugLoc &DL, MachineBasicBlock &MBB,
+                      MachineBasicBlock::iterator II)
+      : TII(TII), TRI(TRI), DL(DL), MBB(MBB), II(II), clobber(VE::SX13) {}
+
+  // Expand and eliminate the frame index from MI.
+  void processMI(MachineInstr &MI, Register FrameReg, int64_t Offset,
+                 int FIOperandNum);
+};
+
+// Prepare the frame index if it doesn't fit in the immediate field. Use the
+// clobber register to hold the calculated address.
+void EliminateFrameIndex::prepareReplaceFI(MachineInstr &MI, Register &FrameReg,
+                                           int64_t &Offset, int64_t Bytes) {
+  if (isInt<32>(Offset) && isInt<32>(Offset + Bytes)) {
+    // If the offset is small enough to fit in the immediate field, directly
+    // encode it. So, nothing to prepare here.
+    return;
+  }
+
+  // If the offset doesn't fit, emit the following code. This clobbers SX13,
+  // which we always know is available here.
+  //   lea %clobber, Offset@lo
+  //   and %clobber, %clobber, (32)0
+  //   lea.sl %clobber, Offset@hi(FrameReg, %clobber)
+  build(VE::LEAzii, clobber).addImm(0).addImm(0).addImm(Lo_32(Offset));
+  build(VE::ANDrm, clobber).addReg(clobber).addImm(M0(32));
+  build(VE::LEASLrri, clobber)
+      .addReg(clobber)
+      .addReg(FrameReg)
+      .addImm(Hi_32(Offset));
+
+  // Use the clobber register as the frame register with a zero offset.
+  FrameReg = clobber;
+  Offset = 0;
+}
+
+// Replace the frame index in \p MI with the frame register and a proper byte
+// offset.
+void EliminateFrameIndex::replaceFI(MachineInstr &MI, Register FrameReg,
+                                    int64_t Offset, int FIOperandNum) {
+  assert(isInt<32>(Offset));
+
+  // The offset must be small enough to fit in the immediate field after the
+  // call of prepareReplaceFI. Therefore, we directly encode it.
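// (Worked example, as a sketch: for an original Offset of 0x123456789,
//  which fails isInt<32>, prepareReplaceFI has already emitted
//    lea %s13, 0x23456789             ; Offset@lo
//    and %s13, %s13, (32)0            ; keep only the low 32 bits
//    lea.sl %s13, 0x1(FrameReg, %s13) ; add Offset@hi << 32 and FrameReg
//  so by this point FrameReg is %s13, the designated clobber register, and
//  Offset is 0.)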
MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false); MI.getOperand(FIOperandNum + offsetToDisp(MI)).ChangeToImmediate(Offset); } +void EliminateFrameIndex::processSTQ(MachineInstr &MI, Register FrameReg, + int64_t Offset, int FIOperandNum) { + assert(MI.getOpcode() == VE::STQrii); + LLVM_DEBUG(dbgs() << "processSTQ: "; MI.dump()); + + prepareReplaceFI(MI, FrameReg, Offset, 8); + + Register SrcReg = MI.getOperand(3).getReg(); + Register SrcHiReg = getSubReg(SrcReg, VE::sub_even); + Register SrcLoReg = getSubReg(SrcReg, VE::sub_odd); + // VE stores HiReg to 8(addr) and LoReg to 0(addr) + MachineInstr *StMI = + build(VE::STrii).addReg(FrameReg).addImm(0).addImm(0).addReg(SrcLoReg); + replaceFI(*StMI, FrameReg, Offset, 0); + // Mutate to 'hi' store. + MI.setDesc(get(VE::STrii)); + MI.getOperand(3).setReg(SrcHiReg); + Offset += 8; + replaceFI(MI, FrameReg, Offset, FIOperandNum); +} + +void EliminateFrameIndex::processLDQ(MachineInstr &MI, Register FrameReg, + int64_t Offset, int FIOperandNum) { + assert(MI.getOpcode() == VE::LDQrii); + LLVM_DEBUG(dbgs() << "processLDQ: "; MI.dump()); + + prepareReplaceFI(MI, FrameReg, Offset, 8); + + Register DestReg = MI.getOperand(0).getReg(); + Register DestHiReg = getSubReg(DestReg, VE::sub_even); + Register DestLoReg = getSubReg(DestReg, VE::sub_odd); + // VE loads HiReg from 8(addr) and LoReg from 0(addr) + MachineInstr *StMI = + build(VE::LDrii, DestLoReg).addReg(FrameReg).addImm(0).addImm(0); + replaceFI(*StMI, FrameReg, Offset, 1); + MI.setDesc(get(VE::LDrii)); + MI.getOperand(0).setReg(DestHiReg); + Offset += 8; + replaceFI(MI, FrameReg, Offset, FIOperandNum); +} + +void EliminateFrameIndex::processMI(MachineInstr &MI, Register FrameReg, + int64_t Offset, int FIOperandNum) { + switch (MI.getOpcode()) { + case VE::STQrii: + processSTQ(MI, FrameReg, Offset, FIOperandNum); + return; + case VE::LDQrii: + processLDQ(MI, FrameReg, Offset, FIOperandNum); + return; + } + prepareReplaceFI(MI, FrameReg, Offset); + replaceFI(MI, FrameReg, Offset, FIOperandNum); +} + void VERegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum, RegScavenger *RS) const { assert(SPAdj == 0 && "Unexpected"); MachineInstr &MI = *II; - DebugLoc dl = MI.getDebugLoc(); int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); + MachineFunction &MF = *MI.getParent()->getParent(); - const VEFrameLowering *TFI = getFrameLowering(MF); + const VESubtarget &Subtarget = MF.getSubtarget<VESubtarget>(); + const VEFrameLowering &TFI = *getFrameLowering(MF); + const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); + const VERegisterInfo &TRI = *Subtarget.getRegisterInfo(); + DebugLoc DL = MI.getDebugLoc(); + EliminateFrameIndex EFI(TII, TRI, DL, *MI.getParent(), II); + // Retrieve FrameReg and byte offset for stack slot. 
Register FrameReg; - int Offset; - Offset = TFI->getFrameIndexReference(MF, FrameIndex, FrameReg).getFixed(); - + int64_t Offset = + TFI.getFrameIndexReference(MF, FrameIndex, FrameReg).getFixed(); Offset += MI.getOperand(FIOperandNum + offsetToDisp(MI)).getImm(); - if (MI.getOpcode() == VE::STQrii) { - const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); - Register SrcReg = MI.getOperand(3).getReg(); - Register SrcHiReg = getSubReg(SrcReg, VE::sub_even); - Register SrcLoReg = getSubReg(SrcReg, VE::sub_odd); - // VE stores HiReg to 8(addr) and LoReg to 0(addr) - MachineInstr *StMI = BuildMI(*MI.getParent(), II, dl, TII.get(VE::STrii)) - .addReg(FrameReg) - .addImm(0) - .addImm(0) - .addReg(SrcLoReg); - replaceFI(MF, II, *StMI, dl, 0, Offset, FrameReg); - MI.setDesc(TII.get(VE::STrii)); - MI.getOperand(3).setReg(SrcHiReg); - Offset += 8; - } else if (MI.getOpcode() == VE::LDQrii) { - const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); - Register DestReg = MI.getOperand(0).getReg(); - Register DestHiReg = getSubReg(DestReg, VE::sub_even); - Register DestLoReg = getSubReg(DestReg, VE::sub_odd); - // VE loads HiReg from 8(addr) and LoReg from 0(addr) - MachineInstr *StMI = - BuildMI(*MI.getParent(), II, dl, TII.get(VE::LDrii), DestLoReg) - .addReg(FrameReg) - .addImm(0) - .addImm(0); - replaceFI(MF, II, *StMI, dl, 1, Offset, FrameReg); - MI.setDesc(TII.get(VE::LDrii)); - MI.getOperand(0).setReg(DestHiReg); - Offset += 8; - } - - replaceFI(MF, II, MI, dl, FIOperandNum, Offset, FrameReg); + EFI.processMI(MI, FrameReg, Offset, FIOperandNum); } Register VERegisterInfo::getFrameRegister(const MachineFunction &MF) const { diff --git a/llvm/lib/Target/VE/VVPISelLowering.cpp b/llvm/lib/Target/VE/VVPISelLowering.cpp index 330eef4c7c2b..f88f298bc603 100644 --- a/llvm/lib/Target/VE/VVPISelLowering.cpp +++ b/llvm/lib/Target/VE/VVPISelLowering.cpp @@ -41,7 +41,7 @@ SDValue VETargetLowering::lowerToVVP(SDValue Op, SelectionDAG &DAG) const { auto VVPOpcodeOpt = getVVPOpcode(Opcode); if (!VVPOpcodeOpt) return SDValue(); - unsigned VVPOpcode = VVPOpcodeOpt.getValue(); + unsigned VVPOpcode = VVPOpcodeOpt.value(); const bool FromVP = ISD::isVPOpcode(Opcode); // The representative and legalized vector type of this operation. diff --git a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp index ec72c1de0503..d31715e367ec 100644 --- a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp +++ b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp @@ -87,15 +87,14 @@ bool WebAssemblyAsmTypeCheck::popType(SMLoc ErrorLoc, if (Stack.empty()) { return typeError(ErrorLoc, EVT ? 
StringRef("empty stack while popping ") + - WebAssembly::typeToString(EVT.getValue()) + WebAssembly::typeToString(EVT.value()) : StringRef("empty stack while popping value")); } auto PVT = Stack.pop_back_val(); - if (EVT && EVT.getValue() != PVT) { + if (EVT && EVT.value() != PVT) { return typeError( ErrorLoc, StringRef("popped ") + WebAssembly::typeToString(PVT) + - ", expected " + - WebAssembly::typeToString(EVT.getValue())); + ", expected " + WebAssembly::typeToString(EVT.value())); } return false; } diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp index f52545a65dbb..97dbc35c991b 100644 --- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp +++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp @@ -26,6 +26,7 @@ using namespace llvm; #define DEBUG_TYPE "wasm-mc-target-desc" #define GET_INSTRINFO_MC_DESC +#define ENABLE_INSTR_PREDICATE_VERIFIER #include "WebAssemblyGenInstrInfo.inc" #define GET_SUBTARGETINFO_MC_DESC diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h index 75d5d0675990..b5b12200505b 100644 --- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h +++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h @@ -124,6 +124,7 @@ enum TOF { // Defines symbolic names for the WebAssembly instructions. // #define GET_INSTRINFO_ENUM +#define GET_INSTRINFO_MC_HELPER_DECLS #include "WebAssemblyGenInstrInfo.inc" namespace llvm { diff --git a/llvm/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp b/llvm/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp index e3daf6bfa72e..ef2c77ade8cc 100644 --- a/llvm/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp +++ b/llvm/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp @@ -37,4 +37,5 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeWebAssemblyTargetInfo() { // which have to be in a shared location between CodeGen and MC. #define GET_INSTRMAP_INFO 1 #define GET_INSTRINFO_ENUM 1 +#define GET_INSTRINFO_MC_HELPER_DECLS #include "WebAssemblyGenInstrInfo.inc" diff --git a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.cpp b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.cpp index 0f1655718481..f380b2582c65 100644 --- a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.cpp +++ b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.cpp @@ -13,6 +13,7 @@ #include "WebAssemblyTypeUtilities.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" // Get register classes enum. #define GET_REGINFO_ENUM @@ -168,6 +169,11 @@ wasm::ValType WebAssembly::regClassToValType(unsigned RC) { } } +wasm::ValType WebAssembly::regClassToValType(const TargetRegisterClass *RC) { + assert(RC != nullptr); + return regClassToValType(RC->getID()); +} + void WebAssembly::wasmSymbolSetType(MCSymbolWasm *Sym, const Type *GlobalVT, const SmallVector<MVT, 1> &VTs) { assert(!Sym->getType()); @@ -175,33 +181,28 @@ void WebAssembly::wasmSymbolSetType(MCSymbolWasm *Sym, const Type *GlobalVT, // Tables are represented as Arrays in LLVM IR therefore // they reach this point as aggregate Array types with an element type // that is a reference type. 
- wasm::ValType Type; + wasm::ValType ValTy; bool IsTable = false; if (GlobalVT->isArrayTy() && WebAssembly::isRefType(GlobalVT->getArrayElementType())) { - MVT VT; IsTable = true; - switch (GlobalVT->getArrayElementType()->getPointerAddressSpace()) { - case WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_FUNCREF: - VT = MVT::funcref; - break; - case WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_EXTERNREF: - VT = MVT::externref; - break; - default: - report_fatal_error("unhandled address space type"); - } - Type = WebAssembly::toValType(VT); + const Type *ElTy = GlobalVT->getArrayElementType(); + if (WebAssembly::isExternrefType(ElTy)) + ValTy = wasm::ValType::EXTERNREF; + else if (WebAssembly::isFuncrefType(ElTy)) + ValTy = wasm::ValType::FUNCREF; + else + report_fatal_error("unhandled reference type"); } else if (VTs.size() == 1) { - Type = WebAssembly::toValType(VTs[0]); + ValTy = WebAssembly::toValType(VTs[0]); } else report_fatal_error("Aggregate globals not yet implemented"); if (IsTable) { Sym->setType(wasm::WASM_SYMBOL_TYPE_TABLE); - Sym->setTableType(Type); + Sym->setTableType(ValTy); } else { Sym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL); - Sym->setGlobalType(wasm::WasmGlobalType{uint8_t(Type), /*Mutable=*/true}); + Sym->setGlobalType(wasm::WasmGlobalType{uint8_t(ValTy), /*Mutable=*/true}); } } diff --git a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.h b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.h index 8fc67d37925c..86211700c70a 100644 --- a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.h +++ b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.h @@ -22,6 +22,9 @@ #include "llvm/Support/MachineValueType.h" namespace llvm { + +class TargetRegisterClass; + namespace WebAssembly { /// Used as immediate MachineOperands for block signatures @@ -108,9 +111,12 @@ std::string signatureToString(const wasm::WasmSignature *Sig); // Convert a MVT into its corresponding wasm ValType. wasm::ValType toValType(MVT Type); -// Convert a register class to a wasm ValType. +// Convert a register class ID to a wasm ValType. wasm::ValType regClassToValType(unsigned RC); +// Convert a register class to a wasm ValType. +wasm::ValType regClassToValType(const TargetRegisterClass *RC); + /// Sets a Wasm Symbol Type. 
void wasmSymbolSetType(MCSymbolWasm *Sym, const Type *GlobalVT, const SmallVector<MVT, 1> &VTs); diff --git a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.cpp b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.cpp index b87c884c9e4a..277bbee83a6f 100644 --- a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.cpp +++ b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.cpp @@ -179,3 +179,25 @@ MachineInstr *WebAssembly::findCatch(MachineBasicBlock *EHPad) { return &*Pos; return nullptr; } + +unsigned WebAssembly::getCopyOpcodeForRegClass(const TargetRegisterClass *RC) { + assert(RC != nullptr); + switch (RC->getID()) { + case WebAssembly::I32RegClassID: + return WebAssembly::COPY_I32; + case WebAssembly::I64RegClassID: + return WebAssembly::COPY_I64; + case WebAssembly::F32RegClassID: + return WebAssembly::COPY_F32; + case WebAssembly::F64RegClassID: + return WebAssembly::COPY_F64; + case WebAssembly::V128RegClassID: + return WebAssembly::COPY_V128; + case WebAssembly::FUNCREFRegClassID: + return WebAssembly::COPY_FUNCREF; + case WebAssembly::EXTERNREFRegClassID: + return WebAssembly::COPY_EXTERNREF; + default: + llvm_unreachable("Unexpected register class"); + } +} diff --git a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.h b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.h index cdfc758db7ac..d0639208fda9 100644 --- a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.h +++ b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.h @@ -24,6 +24,7 @@ class MachineInstr; class MachineOperand; class MCContext; class MCSymbolWasm; +class TargetRegisterClass; class WebAssemblyFunctionInfo; class WebAssemblySubtarget; @@ -65,6 +66,9 @@ getOrCreateFuncrefCallTableSymbol(MCContext &Ctx, /// instruction found or the catch is in an invalid location. MachineInstr *findCatch(MachineBasicBlock *EHPad); +/// Returns the appropriate copy opcode for the given register class. +unsigned getCopyOpcodeForRegClass(const TargetRegisterClass *RC); + } // end namespace WebAssembly } // end namespace llvm diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp index 57d51634e849..bcb6cf1b4e1d 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp @@ -597,6 +597,8 @@ void WebAssemblyAsmPrinter::emitFunctionBodyStart() { void WebAssemblyAsmPrinter::emitInstruction(const MachineInstr *MI) { LLVM_DEBUG(dbgs() << "EmitInstruction: " << *MI << '\n'); + WebAssembly_MC::verifyInstructionPredicates(MI->getOpcode(), + Subtarget->getFeatureBits()); switch (MI->getOpcode()) { case WebAssembly::ARGUMENT_i32: diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp index 02e873a0f9a6..d2eb4b29e9fd 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp @@ -781,25 +781,6 @@ void WebAssemblyCFGStackify::removeUnnecessaryInstrs(MachineFunction &MF) { } } -// Get the appropriate copy opcode for the given register class. 
-static unsigned getCopyOpcode(const TargetRegisterClass *RC) { - if (RC == &WebAssembly::I32RegClass) - return WebAssembly::COPY_I32; - if (RC == &WebAssembly::I64RegClass) - return WebAssembly::COPY_I64; - if (RC == &WebAssembly::F32RegClass) - return WebAssembly::COPY_F32; - if (RC == &WebAssembly::F64RegClass) - return WebAssembly::COPY_F64; - if (RC == &WebAssembly::V128RegClass) - return WebAssembly::COPY_V128; - if (RC == &WebAssembly::FUNCREFRegClass) - return WebAssembly::COPY_FUNCREF; - if (RC == &WebAssembly::EXTERNREFRegClass) - return WebAssembly::COPY_EXTERNREF; - llvm_unreachable("Unexpected register class"); -} - // When MBB is split into MBB and Split, we should unstackify defs in MBB that // have their uses in Split. static void unstackifyVRegsUsedInSplitBB(MachineBasicBlock &MBB, @@ -851,7 +832,8 @@ static void unstackifyVRegsUsedInSplitBB(MachineBasicBlock &MBB, if (!MFI.isVRegStackified(TeeReg)) { // Now we are not using TEE anymore, so unstackify DefReg too MFI.unstackifyVReg(DefReg); - unsigned CopyOpc = getCopyOpcode(MRI.getRegClass(DefReg)); + unsigned CopyOpc = + WebAssembly::getCopyOpcodeForRegClass(MRI.getRegClass(DefReg)); BuildMI(MBB, &MI, MI.getDebugLoc(), TII.get(CopyOpc), TeeReg) .addReg(DefReg); BuildMI(MBB, &MI, MI.getDebugLoc(), TII.get(CopyOpc), Reg).addReg(DefReg); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp index 5484c0db7775..9316826e3d92 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp @@ -66,23 +66,7 @@ void WebAssemblyInstrInfo::copyPhysReg(MachineBasicBlock &MBB, ? MRI.getRegClass(DestReg) : MRI.getTargetRegisterInfo()->getMinimalPhysRegClass(DestReg); - unsigned CopyOpcode; - if (RC == &WebAssembly::I32RegClass) - CopyOpcode = WebAssembly::COPY_I32; - else if (RC == &WebAssembly::I64RegClass) - CopyOpcode = WebAssembly::COPY_I64; - else if (RC == &WebAssembly::F32RegClass) - CopyOpcode = WebAssembly::COPY_F32; - else if (RC == &WebAssembly::F64RegClass) - CopyOpcode = WebAssembly::COPY_F64; - else if (RC == &WebAssembly::V128RegClass) - CopyOpcode = WebAssembly::COPY_V128; - else if (RC == &WebAssembly::FUNCREFRegClass) - CopyOpcode = WebAssembly::COPY_FUNCREF; - else if (RC == &WebAssembly::EXTERNREFRegClass) - CopyOpcode = WebAssembly::COPY_EXTERNREF; - else - llvm_unreachable("Unexpected register class"); + unsigned CopyOpcode = WebAssembly::getCopyOpcodeForRegClass(RC); BuildMI(MBB, I, DL, get(CopyOpcode), DestReg) .addReg(SrcReg, KillSrc ? 
RegState::Kill : 0); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp index 2db4bd822349..7a1a769c6b16 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp @@ -553,7 +553,7 @@ Value *WebAssemblyLowerEmscriptenEHSjLj::wrapInvoke(CallBase *CI) { std::tie(SizeArg, NEltArg) = FnAttrs.getAllocSizeArgs(); SizeArg += 1; if (NEltArg) - NEltArg = NEltArg.getValue() + 1; + NEltArg = NEltArg.value() + 1; FnAttrs.addAllocSizeAttr(SizeArg, NEltArg); } // In case the callee has 'noreturn' attribute, We need to remove it, because diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp index 2e6027a5605c..e8b3542df12f 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp @@ -154,25 +154,6 @@ MCOperand WebAssemblyMCInstLower::lowerTypeIndexOperand( return MCOperand::createExpr(Expr); } -// Return the WebAssembly type associated with the given register class. -static wasm::ValType getType(const TargetRegisterClass *RC) { - if (RC == &WebAssembly::I32RegClass) - return wasm::ValType::I32; - if (RC == &WebAssembly::I64RegClass) - return wasm::ValType::I64; - if (RC == &WebAssembly::F32RegClass) - return wasm::ValType::F32; - if (RC == &WebAssembly::F64RegClass) - return wasm::ValType::F64; - if (RC == &WebAssembly::V128RegClass) - return wasm::ValType::V128; - if (RC == &WebAssembly::EXTERNREFRegClass) - return wasm::ValType::EXTERNREF; - if (RC == &WebAssembly::FUNCREFRegClass) - return wasm::ValType::FUNCREF; - llvm_unreachable("Unexpected register class"); -} - static void getFunctionReturns(const MachineInstr *MI, SmallVectorImpl<wasm::ValType> &Returns) { const Function &F = MI->getMF()->getFunction(); @@ -221,10 +202,12 @@ void WebAssemblyMCInstLower::lower(const MachineInstr *MI, const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); for (const MachineOperand &MO : MI->defs()) - Returns.push_back(getType(MRI.getRegClass(MO.getReg()))); + Returns.push_back( + WebAssembly::regClassToValType(MRI.getRegClass(MO.getReg()))); for (const MachineOperand &MO : MI->explicit_uses()) if (MO.isReg()) - Params.push_back(getType(MRI.getRegClass(MO.getReg()))); + Params.push_back( + WebAssembly::regClassToValType(MRI.getRegClass(MO.getReg()))); // call_indirect instructions have a callee operand at the end which // doesn't count as a param. 
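The WebAssembly call sites above now funnel through the shared helpers introduced in WebAssemblyUtilities and WebAssemblyTypeUtilities. A minimal sketch of the resulting pattern, where Reg and MRI stand in for a virtual register and its MachineRegisterInfo from any of these passes:

const TargetRegisterClass *RC = MRI.getRegClass(Reg);
unsigned CopyOpc = WebAssembly::getCopyOpcodeForRegClass(RC); // e.g. COPY_I32
wasm::ValType VT = WebAssembly::regClassToValType(RC);        // e.g. I32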
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp index ba1c4b7233f2..5fcee7af9bde 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "Utils/WebAssemblyUtilities.h" #include "WebAssembly.h" #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" @@ -95,31 +96,7 @@ static bool maybeRewriteToFallthrough(MachineInstr &MI, MachineBasicBlock &MBB, if (!MFI.isVRegStackified(Reg)) { unsigned CopyLocalOpc; const TargetRegisterClass *RegClass = MRI.getRegClass(Reg); - switch (RegClass->getID()) { - case WebAssembly::I32RegClassID: - CopyLocalOpc = WebAssembly::COPY_I32; - break; - case WebAssembly::I64RegClassID: - CopyLocalOpc = WebAssembly::COPY_I64; - break; - case WebAssembly::F32RegClassID: - CopyLocalOpc = WebAssembly::COPY_F32; - break; - case WebAssembly::F64RegClassID: - CopyLocalOpc = WebAssembly::COPY_F64; - break; - case WebAssembly::V128RegClassID: - CopyLocalOpc = WebAssembly::COPY_V128; - break; - case WebAssembly::FUNCREFRegClassID: - CopyLocalOpc = WebAssembly::COPY_FUNCREF; - break; - case WebAssembly::EXTERNREFRegClassID: - CopyLocalOpc = WebAssembly::COPY_EXTERNREF; - break; - default: - llvm_unreachable("Unexpected register class for return operand"); - } + CopyLocalOpc = WebAssembly::getCopyOpcodeForRegClass(RegClass); Register NewReg = MRI.createVirtualRegister(RegClass); BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(CopyLocalOpc), NewReg) .addReg(Reg); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp index 388c0f9110b7..0b3e534315d5 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp @@ -21,7 +21,6 @@ #include "WebAssemblyRuntimeLibcallSignatures.h" #include "WebAssemblySubtarget.h" #include "llvm/CodeGen/RuntimeLibcalls.h" -#include "llvm/Support/ManagedStatic.h" using namespace llvm; @@ -482,10 +481,13 @@ struct RuntimeLibcallSignatureTable { } }; -ManagedStatic<RuntimeLibcallSignatureTable> RuntimeLibcallSignatures; +RuntimeLibcallSignatureTable &getRuntimeLibcallSignatures() { + static RuntimeLibcallSignatureTable RuntimeLibcallSignatures; + return RuntimeLibcallSignatures; +} // Maps libcall names to their RTLIB::Libcall number. Builds the map in a -// constructor for use with ManagedStatic +// constructor for use with a static variable struct StaticLibcallNameMap { StringMap<RTLIB::Libcall> Map; StaticLibcallNameMap() { @@ -496,7 +498,8 @@ struct StaticLibcallNameMap { }; for (const auto &NameLibcall : NameLibcalls) { if (NameLibcall.first != nullptr && - RuntimeLibcallSignatures->Table[NameLibcall.second] != unsupported) { + getRuntimeLibcallSignatures().Table[NameLibcall.second] != + unsupported) { assert(Map.find(NameLibcall.first) == Map.end() && "duplicate libcall names in name map"); Map[NameLibcall.first] = NameLibcall.second; @@ -523,7 +526,7 @@ void llvm::getLibcallSignature(const WebAssemblySubtarget &Subtarget, wasm::ValType PtrTy = Subtarget.hasAddr64() ? 
wasm::ValType::I64 : wasm::ValType::I32; - auto &Table = RuntimeLibcallSignatures->Table; + auto &Table = getRuntimeLibcallSignatures().Table; switch (Table[LC]) { case func: break; @@ -885,14 +888,14 @@ void llvm::getLibcallSignature(const WebAssemblySubtarget &Subtarget, } } -static ManagedStatic<StaticLibcallNameMap> LibcallNameMap; // TODO: If the RTLIB::Libcall-taking flavor of GetSignature remains unused // other than here, just roll its logic into this version. void llvm::getLibcallSignature(const WebAssemblySubtarget &Subtarget, StringRef Name, SmallVectorImpl<wasm::ValType> &Rets, SmallVectorImpl<wasm::ValType> &Params) { - auto &Map = LibcallNameMap->Map; + static StaticLibcallNameMap LibcallNameMap; + auto &Map = LibcallNameMap.Map; auto Val = Map.find(Name); #ifndef NDEBUG if (Val == Map.end()) { diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp index a903c5f455a2..da90befb2320 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp @@ -622,7 +622,7 @@ static bool printFMAComments(const MCInst *MI, raw_ostream &OS, OS << '-'; OS << '(' << Mul1Name << " * " << Mul2Name << ") " << AccStr << ' ' - << AccName; + << AccName << '\n'; return true; } diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstrRelaxTables.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86InstrRelaxTables.cpp index 901082ce6cf3..640efd468135 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86InstrRelaxTables.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86InstrRelaxTables.cpp @@ -13,6 +13,7 @@ #include "X86InstrRelaxTables.h" #include "X86InstrInfo.h" #include "llvm/ADT/STLExtras.h" +#include <atomic> using namespace llvm; @@ -119,7 +120,7 @@ const X86InstrRelaxTableEntry *llvm::lookupRelaxTable(unsigned ShortOp) { namespace { // This class stores the short form tables. It is instantiated as a -// ManagedStatic to lazily init the short form table. +// function scope static variable to lazily init the short form table. struct X86ShortFormTable { // Stores relaxation table entries sorted by relaxed form opcode. SmallVector<X86InstrRelaxTableEntry, 0> Table; @@ -137,10 +138,9 @@ struct X86ShortFormTable { }; } // namespace -static ManagedStatic<X86ShortFormTable> ShortTable; - const X86InstrRelaxTableEntry *llvm::lookupShortTable(unsigned RelaxOp) { + static X86ShortFormTable ShortTable; - auto &Table = ShortTable->Table; + auto &Table = ShortTable.Table; auto I = llvm::lower_bound(Table, RelaxOp); if (I != Table.end() && I->KeyOp == RelaxOp) return &*I; diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp index 49660883ad83..4c962de16530 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp @@ -37,6 +37,7 @@ using namespace llvm; #define GET_INSTRINFO_MC_DESC #define GET_INSTRINFO_MC_HELPERS +#define ENABLE_INSTR_PREDICATE_VERIFIER #include "X86GenInstrInfo.inc" #define GET_SUBTARGETINFO_MC_DESC diff --git a/llvm/lib/Target/X86/X86.h b/llvm/lib/Target/X86/X86.h index 7344900f2e31..0ac916527495 100644 --- a/llvm/lib/Target/X86/X86.h +++ b/llvm/lib/Target/X86/X86.h @@ -132,6 +132,9 @@ FunctionPass *createX86EvexToVexInsts(); /// This pass creates the thunks for the retpoline feature. FunctionPass *createX86IndirectThunksPass(); +/// This pass replaces ret instructions with jmps to the __x86_return_thunk.
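The ManagedStatic removals above (getRuntimeLibcallSignatures, LibcallNameMap, ShortTable) all use the same replacement idiom: a function-local static, which C++11 already guarantees is initialized lazily and race-free on first call, so no ManagedStatic or llvm_shutdown bookkeeping is needed. A sketch with a hypothetical table type:

    // 'Magic static': built on first call, thread-safe under C++11 rules.
    struct ExpensiveTable {
      int Data[1024] = {};
      ExpensiveTable() { /* populate Data once */ }
    };
    static ExpensiveTable &getTable() {
      static ExpensiveTable Table; // replaces ManagedStatic<ExpensiveTable>
      return Table;
    }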
+FunctionPass *createX86ReturnThunksPass(); + /// This pass ensures instructions featuring a memory operand /// have distinctive <LineNumber, Discriminator> (with respect to each other) FunctionPass *createX86DiscriminateMemOpsPass(); @@ -185,6 +188,7 @@ void initializeX86LowerAMXTypeLegacyPassPass(PassRegistry &); void initializeX86PreAMXConfigPassPass(PassRegistry &); void initializeX86LowerTileCopyPass(PassRegistry &); void initializeX86LowerAMXIntrinsicsLegacyPassPass(PassRegistry &); +void initializeX86ReturnThunksPass(PassRegistry &); namespace X86AS { enum : unsigned { diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index a5c6b40c493c..a859176220c7 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -266,6 +266,8 @@ def FeatureWBNOINVD : SubtargetFeature<"wbnoinvd", "HasWBNOINVD", "true", "Write Back No Invalidate">; def FeatureRDPID : SubtargetFeature<"rdpid", "HasRDPID", "true", "Support RDPID instructions">; +def FeatureRDPRU : SubtargetFeature<"rdpru", "HasRDPRU", "true", + "Support RDPRU instructions">; def FeatureWAITPKG : SubtargetFeature<"waitpkg", "HasWAITPKG", "true", "Wait and pause enhancements">; def FeatureENQCMD : SubtargetFeature<"enqcmd", "HasENQCMD", "true", @@ -1238,6 +1240,7 @@ def ProcessorFeatures { TuningInsertVZEROUPPER]; list<SubtargetFeature> ZN2AdditionalFeatures = [FeatureCLWB, FeatureRDPID, + FeatureRDPRU, FeatureWBNOINVD]; list<SubtargetFeature> ZN2Tuning = ZNTuning; list<SubtargetFeature> ZN2Features = diff --git a/llvm/lib/Target/X86/X86EvexToVex.cpp b/llvm/lib/Target/X86/X86EvexToVex.cpp index c7a013a0b17a..cff95d17c14c 100644 --- a/llvm/lib/Target/X86/X86EvexToVex.cpp +++ b/llvm/lib/Target/X86/X86EvexToVex.cpp @@ -31,6 +31,7 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/Pass.h" +#include <atomic> #include <cassert> #include <cstdint> diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 61c1fd25031d..12af6087cb47 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -594,7 +594,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, // Half type will be promoted by default.
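For the f16 hunks that follow: Promote legalization widens half-precision operands to f32, performs the operation there, and rounds the result back. Conceptually (a sketch only, not the legalizer's code; _Float16 is the C/C++ spelling of half on targets where clang supports it):

    static _Float16 fadd_f16(_Float16 A, _Float16 B) {
      // What Promote means for ISD::FADD on MVT::f16: extend to f32,
      // add in f32, truncate the result back to f16.
      return (_Float16)((float)A + (float)B);
    }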
setOperationAction(ISD::FABS, MVT::f16, Promote); setOperationAction(ISD::FNEG, MVT::f16, Promote); - setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote); + setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand); setOperationAction(ISD::FADD, MVT::f16, Promote); setOperationAction(ISD::FSUB, MVT::f16, Promote); setOperationAction(ISD::FMUL, MVT::f16, Promote); @@ -629,6 +629,34 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FP_ROUND, MVT::f16, LibCall); setOperationAction(ISD::FP_EXTEND, MVT::f32, LibCall); setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom); + + setOperationAction(ISD::STRICT_FADD, MVT::f16, Promote); + setOperationAction(ISD::STRICT_FSUB, MVT::f16, Promote); + setOperationAction(ISD::STRICT_FMUL, MVT::f16, Promote); + setOperationAction(ISD::STRICT_FDIV, MVT::f16, Promote); + setOperationAction(ISD::STRICT_FMA, MVT::f16, Promote); + setOperationAction(ISD::STRICT_FMINNUM, MVT::f16, Promote); + setOperationAction(ISD::STRICT_FMAXNUM, MVT::f16, Promote); + setOperationAction(ISD::STRICT_FMINIMUM, MVT::f16, Promote); + setOperationAction(ISD::STRICT_FMAXIMUM, MVT::f16, Promote); + setOperationAction(ISD::STRICT_FSQRT, MVT::f16, Promote); + setOperationAction(ISD::STRICT_FPOW, MVT::f16, Promote); + setOperationAction(ISD::STRICT_FLOG, MVT::f16, Promote); + setOperationAction(ISD::STRICT_FLOG2, MVT::f16, Promote); + setOperationAction(ISD::STRICT_FLOG10, MVT::f16, Promote); + setOperationAction(ISD::STRICT_FEXP, MVT::f16, Promote); + setOperationAction(ISD::STRICT_FEXP2, MVT::f16, Promote); + setOperationAction(ISD::STRICT_FCEIL, MVT::f16, Promote); + setOperationAction(ISD::STRICT_FFLOOR, MVT::f16, Promote); + setOperationAction(ISD::STRICT_FNEARBYINT, MVT::f16, Promote); + setOperationAction(ISD::STRICT_FRINT, MVT::f16, Promote); + setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Promote); + setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Promote); + setOperationAction(ISD::STRICT_FROUND, MVT::f16, Promote); + setOperationAction(ISD::STRICT_FROUNDEVEN, MVT::f16, Promote); + setOperationAction(ISD::STRICT_FTRUNC, MVT::f16, Promote); + setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, LibCall); + setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, LibCall); setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Custom); setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2"); @@ -2817,6 +2845,21 @@ Value *X86TargetLowering::getIRStackGuard(IRBuilderBase &IRB) const { AddressSpace = X86AS::FS; else if (GuardReg == "gs") AddressSpace = X86AS::GS; + + // Use a symbol as the guard if the user specified one. + StringRef GuardSymb = M->getStackProtectorGuardSymbol(); + if (!GuardSymb.empty()) { + GlobalVariable *GV = M->getGlobalVariable(GuardSymb); + if (!GV) { + Type *Ty = Subtarget.is64Bit() ? Type::getInt64Ty(M->getContext()) + : Type::getInt32Ty(M->getContext()); + GV = new GlobalVariable(*M, Ty, false, GlobalValue::ExternalLinkage, + nullptr, GuardSymb, nullptr, + GlobalValue::NotThreadLocal, AddressSpace); + } + return GV; + } + return SegmentOffset(IRB, Offset, AddressSpace); } } @@ -11757,15 +11800,17 @@ static bool isShuffleEquivalent(ArrayRef<int> Mask, ArrayRef<int> ExpectedMask, /// value in ExpectedMask is always accepted. Otherwise the indices must match. /// /// SM_SentinelZero is accepted as a valid negative index but must match in /// both, or via a known-bits test.
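The getIRStackGuard change above lets the stack cookie live in a named global rather than a TLS segment slot, which the Linux kernel uses for its per-CPU guard. A hedged sketch of the C++ side, assuming the setter that pairs with the getStackProtectorGuardSymbol() getter used here, and a hypothetical symbol name (the driver spelling, as understood, is -mstack-protector-guard-symbol=<name>):

    #include "llvm/IR/Module.h"
    static void useNamedGuard(llvm::Module &M) {
      // With this set, getIRStackGuard returns (and creates, if needed) the
      // named global instead of the %fs:/%gs:-relative guard slot.
      M.setStackProtectorGuardSymbol("__my_stack_chk_guard"); // hypothetical name
    }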
static bool isTargetShuffleEquivalent(MVT VT, ArrayRef<int> Mask, ArrayRef<int> ExpectedMask, + const SelectionDAG &DAG, SDValue V1 = SDValue(), SDValue V2 = SDValue()) { int Size = Mask.size(); if (Size != (int)ExpectedMask.size()) return false; - assert(isUndefOrZeroOrInRange(ExpectedMask, 0, 2 * Size) && + assert(llvm::all_of(ExpectedMask, + [Size](int M) { return isInRange(M, 0, 2 * Size); }) && "Illegal target shuffle mask"); // Check for out-of-range target shuffle mask indices. @@ -11778,12 +11823,28 @@ static bool isTargetShuffleEquivalent(MVT VT, ArrayRef<int> Mask, if (V2 && V2.getValueSizeInBits() != VT.getSizeInBits()) V2 = SDValue(); + APInt ZeroV1 = APInt::getNullValue(Size); + APInt ZeroV2 = APInt::getNullValue(Size); + for (int i = 0; i < Size; ++i) { int MaskIdx = Mask[i]; int ExpectedIdx = ExpectedMask[i]; if (MaskIdx == SM_SentinelUndef || MaskIdx == ExpectedIdx) continue; - if (0 <= MaskIdx && 0 <= ExpectedIdx) { + if (MaskIdx == SM_SentinelZero) { + // If we need this expected index to be a zero element, then update the + // relevant zero mask and perform the known-bits test at the end to minimize + // repeated computation. + SDValue ExpectedV = ExpectedIdx < Size ? V1 : V2; + if (ExpectedV && + Size == (int)ExpectedV.getValueType().getVectorNumElements()) { + int BitIdx = ExpectedIdx < Size ? ExpectedIdx : (ExpectedIdx - Size); + APInt &ZeroMask = ExpectedIdx < Size ? ZeroV1 : ZeroV2; + ZeroMask.setBit(BitIdx); + continue; + } + } + if (MaskIdx >= 0) { SDValue MaskV = MaskIdx < Size ? V1 : V2; SDValue ExpectedV = ExpectedIdx < Size ? V1 : V2; MaskIdx = MaskIdx < Size ? MaskIdx : (MaskIdx - Size); @@ -11791,15 +11852,16 @@ static bool isTargetShuffleEquivalent(MVT VT, ArrayRef<int> Mask, if (IsElementEquivalent(Size, MaskV, ExpectedV, MaskIdx, ExpectedIdx)) continue; } - // TODO - handle SM_Sentinel equivalences. return false; } - return true; + return (ZeroV1.isNullValue() || DAG.MaskedVectorIsZero(V1, ZeroV1)) && + (ZeroV2.isNullValue() || DAG.MaskedVectorIsZero(V2, ZeroV2)); } // Check if the shuffle mask is suitable for the AVX vpunpcklwd or vpunpckhwd // instructions. -static bool isUnpackWdShuffleMask(ArrayRef<int> Mask, MVT VT) { +static bool isUnpackWdShuffleMask(ArrayRef<int> Mask, MVT VT, + const SelectionDAG &DAG) { if (VT != MVT::v8i32 && VT != MVT::v8f32) return false; @@ -11809,12 +11871,13 @@ static bool isUnpackWdShuffleMask(ArrayRef<int> Mask, MVT VT) { SmallVector<int, 8> Unpckhwd; createUnpackShuffleMask(MVT::v8i16, Unpckhwd, /* Lo = */ false, /* Unary = */ false); - bool IsUnpackwdMask = (isTargetShuffleEquivalent(VT, Mask, Unpcklwd) || - isTargetShuffleEquivalent(VT, Mask, Unpckhwd)); + bool IsUnpackwdMask = (isTargetShuffleEquivalent(VT, Mask, Unpcklwd, DAG) || + isTargetShuffleEquivalent(VT, Mask, Unpckhwd, DAG)); return IsUnpackwdMask; } -static bool is128BitUnpackShuffleMask(ArrayRef<int> Mask) { +static bool is128BitUnpackShuffleMask(ArrayRef<int> Mask, + const SelectionDAG &DAG) { // Create 128-bit vector type based on mask size.
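For orientation in the unpack-mask matching here and below: createUnpackShuffleMask builds interleaving masks in which indices below the element count select from the first operand and the rest from the second. An illustration for v4f32, following the unpcklps/unpckhps semantics (not LLVM code):

    static const int UnpckLoMask[4] = {0, 4, 1, 5}; // unpcklps: interleave low halves
    static const int UnpckHiMask[4] = {2, 6, 3, 7}; // unpckhps: interleave high halves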
MVT EltVT = MVT::getIntegerVT(128 / Mask.size()); MVT VT = MVT::getVectorVT(EltVT, Mask.size()); @@ -11827,8 +11890,8 @@ static bool is128BitUnpackShuffleMask(ArrayRef<int> Mask) { for (unsigned i = 0; i != 4; ++i) { SmallVector<int, 16> UnpackMask; createUnpackShuffleMask(VT, UnpackMask, (i >> 1) % 2, i % 2); - if (isTargetShuffleEquivalent(VT, Mask, UnpackMask) || - isTargetShuffleEquivalent(VT, CommutedMask, UnpackMask)) + if (isTargetShuffleEquivalent(VT, Mask, UnpackMask, DAG) || + isTargetShuffleEquivalent(VT, CommutedMask, UnpackMask, DAG)) return true; } return false; @@ -12021,7 +12084,7 @@ static bool matchShuffleWithUNPCK(MVT VT, SDValue &V1, SDValue &V2, // Attempt to match the target mask against the unpack lo/hi mask patterns. SmallVector<int, 64> Unpckl, Unpckh; createUnpackShuffleMask(VT, Unpckl, /* Lo = */ true, IsUnary); - if (isTargetShuffleEquivalent(VT, TargetMask, Unpckl, V1, + if (isTargetShuffleEquivalent(VT, TargetMask, Unpckl, DAG, V1, (IsUnary ? V1 : V2))) { UnpackOpcode = X86ISD::UNPCKL; V2 = (Undef2 ? DAG.getUNDEF(VT) : (IsUnary ? V1 : V2)); @@ -12030,7 +12093,7 @@ static bool matchShuffleWithUNPCK(MVT VT, SDValue &V1, SDValue &V2, } createUnpackShuffleMask(VT, Unpckh, /* Lo = */ false, IsUnary); - if (isTargetShuffleEquivalent(VT, TargetMask, Unpckh, V1, + if (isTargetShuffleEquivalent(VT, TargetMask, Unpckh, DAG, V1, (IsUnary ? V1 : V2))) { UnpackOpcode = X86ISD::UNPCKH; V2 = (Undef2 ? DAG.getUNDEF(VT) : (IsUnary ? V1 : V2)); @@ -12069,14 +12132,14 @@ static bool matchShuffleWithUNPCK(MVT VT, SDValue &V1, SDValue &V2, // If a binary shuffle, commute and try again. if (!IsUnary) { ShuffleVectorSDNode::commuteMask(Unpckl); - if (isTargetShuffleEquivalent(VT, TargetMask, Unpckl)) { + if (isTargetShuffleEquivalent(VT, TargetMask, Unpckl, DAG)) { UnpackOpcode = X86ISD::UNPCKL; std::swap(V1, V2); return true; } ShuffleVectorSDNode::commuteMask(Unpckh); - if (isTargetShuffleEquivalent(VT, TargetMask, Unpckh)) { + if (isTargetShuffleEquivalent(VT, TargetMask, Unpckh, DAG)) { UnpackOpcode = X86ISD::UNPCKH; std::swap(V1, V2); return true; @@ -12464,14 +12527,14 @@ static bool matchShuffleWithPACK(MVT VT, MVT &SrcVT, SDValue &V1, SDValue &V2, // Try binary shuffle. SmallVector<int, 32> BinaryMask; createPackShuffleMask(VT, BinaryMask, false, NumStages); - if (isTargetShuffleEquivalent(VT, TargetMask, BinaryMask, V1, V2)) + if (isTargetShuffleEquivalent(VT, TargetMask, BinaryMask, DAG, V1, V2)) if (MatchPACK(V1, V2, PackVT)) return true; // Try unary shuffle. SmallVector<int, 32> UnaryMask; createPackShuffleMask(VT, UnaryMask, true, NumStages); - if (isTargetShuffleEquivalent(VT, TargetMask, UnaryMask, V1)) + if (isTargetShuffleEquivalent(VT, TargetMask, UnaryMask, DAG, V1)) if (MatchPACK(V1, V1, PackVT)) return true; } @@ -14283,7 +14346,7 @@ static SDValue lowerShuffleOfExtractsAsVperm(const SDLoc &DL, SDValue N0, // and a simple narrow shuffle. Prefer extract+unpack(h/l)ps to vpermps // because that avoids a constant load from memory. if (NumElts == 4 && - (isSingleSHUFPSMask(NewMask) || is128BitUnpackShuffleMask(NewMask))) + (isSingleSHUFPSMask(NewMask) || is128BitUnpackShuffleMask(NewMask, DAG))) return SDValue(); // Extend the shuffle mask with undef elements. @@ -17230,7 +17293,7 @@ static SDValue lowerShuffleWithUndefHalf(const SDLoc &DL, MVT VT, SDValue V1, if (Subtarget.hasAVX2()) { // extract128 + vunpckhps/vshufps, is better than vblend + vpermps. 
if (EltWidth == 32 && NumLowerHalves && HalfVT.is128BitVector() && - !is128BitUnpackShuffleMask(HalfMask) && + !is128BitUnpackShuffleMask(HalfMask, DAG) && (!isSingleSHUFPSMask(HalfMask) || Subtarget.hasFastVariableCrossLaneShuffle())) return SDValue(); @@ -17892,7 +17955,7 @@ static SDValue lowerV8F32Shuffle(const SDLoc &DL, ArrayRef<int> Mask, // For non-AVX512, if the Mask is of 16-bit elements in lane then try to split, // since after the split we get more efficient code using vpunpcklwd and // vpunpckhwd instrs than vblend. - if (!Subtarget.hasAVX512() && isUnpackWdShuffleMask(Mask, MVT::v8f32)) + if (!Subtarget.hasAVX512() && isUnpackWdShuffleMask(Mask, MVT::v8f32, DAG)) return lowerShuffleAsSplitOrBlend(DL, MVT::v8f32, V1, V2, Mask, Subtarget, DAG); @@ -17930,7 +17993,7 @@ static SDValue lowerV8I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask, // For non-AVX512, if the Mask is of 16-bit elements in lane then try to split, // since after the split we get more efficient code than vblend by using // vpunpcklwd and vpunpckhwd instrs. - if (isUnpackWdShuffleMask(Mask, MVT::v8i32) && !V2.isUndef() && + if (isUnpackWdShuffleMask(Mask, MVT::v8i32, DAG) && !V2.isUndef() && !Subtarget.hasAVX512()) return lowerShuffleAsSplitOrBlend(DL, MVT::v8i32, V1, V2, Mask, Subtarget, DAG); @@ -27887,11 +27950,14 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget, } // Read Performance Monitoring Counters. case RDPMC: + // Read Processor Register. + case RDPRU: // Get Extended Control Register. case XGETBV: { SmallVector<SDValue, 2> Results; // RDPMC uses ECX to select the index of the performance counter to read. + // RDPRU uses ECX to select the processor register to read. // XGETBV uses ECX to select the index of the XCR register to return. // The result is stored into registers EDX:EAX. expandIntrinsicWChainHelper(Op.getNode(), dl, DAG, IntrData->Opc0, X86::ECX, @@ -29902,14 +29968,12 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget, SDValue Amt01 = DAG.getBitcast(MVT::v8i16, Amt); SDValue Amt23 = DAG.getVectorShuffle(MVT::v8i16, dl, Amt01, Amt01, {4, 5, 6, 7, -1, -1, -1, -1}); - Amt0 = DAG.getVectorShuffle(MVT::v8i16, dl, Amt01, Amt01, - {0, 1, 1, 1, -1, -1, -1, -1}); - Amt1 = DAG.getVectorShuffle(MVT::v8i16, dl, Amt01, Amt01, - {2, 3, 3, 3, -1, -1, -1, -1}); - Amt2 = DAG.getVectorShuffle(MVT::v8i16, dl, Amt23, Amt23, - {0, 1, 1, 1, -1, -1, -1, -1}); - Amt3 = DAG.getVectorShuffle(MVT::v8i16, dl, Amt23, Amt23, - {2, 3, 3, 3, -1, -1, -1, -1}); + SDValue Msk02 = getV4X86ShuffleImm8ForMask({0, 1, 1, 1}, dl, DAG); + SDValue Msk13 = getV4X86ShuffleImm8ForMask({2, 3, 3, 3}, dl, DAG); + Amt0 = DAG.getNode(X86ISD::PSHUFLW, dl, MVT::v8i16, Amt01, Msk02); + Amt1 = DAG.getNode(X86ISD::PSHUFLW, dl, MVT::v8i16, Amt01, Msk13); + Amt2 = DAG.getNode(X86ISD::PSHUFLW, dl, MVT::v8i16, Amt23, Msk02); + Amt3 = DAG.getNode(X86ISD::PSHUFLW, dl, MVT::v8i16, Amt23, Msk13); } } @@ -30797,6 +30861,8 @@ X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { case AtomicRMWInst::UMin: case AtomicRMWInst::FAdd: case AtomicRMWInst::FSub: + case AtomicRMWInst::FMax: + case AtomicRMWInst::FMin: // These always require a non-trivial set of data operations on x86. We must // use a cmpxchg loop.
return AtomicExpansionKind::CmpXChg; @@ -32894,6 +32960,10 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, expandIntrinsicWChainHelper(N, dl, DAG, X86::RDPMC, X86::ECX, Subtarget, Results); return; + case Intrinsic::x86_rdpru: + expandIntrinsicWChainHelper(N, dl, DAG, X86::RDPRU, X86::ECX, Subtarget, + Results); + return; case Intrinsic::x86_xgetbv: expandIntrinsicWChainHelper(N, dl, DAG, X86::XGETBV, X86::ECX, Subtarget, Results); @@ -36985,8 +37055,9 @@ static SDValue narrowLoadToVZLoad(LoadSDNode *LN, MVT MemVT, MVT VT, // TODO: Investigate sharing more of this with shuffle lowering. static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask, bool AllowFloatDomain, bool AllowIntDomain, - SDValue V1, const X86Subtarget &Subtarget, - unsigned &Shuffle, MVT &SrcVT, MVT &DstVT) { + SDValue V1, const SelectionDAG &DAG, + const X86Subtarget &Subtarget, unsigned &Shuffle, + MVT &SrcVT, MVT &DstVT) { unsigned NumMaskElts = Mask.size(); unsigned MaskEltSize = MaskVT.getScalarSizeInBits(); @@ -37057,17 +37128,17 @@ static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask, // instructions are no slower than UNPCKLPD but has the option to // fold the input operand into even an unaligned memory load. if (MaskVT.is128BitVector() && Subtarget.hasSSE3() && AllowFloatDomain) { - if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0}, V1)) { + if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0}, DAG, V1)) { Shuffle = X86ISD::MOVDDUP; SrcVT = DstVT = MVT::v2f64; return true; } - if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0, 2, 2}, V1)) { + if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0, 2, 2}, DAG, V1)) { Shuffle = X86ISD::MOVSLDUP; SrcVT = DstVT = MVT::v4f32; return true; } - if (isTargetShuffleEquivalent(MaskVT, Mask, {1, 1, 3, 3}, V1)) { + if (isTargetShuffleEquivalent(MaskVT, Mask, {1, 1, 3, 3}, DAG, V1)) { Shuffle = X86ISD::MOVSHDUP; SrcVT = DstVT = MVT::v4f32; return true; @@ -37076,17 +37147,19 @@ static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask, if (MaskVT.is256BitVector() && AllowFloatDomain) { assert(Subtarget.hasAVX() && "AVX required for 256-bit vector shuffles"); - if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0, 2, 2}, V1)) { + if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0, 2, 2}, DAG, V1)) { Shuffle = X86ISD::MOVDDUP; SrcVT = DstVT = MVT::v4f64; return true; } - if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0, 2, 2, 4, 4, 6, 6}, V1)) { + if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0, 2, 2, 4, 4, 6, 6}, DAG, + V1)) { Shuffle = X86ISD::MOVSLDUP; SrcVT = DstVT = MVT::v8f32; return true; } - if (isTargetShuffleEquivalent(MaskVT, Mask, {1, 1, 3, 3, 5, 5, 7, 7}, V1)) { + if (isTargetShuffleEquivalent(MaskVT, Mask, {1, 1, 3, 3, 5, 5, 7, 7}, DAG, + V1)) { Shuffle = X86ISD::MOVSHDUP; SrcVT = DstVT = MVT::v8f32; return true; @@ -37096,21 +37169,22 @@ static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask, if (MaskVT.is512BitVector() && AllowFloatDomain) { assert(Subtarget.hasAVX512() && "AVX512 required for 512-bit vector shuffles"); - if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0, 2, 2, 4, 4, 6, 6}, V1)) { + if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0, 2, 2, 4, 4, 6, 6}, DAG, + V1)) { Shuffle = X86ISD::MOVDDUP; SrcVT = DstVT = MVT::v8f64; return true; } if (isTargetShuffleEquivalent( MaskVT, Mask, - {0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14}, V1)) { + {0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14}, DAG, V1)) { Shuffle = X86ISD::MOVSLDUP; SrcVT = DstVT = MVT::v16f32; return true; } if (isTargetShuffleEquivalent( MaskVT, 
Mask, - {1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15}, V1)) { + {1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15}, DAG, V1)) { Shuffle = X86ISD::MOVSHDUP; SrcVT = DstVT = MVT::v16f32; return true; @@ -37126,6 +37200,7 @@ static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask, static bool matchUnaryPermuteShuffle(MVT MaskVT, ArrayRef<int> Mask, const APInt &Zeroable, bool AllowFloatDomain, bool AllowIntDomain, + const SelectionDAG &DAG, const X86Subtarget &Subtarget, unsigned &Shuffle, MVT &ShuffleVT, unsigned &PermuteImm) { @@ -37269,33 +37344,36 @@ static bool matchBinaryShuffle(MVT MaskVT, ArrayRef<int> Mask, unsigned EltSizeInBits = MaskVT.getScalarSizeInBits(); if (MaskVT.is128BitVector()) { - if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0}) && AllowFloatDomain) { + if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0}, DAG) && + AllowFloatDomain) { V2 = V1; V1 = (SM_SentinelUndef == Mask[0] ? DAG.getUNDEF(MVT::v4f32) : V1); Shuffle = Subtarget.hasSSE2() ? X86ISD::UNPCKL : X86ISD::MOVLHPS; SrcVT = DstVT = Subtarget.hasSSE2() ? MVT::v2f64 : MVT::v4f32; return true; } - if (isTargetShuffleEquivalent(MaskVT, Mask, {1, 1}) && AllowFloatDomain) { + if (isTargetShuffleEquivalent(MaskVT, Mask, {1, 1}, DAG) && + AllowFloatDomain) { V2 = V1; Shuffle = Subtarget.hasSSE2() ? X86ISD::UNPCKH : X86ISD::MOVHLPS; SrcVT = DstVT = Subtarget.hasSSE2() ? MVT::v2f64 : MVT::v4f32; return true; } - if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 3}) && + if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 3}, DAG) && Subtarget.hasSSE2() && (AllowFloatDomain || !Subtarget.hasSSE41())) { std::swap(V1, V2); Shuffle = X86ISD::MOVSD; SrcVT = DstVT = MVT::v2f64; return true; } - if (isTargetShuffleEquivalent(MaskVT, Mask, {4, 1, 2, 3}) && + if (isTargetShuffleEquivalent(MaskVT, Mask, {4, 1, 2, 3}, DAG) && (AllowFloatDomain || !Subtarget.hasSSE41())) { Shuffle = X86ISD::MOVSS; SrcVT = DstVT = MVT::v4f32; return true; } - if (isTargetShuffleEquivalent(MaskVT, Mask, {8, 1, 2, 3, 4, 5, 6, 7}) && + if (isTargetShuffleEquivalent(MaskVT, Mask, {8, 1, 2, 3, 4, 5, 6, 7}, + DAG) && Subtarget.hasFP16()) { Shuffle = X86ISD::MOVSH; SrcVT = DstVT = MVT::v8f16; @@ -37678,7 +37756,8 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, scaleShuffleElements(Mask, NumElts, ScaledMask)) { for (unsigned i = 0; i != NumElts; ++i) IdentityMask.push_back(i); - if (isTargetShuffleEquivalent(RootVT, ScaledMask, IdentityMask, V1, V2)) + if (isTargetShuffleEquivalent(RootVT, ScaledMask, IdentityMask, DAG, V1, + V2)) return CanonicalizeShuffleInput(RootVT, V1); } } @@ -37902,7 +37981,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, } if (matchUnaryShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain, V1, - Subtarget, Shuffle, ShuffleSrcVT, ShuffleVT) && + DAG, Subtarget, Shuffle, ShuffleSrcVT, ShuffleVT) && (!IsMaskedShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) { if (Depth == 0 && Root.getOpcode() == Shuffle) @@ -37913,7 +37992,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, } if (matchUnaryPermuteShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain, - AllowIntDomain, Subtarget, Shuffle, ShuffleVT, + AllowIntDomain, DAG, Subtarget, Shuffle, ShuffleVT, PermuteImm) && (!IsMaskedShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) { @@ -37931,7 +38010,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, // TODO: Handle other insertions here as well? 
if (!UnaryShuffle && AllowFloatDomain && RootSizeInBits == 128 && Subtarget.hasSSE41() && - !isTargetShuffleEquivalent(MaskVT, Mask, {4, 1, 2, 3})) { + !isTargetShuffleEquivalent(MaskVT, Mask, {4, 1, 2, 3}, DAG)) { if (MaskEltSizeInBits == 32) { SDValue SrcV1 = V1, SrcV2 = V2; if (matchShuffleAsInsertPS(SrcV1, SrcV2, PermuteImm, Zeroable, Mask, @@ -37947,12 +38026,12 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, } } if (MaskEltSizeInBits == 64 && - isTargetShuffleEquivalent(MaskVT, Mask, {0, 2}) && + isTargetShuffleEquivalent(MaskVT, Mask, {0, 2}, DAG) && V2.getOpcode() == ISD::SCALAR_TO_VECTOR && V2.getScalarValueSizeInBits() <= 32) { if (Depth == 0 && Root.getOpcode() == X86ISD::INSERTPS) return SDValue(); // Nothing to do! - PermuteImm = (/*DstIdx*/2 << 4) | (/*SrcIdx*/0 << 0); + PermuteImm = (/*DstIdx*/ 2 << 4) | (/*SrcIdx*/ 0 << 0); Res = DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, CanonicalizeShuffleInput(MVT::v4f32, V1), CanonicalizeShuffleInput(MVT::v4f32, V2), @@ -51654,9 +51733,13 @@ static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG, // Use XOR (plus OR) and PTEST after SSE4.1 for 128/256-bit operands. // Use PCMPNEQ (plus OR) and KORTEST for 512-bit operands. // Otherwise use PCMPEQ (plus AND) and mask testing. - if ((OpSize == 128 && Subtarget.hasSSE2()) || - (OpSize == 256 && Subtarget.hasAVX()) || - (OpSize == 512 && Subtarget.useAVX512Regs())) { + bool NoImplicitFloatOps = + DAG.getMachineFunction().getFunction().hasFnAttribute( + Attribute::NoImplicitFloat); + if (!Subtarget.useSoftFloat() && !NoImplicitFloatOps && + ((OpSize == 128 && Subtarget.hasSSE2()) || + (OpSize == 256 && Subtarget.hasAVX()) || + (OpSize == 512 && Subtarget.useAVX512Regs()))) { bool HasPT = Subtarget.hasSSE41(); // PTEST and MOVMSK are slow on Knights Landing and Knights Mill and widened diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td index a55b95960aa6..6124755ca539 100644 --- a/llvm/lib/Target/X86/X86InstrCompiler.td +++ b/llvm/lib/Target/X86/X86InstrCompiler.td @@ -1532,44 +1532,6 @@ def : Pat<(xor GR32:$src1, -2147483648), } //===----------------------------------------------------------------------===// -// Pattern match SUB as XOR -//===----------------------------------------------------------------------===// - -// An immediate in the LHS of a subtract can't be encoded in the instruction. -// If there is no possibility of a borrow we can use an XOR instead of a SUB -// to enable the immediate to be folded. -// TODO: Move this to a DAG combine? - -def sub_is_xor : PatFrag<(ops node:$lhs, node:$rhs), (sub node:$lhs, node:$rhs),[{ - if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(0))) { - KnownBits Known = CurDAG->computeKnownBits(N->getOperand(1)); - - // If all possible ones in the RHS are set in the LHS then there can't be - // a borrow and we can use xor. 
- return (~Known.Zero).isSubsetOf(CN->getAPIntValue()); - } - - return false; -}]>; - -let AddedComplexity = 5 in { -def : Pat<(sub_is_xor imm:$src2, GR8:$src1), - (XOR8ri GR8:$src1, imm:$src2)>; -def : Pat<(sub_is_xor i16immSExt8:$src2, GR16:$src1), - (XOR16ri8 GR16:$src1, i16immSExt8:$src2)>; -def : Pat<(sub_is_xor imm:$src2, GR16:$src1), - (XOR16ri GR16:$src1, imm:$src2)>; -def : Pat<(sub_is_xor i32immSExt8:$src2, GR32:$src1), - (XOR32ri8 GR32:$src1, i32immSExt8:$src2)>; -def : Pat<(sub_is_xor imm:$src2, GR32:$src1), - (XOR32ri GR32:$src1, imm:$src2)>; -def : Pat<(sub_is_xor i64immSExt8:$src2, GR64:$src1), - (XOR64ri8 GR64:$src1, i64immSExt8:$src2)>; -def : Pat<(sub_is_xor i64immSExt32:$src2, GR64:$src1), - (XOR64ri32 GR64:$src1, i64immSExt32:$src2)>; -} - -//===----------------------------------------------------------------------===// // Some peepholes //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/X86/X86InstrFMA3Info.cpp b/llvm/lib/Target/X86/X86InstrFMA3Info.cpp index 52b2a62316cd..c4317be664fd 100644 --- a/llvm/lib/Target/X86/X86InstrFMA3Info.cpp +++ b/llvm/lib/Target/X86/X86InstrFMA3Info.cpp @@ -13,8 +13,8 @@ #include "X86InstrFMA3Info.h" #include "X86InstrInfo.h" -#include "llvm/Support/ManagedStatic.h" #include "llvm/Support/Threading.h" +#include <atomic> #include <cassert> #include <cstdint> diff --git a/llvm/lib/Target/X86/X86InstrFoldTables.cpp b/llvm/lib/Target/X86/X86InstrFoldTables.cpp index 27220a8d4d99..8aeb169929f2 100644 --- a/llvm/lib/Target/X86/X86InstrFoldTables.cpp +++ b/llvm/lib/Target/X86/X86InstrFoldTables.cpp @@ -13,6 +13,7 @@ #include "X86InstrFoldTables.h" #include "X86InstrInfo.h" #include "llvm/ADT/STLExtras.h" +#include <atomic> #include <vector> using namespace llvm; @@ -6102,7 +6103,7 @@ llvm::lookupFoldTable(unsigned RegOp, unsigned OpNum) { namespace { // This class stores the memory unfolding tables. It is instantiated as a -// ManagedStatic to lazily init the unfolding table. +// function scope static variable to lazily init the unfolding table. struct X86MemUnfoldTable { // Stores memory unfolding tables entries sorted by opcode. 
std::vector<X86MemoryFoldTableEntry> Table; @@ -6159,11 +6160,10 @@ struct X86MemUnfoldTable { }; } -static ManagedStatic<X86MemUnfoldTable> MemUnfoldTable; - const X86MemoryFoldTableEntry * llvm::lookupUnfoldTable(unsigned MemOp) { + static X86MemUnfoldTable MemUnfoldTable; - auto &Table = MemUnfoldTable->Table; + auto &Table = MemUnfoldTable.Table; auto I = llvm::lower_bound(Table, MemOp); if (I != Table.end() && I->KeyOp == MemOp) return &*I; diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td index 7f6ef3479d40..4a9a281d5b99 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.td +++ b/llvm/lib/Target/X86/X86InstrInfo.td @@ -978,6 +978,7 @@ def HasCLFLUSHOPT : Predicate<"Subtarget->hasCLFLUSHOPT()">; def HasCLWB : Predicate<"Subtarget->hasCLWB()">; def HasWBNOINVD : Predicate<"Subtarget->hasWBNOINVD()">; def HasRDPID : Predicate<"Subtarget->hasRDPID()">; +def HasRDPRU : Predicate<"Subtarget->hasRDPRU()">; def HasWAITPKG : Predicate<"Subtarget->hasWAITPKG()">; def HasINVPCID : Predicate<"Subtarget->hasINVPCID()">; def HasCX8 : Predicate<"Subtarget->hasCX8()">; diff --git a/llvm/lib/Target/X86/X86InstrSystem.td b/llvm/lib/Target/X86/X86InstrSystem.td index 3a653a56e534..b1ca87279007 100644 --- a/llvm/lib/Target/X86/X86InstrSystem.td +++ b/llvm/lib/Target/X86/X86InstrSystem.td @@ -735,6 +735,15 @@ def PTWRITE64r : RI<0xAE, MRM4r, (outs), (ins GR64:$dst), } // SchedRW //===----------------------------------------------------------------------===// +// RDPRU - Read Processor Register instruction. + +let SchedRW = [WriteSystem] in { +let Uses = [ECX], Defs = [EAX, EDX] in + def RDPRU : I<0x01, MRM_FD, (outs), (ins), "rdpru", []>, PS, + Requires<[HasRDPRU]>; +} + +//===----------------------------------------------------------------------===// // Platform Configuration instruction // From ISA docs: diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index 3c8be95b43e3..6112c0b7d6c3 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -37,7 +37,7 @@ enum IntrinsicType : uint16_t { TRUNCATE_TO_REG, CVTPS2PH_MASK, CVTPD2DQ_MASK, CVTQQ2PS_MASK, TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32, FIXUPIMM, FIXUPIMM_MASKZ, GATHER_AVX2, - ROUNDP, ROUNDS + ROUNDP, ROUNDS, RDPRU }; struct IntrinsicData { @@ -309,6 +309,7 @@ static const IntrinsicData IntrinsicsWithChain[] = { X86_INTRINSIC_DATA(avx512_scattersiv8_sf, SCATTER, 0, 0), X86_INTRINSIC_DATA(avx512_scattersiv8_si, SCATTER, 0, 0), X86_INTRINSIC_DATA(rdpmc, RDPMC, X86::RDPMC, 0), + X86_INTRINSIC_DATA(rdpru, RDPRU, X86::RDPRU, 0), X86_INTRINSIC_DATA(rdrand_16, RDRAND, X86ISD::RDRAND, 0), X86_INTRINSIC_DATA(rdrand_32, RDRAND, X86ISD::RDRAND, 0), X86_INTRINSIC_DATA(rdrand_64, RDRAND, X86ISD::RDRAND, 0), diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp index b107de692365..3fbdb18a0793 100644 --- a/llvm/lib/Target/X86/X86MCInstLower.cpp +++ b/llvm/lib/Target/X86/X86MCInstLower.cpp @@ -2413,6 +2413,10 @@ static void addConstantComments(const MachineInstr *MI, } void X86AsmPrinter::emitInstruction(const MachineInstr *MI) { + // FIXME: Enable feature predicate checks once all the tests pass.
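Putting the RDPRU pieces together: the instruction definition above encodes the ISA contract (ECX selects the processor register, EDX:EAX receive the 64-bit result), and the IntrinsicsWithChain entry routes llvm.x86.rdpru through the same expandIntrinsicWChainHelper path as RDPMC. A user-level sketch using inline asm rather than any builtin, since builtin availability depends on the matching clang change; register numbers (0 = MPERF, 1 = APERF) follow AMD's documentation:

    #include <cstdint>
    // Requires a CPU with the rdpru feature (e.g. Zen 2).
    static inline uint64_t rdpru(uint32_t Reg) {
      uint32_t Lo, Hi;
      asm volatile("rdpru" : "=a"(Lo), "=d"(Hi) : "c"(Reg));
      return ((uint64_t)Hi << 32) | Lo;
    }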
+ // X86_MC::verifyInstructionPredicates(MI->getOpcode(), + // Subtarget->getFeatureBits()); + X86MCInstLower MCInstLowering(*MF, *this); const X86RegisterInfo *RI = MF->getSubtarget<X86Subtarget>().getRegisterInfo(); diff --git a/llvm/lib/Target/X86/X86PartialReduction.cpp b/llvm/lib/Target/X86/X86PartialReduction.cpp index 7761f7323358..c760a32e2579 100644 --- a/llvm/lib/Target/X86/X86PartialReduction.cpp +++ b/llvm/lib/Target/X86/X86PartialReduction.cpp @@ -439,8 +439,8 @@ static void collectLeaves(Value *Root, SmallVectorImpl<Instruction *> &Leaves) { while (!Worklist.empty()) { Value *V = Worklist.pop_back_val(); - if (!Visited.insert(V).second) - continue; + if (!Visited.insert(V).second) + continue; if (auto *PN = dyn_cast<PHINode>(V)) { // PHI node should have single use unless it is the root node, then it @@ -466,7 +466,7 @@ static void collectLeaves(Value *Root, SmallVectorImpl<Instruction *> &Leaves) { // gets us back to this node. if (BO->hasNUses(BO == Root ? 3 : 2)) { PHINode *PN = nullptr; - for (auto *U : Root->users()) + for (auto *U : BO->users()) if (auto *P = dyn_cast<PHINode>(U)) if (!Visited.count(P)) PN = P; diff --git a/llvm/lib/Target/X86/X86ReturnThunks.cpp b/llvm/lib/Target/X86/X86ReturnThunks.cpp new file mode 100644 index 000000000000..4b203229ba83 --- /dev/null +++ b/llvm/lib/Target/X86/X86ReturnThunks.cpp @@ -0,0 +1,92 @@ +//==- X86ReturnThunks.cpp - Replace rets with thunks or inline thunks --=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// Pass that replaces ret instructions with a jmp to __x86_return_thunk. +/// +/// This corresponds to -mfunction-return=thunk-extern or +/// __attribute__((function_return("thunk-extern"))). +/// +/// This pass is a minimal implementation necessary to help mitigate +/// RetBleed for the Linux kernel. +/// +/// Should support for thunk or thunk-inline be necessary in the future, then +/// this pass should be combined with x86-retpoline-thunks which already has +/// machinery to emit thunks. Until then, YAGNI. +/// +/// This pass is very similar to x86-lvi-ret.
+/// +//===----------------------------------------------------------------------===// + +#include "X86.h" +#include "X86InstrInfo.h" +#include "X86Subtarget.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Triple.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +#define PASS_KEY "x86-return-thunks" +#define DEBUG_TYPE PASS_KEY + +struct X86ReturnThunks final : public MachineFunctionPass { + static char ID; + X86ReturnThunks() : MachineFunctionPass(ID) {} + StringRef getPassName() const override { return "X86 Return Thunks"; } + bool runOnMachineFunction(MachineFunction &MF) override; +}; + +char X86ReturnThunks::ID = 0; + +bool X86ReturnThunks::runOnMachineFunction(MachineFunction &MF) { + LLVM_DEBUG(dbgs() << getPassName() << "\n"); + + bool Modified = false; + + if (!MF.getFunction().hasFnAttribute(llvm::Attribute::FnRetThunkExtern)) + return Modified; + + StringRef ThunkName = "__x86_return_thunk"; + if (MF.getFunction().getName() == ThunkName) + return Modified; + + const auto &ST = MF.getSubtarget<X86Subtarget>(); + const bool Is64Bit = ST.getTargetTriple().getArch() == Triple::x86_64; + const unsigned RetOpc = Is64Bit ? X86::RET64 : X86::RET32; + SmallVector<MachineInstr *, 16> Rets; + + for (MachineBasicBlock &MBB : MF) + for (MachineInstr &Term : MBB.terminators()) + if (Term.getOpcode() == RetOpc) + Rets.push_back(&Term); + + const MCInstrDesc &JMP = ST.getInstrInfo()->get(X86::TAILJMPd); + + for (MachineInstr *Ret : Rets) { + BuildMI(Ret->getParent(), Ret->getDebugLoc(), JMP) + .addExternalSymbol(ThunkName.data()); + Ret->eraseFromParent(); + Modified = true; + } + + return Modified; +} + +INITIALIZE_PASS(X86ReturnThunks, PASS_KEY, "X86 Return Thunks", false, false) + +FunctionPass *llvm::createX86ReturnThunksPass() { + return new X86ReturnThunks(); +} diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp index 4249788e3540..f4e25e4194db 100644 --- a/llvm/lib/Target/X86/X86TargetMachine.cpp +++ b/llvm/lib/Target/X86/X86TargetMachine.cpp @@ -100,6 +100,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86Target() { initializeX86OptimizeLEAPassPass(PR); initializeX86PartialReductionPass(PR); initializePseudoProbeInserterPass(PR); + initializeX86ReturnThunksPass(PR); } static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) { @@ -575,6 +576,7 @@ void X86PassConfig::addPreEmitPass2() { // hand inspection of the codegen output. addPass(createX86SpeculativeExecutionSideEffectSuppression()); addPass(createX86IndirectThunksPass()); + addPass(createX86ReturnThunksPass()); // Insert extra int3 instructions after trailing call instructions to avoid // issues in the unwinder. 
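From the user's side, the new pass is reached via the function_return attribute (or -mfunction-return=thunk-extern); the thunk body itself is deliberately not emitted and must be supplied at final link, as the kernel does. A sketch:

    // The terminating RET below is rewritten to: jmp __x86_return_thunk
    __attribute__((function_return("thunk-extern")))
    int answer(void) {
      return 42;
    }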
diff --git a/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp index c286b747a271..a782ff436dc0 100644 --- a/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp +++ b/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp @@ -29,6 +29,7 @@ using namespace llvm; #define GET_INSTRINFO_MC_DESC +#define ENABLE_INSTR_PREDICATE_VERIFIER #include "XCoreGenInstrInfo.inc" #define GET_SUBTARGETINFO_MC_DESC diff --git a/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h b/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h index 096b22415a22..ec4418333859 100644 --- a/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h +++ b/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h @@ -22,6 +22,7 @@ // Defines symbolic names for the XCore instructions. // #define GET_INSTRINFO_ENUM +#define GET_INSTRINFO_MC_HELPER_DECLS #include "XCoreGenInstrInfo.inc" #define GET_SUBTARGETINFO_ENUM diff --git a/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp b/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp index 8fea61d125d2..691fdf16bc0f 100644 --- a/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp +++ b/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp @@ -256,6 +256,9 @@ bool XCoreAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, } void XCoreAsmPrinter::emitInstruction(const MachineInstr *MI) { + XCore_MC::verifyInstructionPredicates(MI->getOpcode(), + getSubtargetInfo().getFeatureBits()); + SmallString<128> Str; raw_svector_ostream O(Str); diff --git a/llvm/lib/ToolDrivers/llvm-lib/Options.td b/llvm/lib/ToolDrivers/llvm-lib/Options.td index 0d97f77e525f..9d969b040ef2 100644 --- a/llvm/lib/ToolDrivers/llvm-lib/Options.td +++ b/llvm/lib/ToolDrivers/llvm-lib/Options.td @@ -44,5 +44,7 @@ def help_q : Flag<["/??", "-??", "/?", "-?"], "">, Alias<help>; //============================================================================== def ltcg : F<"ltcg">; +def nodefaultlib: P<"nodefaultlib", "">; +def nodefaultlib_all: F<"nodefaultlib">; def nologo : F<"nologo">; def subsystem : P<"subsystem", "">; diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp index d09607bb1c4c..51eb8ebf0369 100644 --- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp @@ -881,16 +881,16 @@ static DIType *solveDIType(DIBuilder &Builder, Type *Ty, dwarf::DW_ATE_float, llvm::DINode::FlagArtificial); } else if (Ty->isPointerTy()) { - // Construct BasicType instead of PointerType to avoid infinite - // search problem. - // For example, we would be in trouble if we traverse recursively: + // Construct a PointerType pointing to null (aka void *) instead of exploring + // the pointee type, to avoid the infinite search problem.
For example, we would be + // in trouble if we traverse recursively: // // struct Node { // Node* ptr; // }; - RetType = Builder.createBasicType(Name, Layout.getTypeSizeInBits(Ty), - dwarf::DW_ATE_address, - llvm::DINode::FlagArtificial); + RetType = Builder.createPointerType(nullptr, Layout.getTypeSizeInBits(Ty), + Layout.getABITypeAlignment(Ty), + /*DWARFAddressSpace=*/None, Name); } else if (Ty->isStructTy()) { auto *DIStruct = Builder.createStructType( Scope, Name, Scope->getFile(), LineNum, Layout.getTypeSizeInBits(Ty), @@ -914,13 +914,21 @@ static DIType *solveDIType(DIBuilder &Builder, Type *Ty, RetType = DIStruct; } else { - LLVM_DEBUG(dbgs() << "Unresolved Type: " << *Ty << "\n";); - SmallString<32> Buffer; - raw_svector_ostream OS(Buffer); - OS << Name.str() << "_" << Layout.getTypeSizeInBits(Ty); - RetType = Builder.createBasicType(OS.str(), Layout.getTypeSizeInBits(Ty), - dwarf::DW_ATE_address, - llvm::DINode::FlagArtificial); + LLVM_DEBUG(dbgs() << "Unresolved Type: " << *Ty << "\n"); + TypeSize Size = Layout.getTypeSizeInBits(Ty); + auto *CharSizeType = Builder.createBasicType( + Name, 8, dwarf::DW_ATE_unsigned_char, llvm::DINode::FlagArtificial); + + if (Size <= 8) + RetType = CharSizeType; + else { + if (Size % 8 != 0) + Size = TypeSize::Fixed(Size + 8 - (Size % 8)); + + RetType = Builder.createArrayType( + Size, Layout.getPrefTypeAlign(Ty).value(), CharSizeType, + Builder.getOrCreateArray(Builder.getOrCreateSubrange(0, Size / 8))); + } } DITypeCache.insert({Ty, RetType}); @@ -971,7 +979,8 @@ static void buildFrameDebugInfo(Function &F, coro::Shape &Shape, unsigned LineNum = PromiseDIVariable->getLine(); DICompositeType *FrameDITy = DBuilder.createStructType( - DIS, "__coro_frame_ty", DFile, LineNum, Shape.FrameSize * 8, + DIS->getUnit(), Twine(F.getName() + ".coro_frame_ty").str(), + DFile, LineNum, Shape.FrameSize * 8, Shape.FrameAlign.value() * 8, llvm::DINode::FlagArtificial, nullptr, llvm::DINodeArray()); StructType *FrameTy = Shape.FrameTy; @@ -995,14 +1004,12 @@ static void buildFrameDebugInfo(Function &F, coro::Shape &Shape, *IndexTy = FrameTy->getElementType(IndexIndex); DenseMap<unsigned, DIType *> TyCache; - TyCache.insert({ResumeIndex, - DBuilder.createBasicType("__resume_fn", - Layout.getTypeSizeInBits(ResumeFnTy), - dwarf::DW_ATE_address)}); TyCache.insert( - {DestroyIndex, DBuilder.createBasicType( - "__destroy_fn", Layout.getTypeSizeInBits(DestroyFnTy), - dwarf::DW_ATE_address)}); + {ResumeIndex, DBuilder.createPointerType( + nullptr, Layout.getTypeSizeInBits(ResumeFnTy))}); + TyCache.insert( + {DestroyIndex, DBuilder.createPointerType( + nullptr, Layout.getTypeSizeInBits(DestroyFnTy))}); /// FIXME: If we fill the field `SizeInBits` with the actual size of /// __coro_index in bits, then __coro_index wouldn't show in the debugger. diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp index ead552d9be4e..9c1b247cdb39 100644 --- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp @@ -389,7 +389,7 @@ static void createResumeEntryBlock(Function &F, coro::Shape &Shape) { // Replace CoroSave with a store to Index: // %index.addr = getelementptr %f.frame... 
(index field number) - // store i32 0, i32* %index.addr1 + // store i32 %IndexVal, i32* %index.addr1 auto *Save = S->getCoroSave(); Builder.SetInsertPoint(Save); if (S->isFinal()) { diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index b05b7990e3f0..e5ff98e4f73f 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -718,8 +718,8 @@ Argument *IRPosition::getAssociatedArgument() const { } // If we found a unique callback candidate argument, return it. - if (CBCandidateArg && CBCandidateArg.getValue()) - return CBCandidateArg.getValue(); + if (CBCandidateArg && CBCandidateArg.value()) + return CBCandidateArg.value(); // If no callbacks were found, or none used the underlying call site operand // exclusively, use the direct callee argument if available. @@ -1048,11 +1048,11 @@ Attributor::getAssumedConstant(const IRPosition &IRP, recordDependence(ValueSimplifyAA, AA, DepClassTy::OPTIONAL); return llvm::None; } - if (isa_and_nonnull<UndefValue>(SimplifiedV.getValue())) { + if (isa_and_nonnull<UndefValue>(SimplifiedV.value())) { recordDependence(ValueSimplifyAA, AA, DepClassTy::OPTIONAL); return UndefValue::get(IRP.getAssociatedType()); } - Constant *CI = dyn_cast_or_null<Constant>(SimplifiedV.getValue()); + Constant *CI = dyn_cast_or_null<Constant>(SimplifiedV.value()); if (CI) CI = dyn_cast_or_null<Constant>( AA::getWithType(*CI, *IRP.getAssociatedType())); @@ -2697,8 +2697,8 @@ void InformationCache::initializeInformationCache(const Function &CF, Optional<short> &NumUses = AssumeUsesMap[I]; if (!NumUses) NumUses = I->getNumUses(); - NumUses = NumUses.getValue() - /* this assume */ 1; - if (NumUses.getValue() != 0) + NumUses = NumUses.value() - /* this assume */ 1; + if (NumUses.value() != 0) continue; AssumeOnlyValues.insert(I); for (const Value *Op : I->operands()) diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 4d99ce7e3175..1ff54b78e27e 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -437,7 +437,7 @@ static bool genericValueTraversal( A.getAssumedSimplified(*V, QueryingAA, UsedAssumedInformation); if (!SimpleV) continue; - Value *NewV = SimpleV.getValue(); + Value *NewV = SimpleV.value(); if (NewV && NewV != V) { if ((VS & AA::Interprocedural) || !CtxI || AA::isValidInScope(*NewV, CtxI->getFunction())) { @@ -1891,14 +1891,14 @@ ChangeStatus AAReturnedValuesImpl::manifest(Attributor &A) { // Check if we have an assumed unique return value that we could manifest. Optional<Value *> UniqueRV = getAssumedUniqueReturnValue(A); - if (!UniqueRV || !UniqueRV.getValue()) + if (!UniqueRV || !UniqueRV.value()) return Changed; // Bookkeeping. STATS_DECLTRACK(UniqueReturnValue, FunctionReturn, "Number of function with unique return"); // If the assumed unique return value is an argument, annotate it. - if (auto *UniqueRVArg = dyn_cast<Argument>(UniqueRV.getValue())) { + if (auto *UniqueRVArg = dyn_cast<Argument>(UniqueRV.value())) { if (UniqueRVArg->getType()->canLosslesslyBitCastTo( getAssociatedFunction()->getReturnType())) { getIRPosition() = IRPosition::argument(*UniqueRVArg); @@ -2666,9 +2666,9 @@ struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior { // Either we stopped and the appropriate action was taken, // or we got back a simplified value to continue. 
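The pattern just below, repeated at dozens of the .value() call sites in this file, relies on a tri-state convention for the Attributor's Optional<Value *> simplification results. A sketch of the convention (the helper is hypothetical):

    #include "llvm/ADT/Optional.h"
    #include "llvm/IR/Value.h"
    // None             -> no answer yet; stay optimistic and keep iterating
    // contains nullptr -> simplification known to fail; go pessimistic
    // contains V       -> V is the simplified value to use
    static bool keepIterating(llvm::Optional<llvm::Value *> SimpleV) {
      if (!SimpleV)
        return true;    // still unknown
      if (!SimpleV.value())
        return false;   // known unsimplifiable
      llvm::Value *V = SimpleV.value();
      (void)V;          // ...then act on the simplified value
      return true;
    }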
Optional<Value *> SimplifiedPtrOp = stopOnUndefOrAssumed(A, PtrOp, &I); - if (!SimplifiedPtrOp || !SimplifiedPtrOp.getValue()) + if (!SimplifiedPtrOp || !SimplifiedPtrOp.value()) return true; - const Value *PtrOpVal = SimplifiedPtrOp.getValue(); + const Value *PtrOpVal = SimplifiedPtrOp.value(); // A memory access through a pointer is considered UB // only if the pointer has constant null value. @@ -2757,14 +2757,14 @@ struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior { IRPosition::value(*ArgVal), *this, UsedAssumedInformation); if (UsedAssumedInformation) continue; - if (SimplifiedVal && !SimplifiedVal.getValue()) + if (SimplifiedVal && !SimplifiedVal.value()) return true; - if (!SimplifiedVal || isa<UndefValue>(*SimplifiedVal.getValue())) { + if (!SimplifiedVal || isa<UndefValue>(*SimplifiedVal.value())) { KnownUBInsts.insert(&I); continue; } if (!ArgVal->getType()->isPointerTy() || - !isa<ConstantPointerNull>(*SimplifiedVal.getValue())) + !isa<ConstantPointerNull>(*SimplifiedVal.value())) continue; auto &NonNullAA = A.getAAFor<AANonNull>(*this, CalleeArgumentIRP, DepClassTy::NONE); @@ -4101,11 +4101,11 @@ identifyAliveSuccessors(Attributor &A, const SwitchInst &SI, bool UsedAssumedInformation = false; Optional<Constant *> C = A.getAssumedConstant(*SI.getCondition(), AA, UsedAssumedInformation); - if (!C || isa_and_nonnull<UndefValue>(C.getValue())) { + if (!C || isa_and_nonnull<UndefValue>(C.value())) { // No value yet, assume all edges are dead. - } else if (isa_and_nonnull<ConstantInt>(C.getValue())) { + } else if (isa_and_nonnull<ConstantInt>(C.value())) { for (auto &CaseIt : SI.cases()) { - if (CaseIt.getCaseValue() == C.getValue()) { + if (CaseIt.getCaseValue() == C.value()) { AliveSuccessors.push_back(&CaseIt.getCaseSuccessor()->front()); return UsedAssumedInformation; } @@ -5523,11 +5523,10 @@ struct AAValueSimplifyImpl : AAValueSimplify { if (!SimpleV) return PoisonValue::get(&Ty); Value *EffectiveV = &V; - if (SimpleV.getValue()) - EffectiveV = SimpleV.getValue(); + if (SimpleV.value()) + EffectiveV = SimpleV.value(); if (auto *C = dyn_cast<Constant>(EffectiveV)) - if (!C->canTrap()) - return C; + return C; if (CtxI && AA::isValidAtPosition(AA::ValueAndContext(*EffectiveV, *CtxI), A.getInfoCache())) return ensureType(A, *EffectiveV, Ty, CtxI, Check); @@ -5541,7 +5540,7 @@ struct AAValueSimplifyImpl : AAValueSimplify { /// nullptr if we don't have one that makes sense. Value *manifestReplacementValue(Attributor &A, Instruction *CtxI) const { Value *NewV = SimplifiedAssociatedValue - ? SimplifiedAssociatedValue.getValue() + ? 
SimplifiedAssociatedValue.value() : UndefValue::get(getAssociatedType()); if (NewV && NewV != &getAssociatedValue()) { ValueToValueMapTy VMap; @@ -5672,7 +5671,7 @@ struct AAValueSimplifyArgument final : AAValueSimplifyImpl { A.getAssumedConstant(ACSArgPos, *this, UsedAssumedInformation); if (!SimpleArgOp) return true; - if (!SimpleArgOp.getValue()) + if (!SimpleArgOp.value()) return false; if (!AA::isDynamicallyUnique(A, *this, **SimpleArgOp)) return false; @@ -5787,7 +5786,7 @@ struct AAValueSimplifyFloating : AAValueSimplifyImpl { *this, UsedAssumedInformation); if (!SimplifiedLHS) return true; - if (!SimplifiedLHS.getValue()) + if (!SimplifiedLHS.value()) return false; LHS = *SimplifiedLHS; @@ -5796,7 +5795,7 @@ struct AAValueSimplifyFloating : AAValueSimplifyImpl { *this, UsedAssumedInformation); if (!SimplifiedRHS) return true; - if (!SimplifiedRHS.getValue()) + if (!SimplifiedRHS.value()) return false; RHS = *SimplifiedRHS; @@ -5868,8 +5867,8 @@ struct AAValueSimplifyFloating : AAValueSimplifyImpl { if (!SimplifiedOp) return true; - if (SimplifiedOp.getValue()) - NewOps[Idx] = SimplifiedOp.getValue(); + if (SimplifiedOp.value()) + NewOps[Idx] = SimplifiedOp.value(); else NewOps[Idx] = Op; @@ -6112,6 +6111,10 @@ struct AAHeapToStackFunction final : public AAHeapToStack { /// but which is not in the deallocation infos. bool HasPotentiallyFreeingUnknownUses = false; + /// Flag to indicate that we should place the new alloca in the function + /// entry block rather than where the call site (CB) is. + bool MoveAllocaIntoEntry = true; + /// The set of free calls that use this allocation. SmallSetVector<CallBase *, 1> PotentialFreeCalls{}; }; @@ -6242,17 +6245,6 @@ struct AAHeapToStackFunction final : public AAHeapToStack { Function *F = getAnchorScope(); const auto *TLI = A.getInfoCache().getTargetLibraryInfoForFunction(*F); - LoopInfo *LI = - A.getInfoCache().getAnalysisResultForFunction<LoopAnalysis>(*F); - Optional<bool> MayContainIrreducibleControl; - auto IsInLoop = [&](BasicBlock &BB) { - if (!MayContainIrreducibleControl.has_value()) - MayContainIrreducibleControl = mayContainIrreducibleControl(*F, LI); - if (MayContainIrreducibleControl.value()) - return true; - return LI->getLoopFor(&BB) != nullptr; - }; - for (auto &It : AllocationInfos) { AllocationInfo &AI = *It.second; if (AI.Status == AllocationInfo::INVALID) @@ -6294,25 +6286,25 @@ struct AAHeapToStackFunction final : public AAHeapToStack { Size = SizeOffsetPair.first; } - Instruction *IP = (!SizeAPI.has_value() || IsInLoop(*AI.CB->getParent())) - ? AI.CB - : &F->getEntryBlock().front(); + Instruction *IP = + AI.MoveAllocaIntoEntry ? &F->getEntryBlock().front() : AI.CB; Align Alignment(1); if (MaybeAlign RetAlign = AI.CB->getRetAlign()) Alignment = std::max(Alignment, *RetAlign); if (Value *Align = getAllocAlignment(AI.CB, TLI)) { Optional<APInt> AlignmentAPI = getAPInt(A, *this, *Align); - assert(AlignmentAPI && AlignmentAPI.getValue().getZExtValue() > 0 && + assert(AlignmentAPI && AlignmentAPI.value().getZExtValue() > 0 && "Expected an alignment during manifest!"); Alignment = std::max( - Alignment, assumeAligned(AlignmentAPI.getValue().getZExtValue())); + Alignment, assumeAligned(AlignmentAPI.value().getZExtValue())); } // TODO: Hoist the alloca towards the function entry. 
unsigned AS = DL.getAllocaAddrSpace(); - Instruction *Alloca = new AllocaInst(Type::getInt8Ty(F->getContext()), AS, - Size, Alignment, "", IP); + Instruction *Alloca = + new AllocaInst(Type::getInt8Ty(F->getContext()), AS, Size, Alignment, + AI.CB->getName() + ".h2s", IP); if (Alloca->getType() != AI.CB->getType()) Alloca = BitCastInst::CreatePointerBitCastOrAddrSpaceCast( @@ -6354,7 +6346,7 @@ struct AAHeapToStackFunction final : public AAHeapToStack { A.getAssumedConstant(V, AA, UsedAssumedInformation); if (!SimpleV) return APInt(64, 0); - if (auto *CI = dyn_cast_or_null<ConstantInt>(SimpleV.getValue())) + if (auto *CI = dyn_cast_or_null<ConstantInt>(SimpleV.value())) return CI->getValue(); return llvm::None; } @@ -6400,6 +6392,21 @@ ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) { bool StackIsAccessibleByOtherThreads = A.getInfoCache().stackIsAccessibleByOtherThreads(); + LoopInfo *LI = + A.getInfoCache().getAnalysisResultForFunction<LoopAnalysis>(*F); + Optional<bool> MayContainIrreducibleControl; + auto IsInLoop = [&](BasicBlock &BB) { + if (&F->getEntryBlock() == &BB) + return false; + if (!MayContainIrreducibleControl.has_value()) + MayContainIrreducibleControl = mayContainIrreducibleControl(*F, LI); + if (MayContainIrreducibleControl.value()) + return true; + if (!LI) + return true; + return LI->getLoopFor(&BB) != nullptr; + }; + // Flag to ensure we update our deallocation information at most once per // updateImpl call and only if we use the free check reasoning. bool HasUpdatedFrees = false; @@ -6617,21 +6624,20 @@ ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) { AI.Status = AllocationInfo::INVALID; Changed = ChangeStatus::CHANGED; continue; - } else { - if (APAlign->ugt(llvm::Value::MaximumAlignment) || - !APAlign->isPowerOf2()) { - LLVM_DEBUG(dbgs() << "[H2S] Invalid allocation alignment: " << APAlign - << "\n"); - AI.Status = AllocationInfo::INVALID; - Changed = ChangeStatus::CHANGED; - continue; - } + } + if (APAlign->ugt(llvm::Value::MaximumAlignment) || + !APAlign->isPowerOf2()) { + LLVM_DEBUG(dbgs() << "[H2S] Invalid allocation alignment: " << APAlign + << "\n"); + AI.Status = AllocationInfo::INVALID; + Changed = ChangeStatus::CHANGED; + continue; } } + Optional<APInt> Size = getSize(A, *this, AI); if (MaxHeapToStackSize != -1) { - Optional<APInt> Size = getSize(A, *this, AI); - if (!Size || Size.getValue().ugt(MaxHeapToStackSize)) { + if (!Size || Size.value().ugt(MaxHeapToStackSize)) { LLVM_DEBUG({ if (!Size) dbgs() << "[H2S] Unknown allocation size: " << *AI.CB << "\n"; @@ -6649,18 +6655,23 @@ ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) { switch (AI.Status) { case AllocationInfo::STACK_DUE_TO_USE: if (UsesCheck(AI)) - continue; + break; AI.Status = AllocationInfo::STACK_DUE_TO_FREE; LLVM_FALLTHROUGH; case AllocationInfo::STACK_DUE_TO_FREE: if (FreeCheck(AI)) - continue; + break; AI.Status = AllocationInfo::INVALID; Changed = ChangeStatus::CHANGED; - continue; + break; case AllocationInfo::INVALID: llvm_unreachable("Invalid allocations should never reach this point!"); }; + + // Check if we still think we can move it into the entry block. 
+ if (AI.MoveAllocaIntoEntry && + (!Size.has_value() || IsInLoop(*AI.CB->getParent()))) + AI.MoveAllocaIntoEntry = false; } return Changed; @@ -6748,8 +6759,8 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl { LLVM_DEBUG({ dbgs() << "[AAPrivatizablePtr] ACSPos: " << ACSArgPos << ", CSTy: "; - if (CSTy && CSTy.getValue()) - CSTy.getValue()->print(dbgs()); + if (CSTy && CSTy.value()) + CSTy.value()->print(dbgs()); else if (CSTy) dbgs() << "<nullptr>"; else @@ -6760,8 +6771,8 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl { LLVM_DEBUG({ dbgs() << " : New Type: "; - if (Ty && Ty.getValue()) - Ty.getValue()->print(dbgs()); + if (Ty && Ty.value()) + Ty.value()->print(dbgs()); else if (Ty) dbgs() << "<nullptr>"; else @@ -6769,7 +6780,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl { dbgs() << "\n"; }); - return !Ty || Ty.getValue(); + return !Ty || Ty.value(); }; if (!A.checkForAllCallSites(CallSiteCheck, *this, true, @@ -6783,7 +6794,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl { PrivatizableType = identifyPrivatizableType(A); if (!PrivatizableType) return ChangeStatus::UNCHANGED; - if (!PrivatizableType.getValue()) + if (!PrivatizableType.value()) return indicatePessimisticFixpoint(); // The dependence is optional so we don't give up once we give up on the @@ -6871,7 +6882,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl { auto CBArgPrivTy = CBArgPrivAA.getPrivatizableType(); if (!CBArgPrivTy) continue; - if (CBArgPrivTy.getValue() == PrivatizableType) + if (CBArgPrivTy.value() == PrivatizableType) continue; } @@ -6918,7 +6929,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl { auto DCArgPrivTy = DCArgPrivAA.getPrivatizableType(); if (!DCArgPrivTy) return true; - if (DCArgPrivTy.getValue() == PrivatizableType) + if (DCArgPrivTy.value() == PrivatizableType) return true; } } @@ -7060,7 +7071,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl { ChangeStatus manifest(Attributor &A) override { if (!PrivatizableType) return ChangeStatus::UNCHANGED; - assert(PrivatizableType.getValue() && "Expected privatizable type!"); + assert(PrivatizableType.value() && "Expected privatizable type!"); // Collect all tail calls in the function as we cannot allow new allocas to // escape into tail recursion. 
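The heap-to-stack hunks above move the placement decision (the new MoveAllocaIntoEntry flag) from manifest() into updateImpl(), where LoopInfo is available: the replacement alloca goes in the entry block only when the size is known and the allocation site is not inside a loop (or possibly-irreducible control flow). A rough source-level picture of the transformation, with a hypothetical use() helper:

```cpp
#include <cstdlib>

void use(char *); // hypothetical consumer, for illustration only

// Convertible: constant size, freed on the way out. The Attributor turns
// the malloc/free pair into an entry-block alloca, now named "<call>.h2s".
void convertible() {
  char *P = static_cast<char *>(std::malloc(16));
  use(P);
  std::free(P);
}

// Still convertible, but the allocation site sits in a loop, so per the
// new IsInLoop check MoveAllocaIntoEntry stays false and the alloca is
// placed at the call site instead of being hoisted into the entry block.
void inLoop(int N) {
  for (int I = 0; I < N; ++I) {
    char *P = static_cast<char *>(std::malloc(16));
    use(P);
    std::free(P);
  }
}
```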
@@ -7093,9 +7104,9 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl { Instruction *IP = &*EntryBB.getFirstInsertionPt(); const DataLayout &DL = IP->getModule()->getDataLayout(); unsigned AS = DL.getAllocaAddrSpace(); - Instruction *AI = new AllocaInst(PrivatizableType.getValue(), AS, + Instruction *AI = new AllocaInst(PrivatizableType.value(), AS, Arg->getName() + ".priv", IP); - createInitialization(PrivatizableType.getValue(), *AI, ReplacementFn, + createInitialization(PrivatizableType.value(), *AI, ReplacementFn, ArgIt->getArgNo(), *IP); if (AI->getType() != Arg->getType()) @@ -7203,7 +7214,7 @@ struct AAPrivatizablePtrCallSiteArgument final PrivatizableType = identifyPrivatizableType(A); if (!PrivatizableType) return ChangeStatus::UNCHANGED; - if (!PrivatizableType.getValue()) + if (!PrivatizableType.value()) return indicatePessimisticFixpoint(); const IRPosition &IRP = getIRPosition(); @@ -8664,7 +8675,7 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl { *this, UsedAssumedInformation); if (!SimplifiedLHS) return true; - if (!SimplifiedLHS.getValue()) + if (!SimplifiedLHS.value()) return false; LHS = *SimplifiedLHS; @@ -8673,7 +8684,7 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl { *this, UsedAssumedInformation); if (!SimplifiedRHS) return true; - if (!SimplifiedRHS.getValue()) + if (!SimplifiedRHS.value()) return false; RHS = *SimplifiedRHS; @@ -8717,7 +8728,7 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl { *this, UsedAssumedInformation); if (!SimplifiedOpV) return true; - if (!SimplifiedOpV.getValue()) + if (!SimplifiedOpV.value()) return false; OpV = *SimplifiedOpV; @@ -8747,7 +8758,7 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl { *this, UsedAssumedInformation); if (!SimplifiedLHS) return true; - if (!SimplifiedLHS.getValue()) + if (!SimplifiedLHS.value()) return false; LHS = *SimplifiedLHS; @@ -8756,7 +8767,7 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl { *this, UsedAssumedInformation); if (!SimplifiedRHS) return true; - if (!SimplifiedRHS.getValue()) + if (!SimplifiedRHS.value()) return false; RHS = *SimplifiedRHS; @@ -8821,7 +8832,7 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl { *this, UsedAssumedInformation); if (!SimplifiedOpV) return true; - if (!SimplifiedOpV.getValue()) + if (!SimplifiedOpV.value()) return false; Value *VPtr = *SimplifiedOpV; @@ -9182,7 +9193,7 @@ struct AAPotentialConstantValuesFloating : AAPotentialConstantValuesImpl { *this, UsedAssumedInformation); if (!SimplifiedLHS) return ChangeStatus::UNCHANGED; - if (!SimplifiedLHS.getValue()) + if (!SimplifiedLHS.value()) return indicatePessimisticFixpoint(); LHS = *SimplifiedLHS; @@ -9191,7 +9202,7 @@ struct AAPotentialConstantValuesFloating : AAPotentialConstantValuesImpl { *this, UsedAssumedInformation); if (!SimplifiedRHS) return ChangeStatus::UNCHANGED; - if (!SimplifiedRHS.getValue()) + if (!SimplifiedRHS.value()) return indicatePessimisticFixpoint(); RHS = *SimplifiedRHS; @@ -9265,7 +9276,7 @@ struct AAPotentialConstantValuesFloating : AAPotentialConstantValuesImpl { *this, UsedAssumedInformation); if (!SimplifiedLHS) return ChangeStatus::UNCHANGED; - if (!SimplifiedLHS.getValue()) + if (!SimplifiedLHS.value()) return indicatePessimisticFixpoint(); LHS = *SimplifiedLHS; @@ -9274,7 +9285,7 @@ struct AAPotentialConstantValuesFloating : AAPotentialConstantValuesImpl { *this, UsedAssumedInformation); if (!SimplifiedRHS) return ChangeStatus::UNCHANGED; - if 
(!SimplifiedRHS.getValue()) + if (!SimplifiedRHS.value()) return indicatePessimisticFixpoint(); RHS = *SimplifiedRHS; @@ -9340,7 +9351,7 @@ struct AAPotentialConstantValuesFloating : AAPotentialConstantValuesImpl { *this, UsedAssumedInformation); if (!SimplifiedSrc) return ChangeStatus::UNCHANGED; - if (!SimplifiedSrc.getValue()) + if (!SimplifiedSrc.value()) return indicatePessimisticFixpoint(); Src = *SimplifiedSrc; @@ -9373,7 +9384,7 @@ struct AAPotentialConstantValuesFloating : AAPotentialConstantValuesImpl { *this, UsedAssumedInformation); if (!SimplifiedLHS) return ChangeStatus::UNCHANGED; - if (!SimplifiedLHS.getValue()) + if (!SimplifiedLHS.value()) return indicatePessimisticFixpoint(); LHS = *SimplifiedLHS; @@ -9382,7 +9393,7 @@ struct AAPotentialConstantValuesFloating : AAPotentialConstantValuesImpl { *this, UsedAssumedInformation); if (!SimplifiedRHS) return ChangeStatus::UNCHANGED; - if (!SimplifiedRHS.getValue()) + if (!SimplifiedRHS.value()) return indicatePessimisticFixpoint(); RHS = *SimplifiedRHS; @@ -9441,7 +9452,7 @@ struct AAPotentialConstantValuesFloating : AAPotentialConstantValuesImpl { UsedAssumedInformation); if (!SimplifiedIncomingValue) continue; - if (!SimplifiedIncomingValue.getValue()) + if (!SimplifiedIncomingValue.value()) return indicatePessimisticFixpoint(); IncomingValue = *SimplifiedIncomingValue; @@ -9930,7 +9941,7 @@ private: const Function &Fn) { Optional<bool> Cached = isCachedReachable(Fn); if (Cached) - return Cached.getValue(); + return Cached.value(); // The query was not cached, thus it is new. We need to request an update // explicitly to make sure this the information is properly run to a diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp index 1a1bde4f0668..1ad6e2b2a1d2 100644 --- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -1584,11 +1584,6 @@ processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS, } Value *StoredOnceValue = GS.getStoredOnceValue(); if (GS.StoredType == GlobalStatus::StoredOnce && StoredOnceValue) { - // Avoid speculating constant expressions that might trap (div/rem). - auto *SOVConstant = dyn_cast<Constant>(StoredOnceValue); - if (SOVConstant && SOVConstant->canTrap()) - return Changed; - Function &StoreFn = const_cast<Function &>(*GS.StoredOnceStore->getFunction()); bool CanHaveNonUndefGlobalInitializer = @@ -1601,6 +1596,7 @@ processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS, // This is restricted to address spaces that allow globals to have // initializers. NVPTX, for example, does not support initializers for // shared memory (AS 3). + auto *SOVConstant = dyn_cast<Constant>(StoredOnceValue); if (SOVConstant && isa<UndefValue>(GV->getInitializer()) && DL.getTypeAllocSize(SOVConstant->getType()) == DL.getTypeAllocSize(GV->getValueType()) && diff --git a/llvm/lib/Transforms/IPO/IROutliner.cpp b/llvm/lib/Transforms/IPO/IROutliner.cpp index d75d99e307fd..28bc43aa1633 100644 --- a/llvm/lib/Transforms/IPO/IROutliner.cpp +++ b/llvm/lib/Transforms/IPO/IROutliner.cpp @@ -555,7 +555,7 @@ collectRegionsConstants(OutlinableRegion &Region, for (Value *V : ID.OperVals) { Optional<unsigned> GVNOpt = C.getGVN(V); assert(GVNOpt && "Expected a GVN for operand?"); - unsigned GVN = GVNOpt.getValue(); + unsigned GVN = GVNOpt.value(); // Check if this global value has been found to not be the same already. 
if (NotSame.contains(GVN)) { @@ -570,7 +570,7 @@ collectRegionsConstants(OutlinableRegion &Region, // it is considered to not be the same value. Optional<bool> ConstantMatches = constantMatches(V, GVN, GVNToConstant); if (ConstantMatches) { - if (ConstantMatches.getValue()) + if (ConstantMatches.value()) continue; else ConstantsTheSame = false; @@ -651,7 +651,7 @@ Function *IROutliner::createFunction(Module &M, OutlinableGroup &Group, // Transfer the swifterr attribute to the correct function parameter. if (Group.SwiftErrorArgument) - Group.OutlinedFunction->addParamAttr(Group.SwiftErrorArgument.getValue(), + Group.OutlinedFunction->addParamAttr(Group.SwiftErrorArgument.value(), Attribute::SwiftError); Group.OutlinedFunction->addFnAttr(Attribute::OptimizeForSize); @@ -809,7 +809,7 @@ static void mapInputsToGVNs(IRSimilarityCandidate &C, if (OutputMappings.find(Input) != OutputMappings.end()) Input = OutputMappings.find(Input)->second; assert(C.getGVN(Input) && "Could not find a numbering for the given input"); - EndInputNumbers.push_back(C.getGVN(Input).getValue()); + EndInputNumbers.push_back(C.getGVN(Input).value()); } } @@ -948,11 +948,11 @@ findExtractedInputToOverallInputMapping(OutlinableRegion &Region, for (unsigned InputVal : InputGVNs) { Optional<unsigned> CanonicalNumberOpt = C.getCanonicalNum(InputVal); assert(CanonicalNumberOpt && "Canonical number not found?"); - unsigned CanonicalNumber = CanonicalNumberOpt.getValue(); + unsigned CanonicalNumber = CanonicalNumberOpt.value(); Optional<Value *> InputOpt = C.fromGVN(InputVal); assert(InputOpt && "Global value number not found?"); - Value *Input = InputOpt.getValue(); + Value *Input = InputOpt.value(); DenseMap<unsigned, unsigned>::iterator AggArgIt = Group.CanonicalNumberToAggArg.find(CanonicalNumber); @@ -1236,13 +1236,13 @@ static Optional<unsigned> getGVNForPHINode(OutlinableRegion &Region, Optional<unsigned> BBGVN = Cand.getGVN(PHIBB); assert(BBGVN && "Could not find GVN for the incoming block!"); - BBGVN = Cand.getCanonicalNum(BBGVN.getValue()); + BBGVN = Cand.getCanonicalNum(BBGVN.value()); assert(BBGVN && "Could not find canonical number for the incoming block!"); // Create a pair of the exit block canonical value, and the aggregate // argument location, connected to the canonical numbers stored in the // PHINode. PHINodeData TemporaryPair = - std::make_pair(std::make_pair(BBGVN.getValue(), AggArgIdx), PHIGVNs); + std::make_pair(std::make_pair(BBGVN.value(), AggArgIdx), PHIGVNs); hash_code PHINodeDataHash = encodePHINodeData(TemporaryPair); // Look for and create a new entry in our connection between canonical @@ -1516,8 +1516,7 @@ CallInst *replaceCalledFunction(Module &M, OutlinableRegion &Region) { // Make sure that the argument in the new function has the SwiftError // argument. 
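The IROutliner hunks here are the same Optional rename, but the surrounding bookkeeping is worth spelling out: each input value carries a global value number (GVN), each GVN maps to a canonical number shared by all similar regions, and the canonical number selects the aggregate function's argument slot. A toy model of that chain with made-up values (not the LLVM API):

```cpp
#include <cassert>
#include <map>

int main() {
  // V -> GVN (C.getGVN), GVN -> canonical number (C.getCanonicalNum),
  // canonical number -> argument index (Group.CanonicalNumberToAggArg).
  std::map<int, unsigned> GVNOf{{/*V=*/1, 7}};
  std::map<unsigned, unsigned> CanonicalOf{{7, 2}};
  std::map<unsigned, unsigned> AggArgOf{{2, 0}};
  assert(AggArgOf[CanonicalOf[GVNOf[1]]] == 0); // V lands in arg slot 0
  return 0;
}
```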
if (Group.SwiftErrorArgument) - Call->addParamAttr(Group.SwiftErrorArgument.getValue(), - Attribute::SwiftError); + Call->addParamAttr(Group.SwiftErrorArgument.value(), Attribute::SwiftError); return Call; } @@ -2082,9 +2081,9 @@ static void alignOutputBlockWithAggFunc( if (MatchingBB) { LLVM_DEBUG(dbgs() << "Set output block for region in function" << Region.ExtractedFunction << " to " - << MatchingBB.getValue()); + << MatchingBB.value()); - Region.OutputBlockNum = MatchingBB.getValue(); + Region.OutputBlockNum = MatchingBB.value(); for (std::pair<Value *, BasicBlock *> &VtoBB : OutputBBs) VtoBB.second->eraseFromParent(); return; @@ -2679,15 +2678,14 @@ void IROutliner::updateOutputMapping(OutlinableRegion &Region, if (!OutputIdx) return; - if (OutputMappings.find(Outputs[OutputIdx.getValue()]) == - OutputMappings.end()) { + if (OutputMappings.find(Outputs[OutputIdx.value()]) == OutputMappings.end()) { LLVM_DEBUG(dbgs() << "Mapping extracted output " << *LI << " to " - << *Outputs[OutputIdx.getValue()] << "\n"); - OutputMappings.insert(std::make_pair(LI, Outputs[OutputIdx.getValue()])); + << *Outputs[OutputIdx.value()] << "\n"); + OutputMappings.insert(std::make_pair(LI, Outputs[OutputIdx.value()])); } else { - Value *Orig = OutputMappings.find(Outputs[OutputIdx.getValue()])->second; + Value *Orig = OutputMappings.find(Outputs[OutputIdx.value()])->second; LLVM_DEBUG(dbgs() << "Mapping extracted output " << *Orig << " to " - << *Outputs[OutputIdx.getValue()] << "\n"); + << *Outputs[OutputIdx.value()] << "\n"); OutputMappings.insert(std::make_pair(LI, Orig)); } } diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index 227ad8501f25..8e0ca8c6c997 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -3340,6 +3340,9 @@ struct AAKernelInfoFunction : AAKernelInfo { } bool changeToSPMDMode(Attributor &A, ChangeStatus &Changed) { + if (!mayContainParallelRegion()) + return false; + auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); if (!SPMDCompatibilityTracker.isAssumed()) { @@ -4428,10 +4431,10 @@ struct AAFoldRuntimeCallCallSiteReturned : AAFoldRuntimeCall { if (!SimplifiedValue) return Str + std::string("none"); - if (!SimplifiedValue.getValue()) + if (!SimplifiedValue.value()) return Str + std::string("nullptr"); - if (ConstantInt *CI = dyn_cast<ConstantInt>(SimplifiedValue.getValue())) + if (ConstantInt *CI = dyn_cast<ConstantInt>(SimplifiedValue.value())) return Str + std::to_string(CI->getSExtValue()); return Str + std::string("unknown"); @@ -4456,7 +4459,7 @@ struct AAFoldRuntimeCallCallSiteReturned : AAFoldRuntimeCall { [&](const IRPosition &IRP, const AbstractAttribute *AA, bool &UsedAssumedInformation) -> Optional<Value *> { assert((isValidState() || - (SimplifiedValue && SimplifiedValue.getValue() == nullptr)) && + (SimplifiedValue && SimplifiedValue.value() == nullptr)) && "Unexpected invalid state!"); if (!isAtFixpoint()) { diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp index ae787be40c55..8eef82675e86 100644 --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -898,183 +898,6 @@ void PassManagerBuilder::populateModulePassManager( MPM.add(createAnnotationRemarksLegacyPass()); } -void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { - // Load sample profile before running the LTO optimization pipeline. 
- if (!PGOSampleUse.empty()) { - PM.add(createPruneEHPass()); - PM.add(createSampleProfileLoaderPass(PGOSampleUse)); - } - - // Remove unused virtual tables to improve the quality of code generated by - // whole-program devirtualization and bitset lowering. - PM.add(createGlobalDCEPass()); - - // Provide AliasAnalysis services for optimizations. - addInitialAliasAnalysisPasses(PM); - - // Allow forcing function attributes as a debugging and tuning aid. - PM.add(createForceFunctionAttrsLegacyPass()); - - // Infer attributes about declarations if possible. - PM.add(createInferFunctionAttrsLegacyPass()); - - if (OptLevel > 1) { - // Split call-site with more constrained arguments. - PM.add(createCallSiteSplittingPass()); - - // Propage constant function arguments by specializing the functions. - if (EnableFunctionSpecialization && OptLevel > 2) - PM.add(createFunctionSpecializationPass()); - - // Propagate constants at call sites into the functions they call. This - // opens opportunities for globalopt (and inlining) by substituting function - // pointers passed as arguments to direct uses of functions. - PM.add(createIPSCCPPass()); - - // Attach metadata to indirect call sites indicating the set of functions - // they may target at run-time. This should follow IPSCCP. - PM.add(createCalledValuePropagationPass()); - - // Infer attributes on declarations, call sites, arguments, etc. - if (AttributorRun & AttributorRunOption::MODULE) - PM.add(createAttributorLegacyPass()); - } - - // Infer attributes about definitions. The readnone attribute in particular is - // required for virtual constant propagation. - PM.add(createPostOrderFunctionAttrsLegacyPass()); - PM.add(createReversePostOrderFunctionAttrsPass()); - - // Split globals using inrange annotations on GEP indices. This can help - // improve the quality of generated code when virtual constant propagation or - // control flow integrity are enabled. - PM.add(createGlobalSplitPass()); - - // Apply whole-program devirtualization and virtual constant propagation. - PM.add(createWholeProgramDevirtPass(ExportSummary, nullptr)); - - // That's all we need at opt level 1. - if (OptLevel == 1) - return; - - // Now that we internalized some globals, see if we can hack on them! - PM.add(createGlobalOptimizerPass()); - // Promote any localized global vars. - PM.add(createPromoteMemoryToRegisterPass()); - - // Linking modules together can lead to duplicated global constants, only - // keep one copy of each constant. - PM.add(createConstantMergePass()); - - // Remove unused arguments from functions. - PM.add(createDeadArgEliminationPass()); - - // Reduce the code after globalopt and ipsccp. Both can open up significant - // simplification opportunities, and both can propagate functions through - // function pointers. When this happens, we often have to resolve varargs - // calls, etc, so let instcombine do this. - if (OptLevel > 2) - PM.add(createAggressiveInstCombinerPass()); - PM.add(createInstructionCombiningPass()); - addExtensionsToPM(EP_Peephole, PM); - - // Inline small functions - bool RunInliner = Inliner; - if (RunInliner) { - PM.add(Inliner); - Inliner = nullptr; - } - - PM.add(createPruneEHPass()); // Remove dead EH info. - - // Infer attributes on declarations, call sites, arguments, etc. for an SCC. - if (AttributorRun & AttributorRunOption::CGSCC) - PM.add(createAttributorCGSCCLegacyPass()); - - // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if - // there are no OpenMP runtime calls present in the module. 
- if (OptLevel > 1) - PM.add(createOpenMPOptCGSCCLegacyPass()); - - // Optimize globals again if we ran the inliner. - if (RunInliner) - PM.add(createGlobalOptimizerPass()); - PM.add(createGlobalDCEPass()); // Remove dead functions. - - // The IPO passes may leave cruft around. Clean up after them. - PM.add(createInstructionCombiningPass()); - addExtensionsToPM(EP_Peephole, PM); - PM.add(createJumpThreadingPass()); - - // Break up allocas - PM.add(createSROAPass()); - - // LTO provides additional opportunities for tailcall elimination due to - // link-time inlining, and visibility of nocapture attribute. - if (OptLevel > 1) - PM.add(createTailCallEliminationPass()); - - // Infer attributes on declarations, call sites, arguments, etc. - PM.add(createPostOrderFunctionAttrsLegacyPass()); // Add nocapture. - // Run a few AA driven optimizations here and now, to cleanup the code. - PM.add(createGlobalsAAWrapperPass()); // IP alias analysis. - - PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, - /*AllowSpeculation=*/true)); - PM.add(NewGVN ? createNewGVNPass() - : createGVNPass(DisableGVNLoadPRE)); // Remove redundancies. - PM.add(createMemCpyOptPass()); // Remove dead memcpys. - - // Nuke dead stores. - PM.add(createDeadStoreEliminationPass()); - PM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds. - - // More loops are countable; try to optimize them. - if (EnableLoopFlatten) - PM.add(createLoopFlattenPass()); - PM.add(createIndVarSimplifyPass()); - PM.add(createLoopDeletionPass()); - if (EnableLoopInterchange) - PM.add(createLoopInterchangePass()); - - if (EnableConstraintElimination) - PM.add(createConstraintEliminationPass()); - - // Unroll small loops and perform peeling. - PM.add(createSimpleLoopUnrollPass(OptLevel, DisableUnrollLoops, - ForgetAllSCEVInLoopUnroll)); - PM.add(createLoopDistributePass()); - - addVectorPasses(PM, /* IsFullLTO */ true); - - addExtensionsToPM(EP_Peephole, PM); - - PM.add(createJumpThreadingPass()); -} - -void PassManagerBuilder::addLateLTOOptimizationPasses( - legacy::PassManagerBase &PM) { - // See comment in the new PM for justification of scheduling splitting at - // this stage (\ref buildLTODefaultPipeline). - if (EnableHotColdSplit) - PM.add(createHotColdSplittingPass()); - - // Delete basic blocks, which optimization passes may have killed. - PM.add( - createCFGSimplificationPass(SimplifyCFGOptions().hoistCommonInsts(true))); - - // Drop bodies of available externally objects to improve GlobalDCE. - PM.add(createEliminateAvailableExternallyPass()); - - // Now that we have optimized the program, discard unreachable functions. - PM.add(createGlobalDCEPass()); - - // FIXME: this is profitable (for compiler time) to do at -O0 too, but - // currently it damages debug info. 
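The two builder functions being deleted here (addLTOOptimizationPasses and addLateLTOOptimizationPasses; the removal finishes just below) belonged to the legacy pass manager, whose full-LTO pipeline now comes from the new pass manager instead. A minimal sketch of the replacement path, assuming the LLVM 15-era PassBuilder API:

```cpp
#include "llvm/IR/Module.h"
#include "llvm/Passes/PassBuilder.h"
using namespace llvm;

// Hedged sketch: the new-PM pipeline that supersedes the deleted
// legacy-PM LTO builders.
void runLTOPipeline(Module &M) {
  LoopAnalysisManager LAM;
  FunctionAnalysisManager FAM;
  CGSCCAnalysisManager CGAM;
  ModuleAnalysisManager MAM;

  PassBuilder PB;
  PB.registerModuleAnalyses(MAM);
  PB.registerCGSCCAnalyses(CGAM);
  PB.registerFunctionAnalyses(FAM);
  PB.registerLoopAnalyses(LAM);
  PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);

  // Covers the same ground as the removed legacy sequence: IPSCCP,
  // globalopt, inlining, GVN/DSE/LICM, vectorization, hot/cold
  // splitting, late GlobalDCE, and so on.
  ModulePassManager MPM = PB.buildLTODefaultPipeline(
      OptimizationLevel::O2, /*ExportSummary=*/nullptr);
  MPM.run(M, MAM);
}
```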
- if (MergeFunctions) - PM.add(createMergeFunctionsPass()); -} - LLVMPassManagerBuilderRef LLVMPassManagerBuilderCreate() { PassManagerBuilder *PMB = new PassManagerBuilder(); return wrap(PMB); diff --git a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp index 6859953de962..764fd57d245f 100644 --- a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp +++ b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp @@ -130,7 +130,7 @@ void ContextTrieNode::addFunctionSize(uint32_t FSize) { if (!FuncSize) FuncSize = 0; - FuncSize = FuncSize.getValue() + FSize; + FuncSize = FuncSize.value() + FSize; } LineLocation ContextTrieNode::getCallSiteLoc() const { return CallSiteLoc; } diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp index 40de69bbf2cf..55fee213cd5f 100644 --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -1350,14 +1350,14 @@ SampleProfileLoader::getExternalInlineAdvisorCost(CallBase &CB) { bool SampleProfileLoader::getExternalInlineAdvisorShouldInline(CallBase &CB) { Optional<InlineCost> Cost = getExternalInlineAdvisorCost(CB); - return Cost ? !!Cost.getValue() : false; + return Cost ? !!Cost.value() : false; } InlineCost SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) { if (Optional<InlineCost> ReplayCost = getExternalInlineAdvisorCost(*Candidate.CallInstr)) - return ReplayCost.getValue(); + return ReplayCost.value(); // Adjust threshold based on call site hotness, only do this for callsite // prioritized inliner because otherwise cost-benefit check is done earlier. int SampleThreshold = SampleColdCallSiteThreshold; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index f4d8b79a5311..535a7736454c 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -1660,8 +1660,9 @@ Instruction *InstCombinerImpl::visitFAdd(BinaryOperator &I) { Constant *MulC; if (match(&I, m_c_FAdd(m_FMul(m_Value(X), m_ImmConstant(MulC)), m_Deferred(X)))) { - MulC = ConstantExpr::getFAdd(MulC, ConstantFP::get(I.getType(), 1.0)); - return BinaryOperator::CreateFMulFMF(X, MulC, &I); + if (Constant *NewMulC = ConstantFoldBinaryOpOperands( + Instruction::FAdd, MulC, ConstantFP::get(I.getType(), 1.0), DL)) + return BinaryOperator::CreateFMulFMF(X, NewMulC, &I); } if (Value *V = FAddCombine(Builder).simplify(&I)) @@ -1750,6 +1751,52 @@ Value *InstCombinerImpl::OptimizePointerDifference(Value *LHS, Value *RHS, return Builder.CreateIntCast(Result, Ty, true); } +static Instruction *foldSubOfMinMax(BinaryOperator &I, + InstCombiner::BuilderTy &Builder) { + Value *Op0 = I.getOperand(0); + Value *Op1 = I.getOperand(1); + Type *Ty = I.getType(); + auto *MinMax = dyn_cast<MinMaxIntrinsic>(Op1); + if (!MinMax) + return nullptr; + + // sub(add(X,Y), s/umin(X,Y)) --> s/umax(X,Y) + // sub(add(X,Y), s/umax(X,Y)) --> s/umin(X,Y) + Value *X = MinMax->getLHS(); + Value *Y = MinMax->getRHS(); + if (match(Op0, m_c_Add(m_Specific(X), m_Specific(Y))) && + (Op0->hasOneUse() || Op1->hasOneUse())) { + Intrinsic::ID InvID = getInverseMinMaxIntrinsic(MinMax->getIntrinsicID()); + Function *F = Intrinsic::getDeclaration(I.getModule(), InvID, Ty); + return CallInst::Create(F, {X, Y}); + } + + // sub(add(X,Y),umin(Y,Z)) --> add(X,usub.sat(Y,Z)) + // sub(add(X,Z),umin(Y,Z)) --> add(X,usub.sat(Z,Y)) + Value *Z; + if (match(Op1, 
m_OneUse(m_UMin(m_Value(Y), m_Value(Z))))) { + if (match(Op0, m_OneUse(m_c_Add(m_Specific(Y), m_Value(X))))) { + Value *USub = Builder.CreateIntrinsic(Intrinsic::usub_sat, Ty, {Y, Z}); + return BinaryOperator::CreateAdd(X, USub); + } + if (match(Op0, m_OneUse(m_c_Add(m_Specific(Z), m_Value(X))))) { + Value *USub = Builder.CreateIntrinsic(Intrinsic::usub_sat, Ty, {Z, Y}); + return BinaryOperator::CreateAdd(X, USub); + } + } + + // sub Op0, smin((sub nsw Op0, Z), 0) --> smax Op0, Z + // sub Op0, smax((sub nsw Op0, Z), 0) --> smin Op0, Z + if (MinMax->isSigned() && match(Y, m_ZeroInt()) && + match(X, m_NSWSub(m_Specific(Op0), m_Value(Z)))) { + Intrinsic::ID InvID = getInverseMinMaxIntrinsic(MinMax->getIntrinsicID()); + Function *F = Intrinsic::getDeclaration(I.getModule(), InvID, Ty); + return CallInst::Create(F, {Op0, Z}); + } + + return nullptr; +} + Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) { if (Value *V = simplifySubInst(I.getOperand(0), I.getOperand(1), I.hasNoSignedWrap(), I.hasNoUnsignedWrap(), @@ -1919,14 +1966,12 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) { return BinaryOperator::CreateAdd(X, ConstantExpr::getSub(C, C2)); } + // If there's no chance any bit will need to borrow from an adjacent bit: + // sub C, X --> xor X, C const APInt *Op0C; - if (match(Op0, m_APInt(Op0C)) && Op0C->isMask()) { - // Turn this into a xor if LHS is 2^n-1 and the remaining bits are known - // zero. - KnownBits RHSKnown = computeKnownBits(Op1, 0, &I); - if ((*Op0C | RHSKnown.Zero).isAllOnes()) - return BinaryOperator::CreateXor(Op1, Op0); - } + if (match(Op0, m_APInt(Op0C)) && + (~computeKnownBits(Op1, 0, &I).Zero).isSubsetOf(*Op0C)) + return BinaryOperator::CreateXor(Op1, Op0); { Value *Y; @@ -2016,36 +2061,8 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) { } } - if (auto *II = dyn_cast<MinMaxIntrinsic>(Op1)) { - { - // sub(add(X,Y), s/umin(X,Y)) --> s/umax(X,Y) - // sub(add(X,Y), s/umax(X,Y)) --> s/umin(X,Y) - Value *X = II->getLHS(); - Value *Y = II->getRHS(); - if (match(Op0, m_c_Add(m_Specific(X), m_Specific(Y))) && - (Op0->hasOneUse() || Op1->hasOneUse())) { - Intrinsic::ID InvID = getInverseMinMaxIntrinsic(II->getIntrinsicID()); - Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, X, Y); - return replaceInstUsesWith(I, InvMaxMin); - } - } - - { - // sub(add(X,Y),umin(Y,Z)) --> add(X,usub.sat(Y,Z)) - // sub(add(X,Z),umin(Y,Z)) --> add(X,usub.sat(Z,Y)) - Value *X, *Y, *Z; - if (match(Op1, m_OneUse(m_UMin(m_Value(Y), m_Value(Z))))) { - if (match(Op0, m_OneUse(m_c_Add(m_Specific(Y), m_Value(X))))) - return BinaryOperator::CreateAdd( - X, Builder.CreateIntrinsic(Intrinsic::usub_sat, I.getType(), - {Y, Z})); - if (match(Op0, m_OneUse(m_c_Add(m_Specific(Z), m_Value(X))))) - return BinaryOperator::CreateAdd( - X, Builder.CreateIntrinsic(Intrinsic::usub_sat, I.getType(), - {Z, Y})); - } - } - } + if (Instruction *R = foldSubOfMinMax(I, Builder)) + return R; { // If we have a subtraction between some value and a select between @@ -2437,13 +2454,15 @@ Instruction *InstCombinerImpl::visitFSub(BinaryOperator &I) { // (X * C) - X --> X * (C - 1.0) if (match(Op0, m_FMul(m_Specific(Op1), m_Constant(C)))) { - Constant *CSubOne = ConstantExpr::getFSub(C, ConstantFP::get(Ty, 1.0)); - return BinaryOperator::CreateFMulFMF(Op1, CSubOne, &I); + if (Constant *CSubOne = ConstantFoldBinaryOpOperands( + Instruction::FSub, C, ConstantFP::get(Ty, 1.0), DL)) + return BinaryOperator::CreateFMulFMF(Op1, CSubOne, &I); } // X - (X * C) --> X * (1.0 - C) if (match(Op1, 
m_FMul(m_Specific(Op0), m_Constant(C)))) { - Constant *OneSubC = ConstantExpr::getFSub(ConstantFP::get(Ty, 1.0), C); - return BinaryOperator::CreateFMulFMF(Op0, OneSubC, &I); + if (Constant *OneSubC = ConstantFoldBinaryOpOperands( + Instruction::FSub, ConstantFP::get(Ty, 1.0), C, DL)) + return BinaryOperator::CreateFMulFMF(Op0, OneSubC, &I); } // Reassociate fsub/fadd sequences to create more fadd instructions and diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index ae8865651ece..a8f2cd79830a 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -1771,6 +1771,16 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) { return new ZExtInst(IsZero, Ty); } + // (-(X & 1)) & Y --> (X & 1) == 0 ? 0 : Y + Value *Neg; + if (match(&I, + m_c_And(m_CombineAnd(m_Value(Neg), + m_OneUse(m_Neg(m_And(m_Value(), m_One())))), + m_Value(Y)))) { + Value *Cmp = Builder.CreateIsNull(Neg); + return SelectInst::Create(Cmp, ConstantInt::getNullValue(Ty), Y); + } + const APInt *C; if (match(Op1, m_APInt(C))) { const APInt *XorC; @@ -1798,7 +1808,8 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) { unsigned Width = Ty->getScalarSizeInBits(); const APInt *ShiftC; - if (match(Op0, m_OneUse(m_SExt(m_AShr(m_Value(X), m_APInt(ShiftC)))))) { + if (match(Op0, m_OneUse(m_SExt(m_AShr(m_Value(X), m_APInt(ShiftC))))) && + ShiftC->ult(Width)) { if (*C == APInt::getLowBitsSet(Width, Width - ShiftC->getZExtValue())) { // We are clearing high bits that were potentially set by sext+ashr: // and (sext (ashr X, ShiftC)), C --> lshr (sext X), ShiftC diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp index 2540e545ae4d..0327efbf9614 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp @@ -61,7 +61,13 @@ bool isIdempotentRMW(AtomicRMWInst& RMWI) { /// equivalent to its value operand. bool isSaturating(AtomicRMWInst& RMWI) { if (auto CF = dyn_cast<ConstantFP>(RMWI.getValOperand())) - switch(RMWI.getOperation()) { + switch (RMWI.getOperation()) { + case AtomicRMWInst::FMax: + // maxnum(x, +inf) -> +inf + return !CF->isNegative() && CF->isInfinity(); + case AtomicRMWInst::FMin: + // minnum(x, -inf) -> +inf + return CF->isNegative() && CF->isInfinity(); case AtomicRMWInst::FAdd: case AtomicRMWInst::FSub: return CF->isNaN(); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 67ef2e895b6c..edfdf70c2b97 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1543,7 +1543,10 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { !ShAmtC->containsConstantExpression()) { // Canonicalize a shift amount constant operand to modulo the bit-width. 
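The integer identities behind the new foldSubOfMinMax(), the generalized sub-to-xor fold, and the new visitAnd select fold above can be sanity-checked in plain C++; this is a standalone arithmetic check, not LLVM code:

```cpp
#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t X = 0; X < 64; ++X)
    for (uint32_t Y = 0; Y < 64; ++Y) {
      // sub(add(X,Y), umin(X,Y)) --> umax(X,Y), and vice versa.
      assert(X + Y - std::min(X, Y) == std::max(X, Y));
      assert(X + Y - std::max(X, Y) == std::min(X, Y));
      // (-(X & 1)) & Y --> (X & 1) == 0 ? 0 : Y, since -(X & 1) is
      // either all-zeros or all-ones.
      assert(((0u - (X & 1)) & Y) == ((X & 1) == 0 ? 0u : Y));
      // sub C, X --> xor X, C when no bit of X can borrow from C,
      // e.g. C = 0xF and X known to fit in 4 bits.
      assert(0xFu - (X & 0xF) == (0xFu ^ (X & 0xF)));
      // sub(add(X,Y), umin(Y,Z)) --> add(X, usub.sat(Y,Z)).
      for (uint32_t Z = 0; Z < 64; ++Z)
        assert(X + Y - std::min(Y, Z) == X + (Y > Z ? Y - Z : 0));
    }
  return 0;
}
```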
Constant *WidthC = ConstantInt::get(Ty, BitWidth); - Constant *ModuloC = ConstantExpr::getURem(ShAmtC, WidthC); + Constant *ModuloC = + ConstantFoldBinaryOpOperands(Instruction::URem, ShAmtC, WidthC, DL); + if (!ModuloC) + return nullptr; if (ModuloC != ShAmtC) return replaceOperand(*II, 2, ModuloC); @@ -2679,7 +2682,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { // Handle target specific intrinsics Optional<Instruction *> V = targetInstCombineIntrinsic(*II); if (V) - return V.getValue(); + return V.value(); break; } } diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp index e9e779b8619b..a9a930555b3c 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -1756,11 +1756,12 @@ static bool isKnownExactCastIntToFP(CastInst &I, InstCombinerImpl &IC) { // TODO: // Try harder to find if the source integer type has less significant bits. - // For example, compute number of sign bits or compute low bit mask. + // For example, compute number of sign bits. KnownBits SrcKnown = IC.computeKnownBits(Src, 0, &I); - int LowBits = - (int)SrcTy->getScalarSizeInBits() - SrcKnown.countMinLeadingZeros(); - if (LowBits <= DestNumSigBits) + int SigBits = (int)SrcTy->getScalarSizeInBits() - + SrcKnown.countMinLeadingZeros() - + SrcKnown.countMinTrailingZeros(); + if (SigBits <= DestNumSigBits) return true; return false; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index d1f89973caa1..9f6d36b85522 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -1436,7 +1436,7 @@ Instruction *InstCombinerImpl::foldICmpWithConstant(ICmpInst &Cmp) { // icmp(phi(C1, C2, ...), C) -> phi(icmp(C1, C), icmp(C2, C), ...). Constant *C = dyn_cast<Constant>(Op1); - if (!C || C->canTrap()) + if (!C) return nullptr; if (auto *Phi = dyn_cast<PHINode>(Op0)) @@ -1777,11 +1777,16 @@ Instruction *InstCombinerImpl::foldICmpAndConstConst(ICmpInst &Cmp, return new ICmpInst(NewPred, X, Zero); } + APInt NewC2 = *C2; + KnownBits Know = computeKnownBits(And->getOperand(0), 0, And); + // Set high zeros of C2 to allow matching negated power-of-2. + NewC2 = *C2 + APInt::getHighBitsSet(C2->getBitWidth(), + Know.countMinLeadingZeros()); + // Restrict this fold only for single-use 'and' (PR10267). // ((%x & C) == 0) --> %x u< (-C) iff (-C) is power of two. - if ((~(*C2) + 1).isPowerOf2()) { - Constant *NegBOC = - ConstantExpr::getNeg(cast<Constant>(And->getOperand(1))); + if (NewC2.isNegatedPowerOf2()) { + Constant *NegBOC = ConstantInt::get(And->getType(), -NewC2); auto NewPred = isICMP_NE ? 
ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT; return new ICmpInst(NewPred, X, NegBOC); } diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 2a34edbf6cb8..8cb09cbac86f 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -505,20 +505,23 @@ Instruction *InstCombinerImpl::visitFMul(BinaryOperator &I) { Constant *C1; if (match(Op0, m_OneUse(m_FDiv(m_Constant(C1), m_Value(X))))) { // (C1 / X) * C --> (C * C1) / X - Constant *CC1 = ConstantExpr::getFMul(C, C1); - if (CC1->isNormalFP()) + Constant *CC1 = + ConstantFoldBinaryOpOperands(Instruction::FMul, C, C1, DL); + if (CC1 && CC1->isNormalFP()) return BinaryOperator::CreateFDivFMF(CC1, X, &I); } if (match(Op0, m_FDiv(m_Value(X), m_Constant(C1)))) { // (X / C1) * C --> X * (C / C1) - Constant *CDivC1 = ConstantExpr::getFDiv(C, C1); - if (CDivC1->isNormalFP()) + Constant *CDivC1 = + ConstantFoldBinaryOpOperands(Instruction::FDiv, C, C1, DL); + if (CDivC1 && CDivC1->isNormalFP()) return BinaryOperator::CreateFMulFMF(X, CDivC1, &I); // If the constant was a denormal, try reassociating differently. // (X / C1) * C --> X / (C1 / C) - Constant *C1DivC = ConstantExpr::getFDiv(C1, C); - if (Op0->hasOneUse() && C1DivC->isNormalFP()) + Constant *C1DivC = + ConstantFoldBinaryOpOperands(Instruction::FDiv, C1, C, DL); + if (C1DivC && Op0->hasOneUse() && C1DivC->isNormalFP()) return BinaryOperator::CreateFDivFMF(X, C1DivC, &I); } @@ -527,15 +530,19 @@ Instruction *InstCombinerImpl::visitFMul(BinaryOperator &I) { // further folds and (X * C) + C2 is 'fma'. if (match(Op0, m_OneUse(m_FAdd(m_Value(X), m_Constant(C1))))) { // (X + C1) * C --> (X * C) + (C * C1) - Constant *CC1 = ConstantExpr::getFMul(C, C1); - Value *XC = Builder.CreateFMulFMF(X, C, &I); - return BinaryOperator::CreateFAddFMF(XC, CC1, &I); + if (Constant *CC1 = ConstantFoldBinaryOpOperands( + Instruction::FMul, C, C1, DL)) { + Value *XC = Builder.CreateFMulFMF(X, C, &I); + return BinaryOperator::CreateFAddFMF(XC, CC1, &I); + } } if (match(Op0, m_OneUse(m_FSub(m_Constant(C1), m_Value(X))))) { // (C1 - X) * C --> (C * C1) - (X * C) - Constant *CC1 = ConstantExpr::getFMul(C, C1); - Value *XC = Builder.CreateFMulFMF(X, C, &I); - return BinaryOperator::CreateFSubFMF(CC1, XC, &I); + if (Constant *CC1 = ConstantFoldBinaryOpOperands( + Instruction::FMul, C, C1, DL)) { + Value *XC = Builder.CreateFMulFMF(X, C, &I); + return BinaryOperator::CreateFSubFMF(CC1, XC, &I); + } } } @@ -1232,8 +1239,10 @@ static Instruction *foldFDivConstantDivisor(BinaryOperator &I) { // on all targets. // TODO: Use Intrinsic::canonicalize or let function attributes tell us that // denorms are flushed? - auto *RecipC = ConstantExpr::getFDiv(ConstantFP::get(I.getType(), 1.0), C); - if (!RecipC->isNormalFP()) + const DataLayout &DL = I.getModule()->getDataLayout(); + auto *RecipC = ConstantFoldBinaryOpOperands( + Instruction::FDiv, ConstantFP::get(I.getType(), 1.0), C, DL); + if (!RecipC || !RecipC->isNormalFP()) return nullptr; // X / C --> X * (1 / C) @@ -1256,12 +1265,13 @@ static Instruction *foldFDivConstantDividend(BinaryOperator &I) { // Try to reassociate C / X expressions where X includes another constant. 
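A pattern repeated across the InstCombine hunks above and below: ConstantExpr::getFAdd/getFMul/getFDiv always produced some Constant (possibly an unfolded constant expression), while ConstantFoldBinaryOpOperands may fail, so every rewritten fold now carries a null check. The shape of the guard, as a hedged sketch:

```cpp
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

// Returns the folded product only when target-independent constant
// folding actually succeeds; callers leave the IR untouched on nullptr.
Constant *tryFoldFMul(Constant *C, Constant *C1, const DataLayout &DL) {
  if (Constant *Folded =
          ConstantFoldBinaryOpOperands(Instruction::FMul, C, C1, DL))
    return Folded;
  return nullptr; // no fold available; the transform must bail out
}
```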
Constant *C2, *NewC = nullptr; + const DataLayout &DL = I.getModule()->getDataLayout(); if (match(I.getOperand(1), m_FMul(m_Value(X), m_Constant(C2)))) { // C / (X * C2) --> (C / C2) / X - NewC = ConstantExpr::getFDiv(C, C2); + NewC = ConstantFoldBinaryOpOperands(Instruction::FDiv, C, C2, DL); } else if (match(I.getOperand(1), m_FDiv(m_Value(X), m_Constant(C2)))) { // C / (X / C2) --> (C * C2) / X - NewC = ConstantExpr::getFMul(C, C2); + NewC = ConstantFoldBinaryOpOperands(Instruction::FMul, C, C2, DL); } // Disallow denormal constants because we don't know what would happen // on all targets. diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 9d4c01ac03e2..febd0f51d25f 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -925,7 +925,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, Optional<Value *> V = targetSimplifyDemandedUseBitsIntrinsic( *II, DemandedMask, Known, KnownBitsComputed); if (V) - return V.getValue(); + return V.value(); break; } } @@ -1636,7 +1636,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, *II, DemandedElts, UndefElts, UndefElts2, UndefElts3, simplifyAndSetOp); if (V) - return V.getValue(); + return V.value(); break; } } // switch on IntrinsicID diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 22659a8e4951..b80c58183dd5 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -228,8 +228,9 @@ Instruction *InstCombinerImpl::foldBitcastExtElt(ExtractElementInst &Ext) { // truncate a subset of scalar bits of an insert op. if (NumSrcElts.getKnownMinValue() < NumElts.getKnownMinValue()) { Value *Scalar; + Value *Vec; uint64_t InsIndexC; - if (!match(X, m_InsertElt(m_Value(), m_Value(Scalar), + if (!match(X, m_InsertElt(m_Value(Vec), m_Value(Scalar), m_ConstantInt(InsIndexC)))) return nullptr; @@ -239,8 +240,19 @@ Instruction *InstCombinerImpl::foldBitcastExtElt(ExtractElementInst &Ext) { // of elements 4-7 of the bitcasted vector. unsigned NarrowingRatio = NumElts.getKnownMinValue() / NumSrcElts.getKnownMinValue(); - if (ExtIndexC / NarrowingRatio != InsIndexC) + + if (ExtIndexC / NarrowingRatio != InsIndexC) { + // Remove insertelement, if we don't use the inserted element. + // extractelement (bitcast (insertelement (Vec, b)), a) -> + // extractelement (bitcast (Vec), a) + // FIXME: this should be removed to SimplifyDemandedVectorElts, + // once scale vectors are supported. + if (X->hasOneUse() && Ext.getVectorOperand()->hasOneUse()) { + Value *NewBC = Builder.CreateBitCast(Vec, Ext.getVectorOperandType()); + return ExtractElementInst::Create(NewBC, Ext.getIndexOperand()); + } return nullptr; + } // We are extracting part of the original scalar. How that scalar is // inserted into the vector depends on the endian-ness. 
Example: diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 0816a4a575d9..75520a0c8d5f 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -523,11 +523,12 @@ bool InstCombinerImpl::SimplifyAssociativeOrCommutative(BinaryOperator &I) { // Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)" // if C1 and C2 are constants. Value *A, *B; - Constant *C1, *C2; + Constant *C1, *C2, *CRes; if (Op0 && Op1 && Op0->getOpcode() == Opcode && Op1->getOpcode() == Opcode && match(Op0, m_OneUse(m_BinOp(m_Value(A), m_Constant(C1)))) && - match(Op1, m_OneUse(m_BinOp(m_Value(B), m_Constant(C2))))) { + match(Op1, m_OneUse(m_BinOp(m_Value(B), m_Constant(C2)))) && + (CRes = ConstantFoldBinaryOpOperands(Opcode, C1, C2, DL))) { bool IsNUW = hasNoUnsignedWrap(I) && hasNoUnsignedWrap(*Op0) && hasNoUnsignedWrap(*Op1); @@ -544,7 +545,7 @@ bool InstCombinerImpl::SimplifyAssociativeOrCommutative(BinaryOperator &I) { InsertNewInstWith(NewBO, I); NewBO->takeName(Op1); replaceOperand(I, 0, NewBO); - replaceOperand(I, 1, ConstantExpr::get(Opcode, C1, C2)); + replaceOperand(I, 1, CRes); // Conservatively clear the optional flags, since they may not be // preserved by the reassociation. ClearSubclassDataAfterReassociation(I); @@ -1324,6 +1325,11 @@ Instruction *InstCombinerImpl::foldBinopWithPhiOperands(BinaryOperator &BO) { if (!isGuaranteedToTransferExecutionToSuccessor(&*BBIter)) return nullptr; + // Fold constants for the predecessor block with constant incoming values. + Constant *NewC = ConstantFoldBinaryOpOperands(BO.getOpcode(), C0, C1, DL); + if (!NewC) + return nullptr; + // Make a new binop in the predecessor block with the non-constant incoming // values. Builder.SetInsertPoint(PredBlockBranch); @@ -1333,9 +1339,6 @@ Instruction *InstCombinerImpl::foldBinopWithPhiOperands(BinaryOperator &BO) { if (auto *NotFoldedNewBO = dyn_cast<BinaryOperator>(NewBO)) NotFoldedNewBO->copyIRFlags(&BO); - // Fold constants for the predecessor block with constant incoming values. - Constant *NewC = ConstantExpr::get(BO.getOpcode(), C0, C1); - // Replace the binop with a phi of the new values. The old phis are dead. PHINode *NewPhi = PHINode::Create(BO.getType(), 2); NewPhi->addIncoming(NewBO, OtherBB); @@ -1774,9 +1777,10 @@ Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) { // for target-independent shuffle creation. if (I >= SrcVecNumElts || ShMask[I] < 0) { Constant *MaybeUndef = - ConstOp1 ? ConstantExpr::get(Opcode, UndefScalar, CElt) - : ConstantExpr::get(Opcode, CElt, UndefScalar); - if (!match(MaybeUndef, m_Undef())) { + ConstOp1 + ? 
ConstantFoldBinaryOpOperands(Opcode, UndefScalar, CElt, DL) + : ConstantFoldBinaryOpOperands(Opcode, CElt, UndefScalar, DL); + if (!MaybeUndef || !match(MaybeUndef, m_Undef())) { MayChange = false; break; } diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 7a5a74aa4fff..4fed4bd18fb1 100644 --- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -101,6 +101,7 @@ static const uint64_t kSmallX86_64ShadowOffsetAlignMask = ~0xFFFULL; static const uint64_t kLinuxKasan_ShadowOffset64 = 0xdffffc0000000000; static const uint64_t kPPC64_ShadowOffset64 = 1ULL << 44; static const uint64_t kSystemZ_ShadowOffset64 = 1ULL << 52; +static const uint64_t kMIPS_ShadowOffsetN32 = 1ULL << 29; static const uint64_t kMIPS32_ShadowOffset32 = 0x0aaa0000; static const uint64_t kMIPS64_ShadowOffset64 = 1ULL << 37; static const uint64_t kAArch64_ShadowOffset64 = 1ULL << 36; @@ -476,6 +477,7 @@ static ShadowMapping getShadowMapping(const Triple &TargetTriple, int LongSize, TargetTriple.getArch() == Triple::ppc64le; bool IsSystemZ = TargetTriple.getArch() == Triple::systemz; bool IsX86_64 = TargetTriple.getArch() == Triple::x86_64; + bool IsMIPSN32ABI = TargetTriple.getEnvironment() == Triple::GNUABIN32; bool IsMIPS32 = TargetTriple.isMIPS32(); bool IsMIPS64 = TargetTriple.isMIPS64(); bool IsArmOrThumb = TargetTriple.isARM() || TargetTriple.isThumb(); @@ -496,6 +498,8 @@ static ShadowMapping getShadowMapping(const Triple &TargetTriple, int LongSize, if (LongSize == 32) { if (IsAndroid) Mapping.Offset = kDynamicShadowSentinel; + else if (IsMIPSN32ABI) + Mapping.Offset = kMIPS_ShadowOffsetN32; else if (IsMIPS32) Mapping.Offset = kMIPS32_ShadowOffset32; else if (IsFreeBSD) diff --git a/llvm/lib/Transforms/Instrumentation/CGProfile.cpp b/llvm/lib/Transforms/Instrumentation/CGProfile.cpp index b11b84d65d23..57c491436b93 100644 --- a/llvm/lib/Transforms/Instrumentation/CGProfile.cpp +++ b/llvm/lib/Transforms/Instrumentation/CGProfile.cpp @@ -39,7 +39,8 @@ addModuleFlags(Module &M, Nodes.push_back(MDNode::get(Context, Vals)); } - M.addModuleFlag(Module::Append, "CG Profile", MDNode::get(Context, Nodes)); + M.addModuleFlag(Module::Append, "CG Profile", + MDTuple::getDistinct(Context, Nodes)); return true; } diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp index 218b4bbfb6c0..b01c74320380 100644 --- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp @@ -180,11 +180,31 @@ static cl::opt<bool> ClWithTls( "platforms that support this"), cl::Hidden, cl::init(true)); -static cl::opt<bool> - ClRecordStackHistory("hwasan-record-stack-history", - cl::desc("Record stack frames with tagged allocations " - "in a thread-local ring buffer"), - cl::Hidden, cl::init(true)); +// Mode for selecting how to insert frame record info into the stack ring +// buffer. +enum RecordStackHistoryMode { + // Do not record frame record info. + none, + + // Insert instructions into the prologue for storing into the stack ring + // buffer directly. + instr, + + // Add a call to __hwasan_add_frame_record in the runtime. 
+ libcall, +}; + +static cl::opt<RecordStackHistoryMode> ClRecordStackHistory( + "hwasan-record-stack-history", + cl::desc("Record stack frames with tagged allocations in a thread-local " + "ring buffer"), + cl::values(clEnumVal(none, "Do not record stack ring history"), + clEnumVal(instr, "Insert instructions into the prologue for " + "storing into the stack ring buffer directly"), + clEnumVal(libcall, "Add a call to __hwasan_add_frame_record for " + "storing into the stack ring buffer")), + cl::Hidden, cl::init(instr)); + static cl::opt<bool> ClInstrumentMemIntrinsics("hwasan-instrument-mem-intrinsics", cl::desc("instrument memory intrinsics"), @@ -313,6 +333,7 @@ public: Value *getPC(IRBuilder<> &IRB); Value *getSP(IRBuilder<> &IRB); + Value *getFrameRecordInfo(IRBuilder<> &IRB); void instrumentPersonalityFunctions(); @@ -378,6 +399,7 @@ private: FunctionCallee HwasanTagMemoryFunc; FunctionCallee HwasanGenerateTagFunc; + FunctionCallee HwasanRecordFrameRecordFunc; Constant *ShadowGlobal; @@ -629,6 +651,9 @@ void HWAddressSanitizer::initializeCallbacks(Module &M) { HwasanGenerateTagFunc = M.getOrInsertFunction("__hwasan_generate_tag", Int8Ty); + HwasanRecordFrameRecordFunc = M.getOrInsertFunction( + "__hwasan_add_frame_record", IRB.getVoidTy(), Int64Ty); + ShadowGlobal = M.getOrInsertGlobal("__hwasan_shadow", ArrayType::get(IRB.getInt8Ty(), 0)); @@ -1132,6 +1157,21 @@ Value *HWAddressSanitizer::getSP(IRBuilder<> &IRB) { return CachedSP; } +Value *HWAddressSanitizer::getFrameRecordInfo(IRBuilder<> &IRB) { + // Prepare ring buffer data. + Value *PC = getPC(IRB); + Value *SP = getSP(IRB); + + // Mix SP and PC. + // Assumptions: + // PC is 0x0000PPPPPPPPPPPP (48 bits are meaningful, others are zero) + // SP is 0xsssssssssssSSSS0 (4 lower bits are zero) + // We only really need ~20 lower non-zero bits (SSSS), so we mix like this: + // 0xSSSSPPPPPPPPPPPP + SP = IRB.CreateShl(SP, 44); + return IRB.CreateOr(PC, SP); +} + void HWAddressSanitizer::emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord) { if (!Mapping.InTls) ShadowBase = getShadowNonTls(IRB); @@ -1141,50 +1181,67 @@ void HWAddressSanitizer::emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord) { if (!WithFrameRecord && ShadowBase) return; - Value *SlotPtr = getHwasanThreadSlotPtr(IRB, IntptrTy); - assert(SlotPtr); - - Value *ThreadLong = IRB.CreateLoad(IntptrTy, SlotPtr); - // Extract the address field from ThreadLong. Unnecessary on AArch64 with TBI. - Value *ThreadLongMaybeUntagged = - TargetTriple.isAArch64() ? ThreadLong : untagPointer(IRB, ThreadLong); + Value *SlotPtr = nullptr; + Value *ThreadLong = nullptr; + Value *ThreadLongMaybeUntagged = nullptr; + + auto getThreadLongMaybeUntagged = [&]() { + if (!SlotPtr) + SlotPtr = getHwasanThreadSlotPtr(IRB, IntptrTy); + if (!ThreadLong) + ThreadLong = IRB.CreateLoad(IntptrTy, SlotPtr); + // Extract the address field from ThreadLong. Unnecessary on AArch64 with + // TBI. + return TargetTriple.isAArch64() ? ThreadLong + : untagPointer(IRB, ThreadLong); + }; if (WithFrameRecord) { - StackBaseTag = IRB.CreateAShr(ThreadLong, 3); - - // Prepare ring buffer data. - Value *PC = getPC(IRB); - Value *SP = getSP(IRB); - - // Mix SP and PC. - // Assumptions: - // PC is 0x0000PPPPPPPPPPPP (48 bits are meaningful, others are zero) - // SP is 0xsssssssssssSSSS0 (4 lower bits are zero) - // We only really need ~20 lower non-zero bits (SSSS), so we mix like this: - // 0xSSSSPPPPPPPPPPPP - SP = IRB.CreateShl(SP, 44); - - // Store data to ring buffer. 
- Value *RecordPtr = - IRB.CreateIntToPtr(ThreadLongMaybeUntagged, IntptrTy->getPointerTo(0)); - IRB.CreateStore(IRB.CreateOr(PC, SP), RecordPtr); - - // Update the ring buffer. Top byte of ThreadLong defines the size of the - // buffer in pages, it must be a power of two, and the start of the buffer - // must be aligned by twice that much. Therefore wrap around of the ring - // buffer is simply Addr &= ~((ThreadLong >> 56) << 12). - // The use of AShr instead of LShr is due to - // https://bugs.llvm.org/show_bug.cgi?id=39030 - // Runtime library makes sure not to use the highest bit. - Value *WrapMask = IRB.CreateXor( - IRB.CreateShl(IRB.CreateAShr(ThreadLong, 56), 12, "", true, true), - ConstantInt::get(IntptrTy, (uint64_t)-1)); - Value *ThreadLongNew = IRB.CreateAnd( - IRB.CreateAdd(ThreadLong, ConstantInt::get(IntptrTy, 8)), WrapMask); - IRB.CreateStore(ThreadLongNew, SlotPtr); + switch (ClRecordStackHistory) { + case libcall: { + // Emit a runtime call into hwasan rather than emitting instructions for + // recording stack history. + Value *FrameRecordInfo = getFrameRecordInfo(IRB); + IRB.CreateCall(HwasanRecordFrameRecordFunc, {FrameRecordInfo}); + break; + } + case instr: { + ThreadLongMaybeUntagged = getThreadLongMaybeUntagged(); + + StackBaseTag = IRB.CreateAShr(ThreadLong, 3); + + // Store data to ring buffer. + Value *FrameRecordInfo = getFrameRecordInfo(IRB); + Value *RecordPtr = IRB.CreateIntToPtr(ThreadLongMaybeUntagged, + IntptrTy->getPointerTo(0)); + IRB.CreateStore(FrameRecordInfo, RecordPtr); + + // Update the ring buffer. Top byte of ThreadLong defines the size of the + // buffer in pages, it must be a power of two, and the start of the buffer + // must be aligned by twice that much. Therefore wrap around of the ring + // buffer is simply Addr &= ~((ThreadLong >> 56) << 12). + // The use of AShr instead of LShr is due to + // https://bugs.llvm.org/show_bug.cgi?id=39030 + // Runtime library makes sure not to use the highest bit. + Value *WrapMask = IRB.CreateXor( + IRB.CreateShl(IRB.CreateAShr(ThreadLong, 56), 12, "", true, true), + ConstantInt::get(IntptrTy, (uint64_t)-1)); + Value *ThreadLongNew = IRB.CreateAnd( + IRB.CreateAdd(ThreadLong, ConstantInt::get(IntptrTy, 8)), WrapMask); + IRB.CreateStore(ThreadLongNew, SlotPtr); + break; + } + case none: { + llvm_unreachable( + "A stack history recording mode should've been selected."); + } + } } if (!ShadowBase) { + if (!ThreadLongMaybeUntagged) + ThreadLongMaybeUntagged = getThreadLongMaybeUntagged(); + // Get shadow base address by aligning RecordPtr up. // Note: this is not correct if the pointer is already aligned. // Runtime library will make sure this never happens. 
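Two constants in the sanitizer hunks above are easy to sanity-check. ASan's shadow address is (Addr >> Scale) + Offset, with Scale being 3 in the default mapping, which is where the new kMIPS_ShadowOffsetN32 plugs in; HWASan's new getFrameRecordInfo() packs PC and SP into one 64-bit ring-buffer record laid out as 0xSSSSPPPPPPPPPPPP. A standalone check of both, independent of the LLVM code:

```cpp
#include <cstdint>

// ASan: shadow = (addr >> 3) + offset, with the new N32 offset.
constexpr uint64_t kMIPS_ShadowOffsetN32 = 1ULL << 29;
constexpr uint64_t asanShadow(uint64_t Addr) {
  return (Addr >> 3) + kMIPS_ShadowOffsetN32;
}
static_assert(asanShadow(0) == (1ULL << 29), "offset anchors the shadow");

// HWASan: PC occupies the low 48 bits, SP's low 4 bits are zero, and only
// ~20 low SP bits matter, so PC | (SP << 44) yields 0xSSSSPPPPPPPPPPPP.
constexpr uint64_t packFrameRecord(uint64_t PC, uint64_t SP) {
  return PC | (SP << 44);
}
static_assert(packFrameRecord(0x0000123456789abcULL, 0x00007fffffff0120ULL) ==
                  0xf012123456789abcULL,
              "SP's low bits land in the top 16 bits of the record");
```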
@@ -1408,7 +1465,7 @@ bool HWAddressSanitizer::sanitizeFunction(Function &F, Instruction *InsertPt = &*F.getEntryBlock().begin(); IRBuilder<> EntryIRB(InsertPt); emitPrologue(EntryIRB, - /*WithFrameRecord*/ ClRecordStackHistory && + /*WithFrameRecord*/ ClRecordStackHistory != none && Mapping.WithFrameRecord && !SInfo.AllocasToInstrument.empty()); diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp index 7843b1522830..3572cb3b50e2 100644 --- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -1244,6 +1244,7 @@ bool InstrProfiling::emitRuntimeHook() { auto *Var = new GlobalVariable(*M, Int32Ty, false, GlobalValue::ExternalLinkage, nullptr, getInstrProfRuntimeHookVarName()); + Var->setVisibility(GlobalValue::HiddenVisibility); if (TT.isOSBinFormatELF() && !TT.isPS()) { // Mark the user variable as used so that it isn't stripped out. diff --git a/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp index c33b1b3b1a5c..d4aa31db8337 100644 --- a/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp @@ -486,7 +486,7 @@ static bool isTsanAtomic(const Instruction *I) { if (!SSID) return false; if (isa<LoadInst>(I) || isa<StoreInst>(I)) - return SSID.getValue() != SyncScope::SingleThread; + return SSID.value() != SyncScope::SingleThread; return true; } diff --git a/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp b/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp index 8a1761505d59..fe6f9486ab0c 100644 --- a/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp +++ b/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp @@ -611,9 +611,9 @@ ConstantHoistingPass::maximizeConstantsInRange(ConstCandVecType::iterator S, ConstCand->ConstInt->getValue()); if (Diff) { const InstructionCost ImmCosts = - TTI->getIntImmCodeSizeCost(Opcode, OpndIdx, Diff.getValue(), Ty); + TTI->getIntImmCodeSizeCost(Opcode, OpndIdx, Diff.value(), Ty); Cost -= ImmCosts; - LLVM_DEBUG(dbgs() << "Offset " << Diff.getValue() << " " + LLVM_DEBUG(dbgs() << "Offset " << Diff.value() << " " << "has penalty: " << ImmCosts << "\n" << "Adjusted cost: " << Cost << "\n"); } diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp index 783301fe589e..b460637b7d88 100644 --- a/llvm/lib/Transforms/Scalar/GVN.cpp +++ b/llvm/lib/Transforms/Scalar/GVN.cpp @@ -748,14 +748,14 @@ void GVNPass::printPipeline( OS << "<"; if (Options.AllowPRE != None) - OS << (Options.AllowPRE.getValue() ? "" : "no-") << "pre;"; + OS << (Options.AllowPRE.value() ? "" : "no-") << "pre;"; if (Options.AllowLoadPRE != None) - OS << (Options.AllowLoadPRE.getValue() ? "" : "no-") << "load-pre;"; + OS << (Options.AllowLoadPRE.value() ? "" : "no-") << "load-pre;"; if (Options.AllowLoadPRESplitBackedge != None) - OS << (Options.AllowLoadPRESplitBackedge.getValue() ? "" : "no-") + OS << (Options.AllowLoadPRESplitBackedge.value() ? "" : "no-") << "split-backedge-load-pre;"; if (Options.AllowMemDep != None) - OS << (Options.AllowMemDep.getValue() ? "" : "no-") << "memdep"; + OS << (Options.AllowMemDep.value() ? 
"" : "no-") << "memdep"; OS << ">"; } @@ -1059,8 +1059,8 @@ static void reportMayClobberedLoad(LoadInst *Load, MemDepResult DepInfo, if (DT->dominates(cast<Instruction>(OtherAccess), cast<Instruction>(U))) OtherAccess = U; else - assert(DT->dominates(cast<Instruction>(U), - cast<Instruction>(OtherAccess))); + assert(U == OtherAccess || DT->dominates(cast<Instruction>(U), + cast<Instruction>(OtherAccess))); } else OtherAccess = U; } @@ -1494,14 +1494,6 @@ bool GVNPass::PerformLoadPRE(LoadInst *Load, AvailValInBlkVect &ValuesPerBlock, return false; } - // FIXME: Can we support the fallthrough edge? - if (isa<CallBrInst>(Pred->getTerminator())) { - LLVM_DEBUG( - dbgs() << "COULD NOT PRE LOAD BECAUSE OF CALLBR CRITICAL EDGE '" - << Pred->getName() << "': " << *Load << '\n'); - return false; - } - if (LoadBB->isEHPad()) { LLVM_DEBUG( dbgs() << "COULD NOT PRE LOAD BECAUSE OF AN EH PAD CRITICAL EDGE '" @@ -2875,11 +2867,6 @@ bool GVNPass::performScalarPRE(Instruction *CurInst) { if (isa<IndirectBrInst>(PREPred->getTerminator())) return false; - // Don't do PRE across callbr. - // FIXME: Can we do this across the fallthrough edge? - if (isa<CallBrInst>(PREPred->getTerminator())) - return false; - // We can't do PRE safely on a critical edge, so instead we schedule // the edge to be split and perform the PRE the next time we iterate // on the function. diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp index e977dd18be9f..a9ca0bdc8f7b 100644 --- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -106,13 +106,18 @@ static cl::opt<bool> VerifyIndvars( static cl::opt<ReplaceExitVal> ReplaceExitValue( "replexitval", cl::Hidden, cl::init(OnlyCheapRepl), cl::desc("Choose the strategy to replace exit value in IndVarSimplify"), - cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"), - clEnumValN(OnlyCheapRepl, "cheap", - "only replace exit value when the cost is cheap"), - clEnumValN(NoHardUse, "noharduse", - "only replace exit values when loop def likely dead"), - clEnumValN(AlwaysRepl, "always", - "always replace exit value whenever possible"))); + cl::values( + clEnumValN(NeverRepl, "never", "never replace exit value"), + clEnumValN(OnlyCheapRepl, "cheap", + "only replace exit value when the cost is cheap"), + clEnumValN( + UnusedIndVarInLoop, "unusedindvarinloop", + "only replace exit value when it is an unused " + "induction variable in the loop and has cheap replacement cost"), + clEnumValN(NoHardUse, "noharduse", + "only replace exit values when loop def likely dead"), + clEnumValN(AlwaysRepl, "always", + "always replace exit value whenever possible"))); static cl::opt<bool> UsePostIncrementRanges( "indvars-post-increment-ranges", cl::Hidden, @@ -1302,15 +1307,39 @@ static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken, } static void replaceLoopPHINodesWithPreheaderValues( - Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts) { + LoopInfo *LI, Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts) { assert(L->isLoopSimplifyForm() && "Should only do it in simplify form!"); auto *LoopPreheader = L->getLoopPreheader(); auto *LoopHeader = L->getHeader(); + SmallVector<Instruction *> Worklist; for (auto &PN : LoopHeader->phis()) { auto *PreheaderIncoming = PN.getIncomingValueForBlock(LoopPreheader); + for (User *U : PN.users()) + Worklist.push_back(cast<Instruction>(U)); PN.replaceAllUsesWith(PreheaderIncoming); DeadInsts.emplace_back(&PN); } + + // Replacing with 
the preheader value will often allow IV users to simplify + // (especially if the preheader value is a constant). + SmallPtrSet<Instruction *, 16> Visited; + while (!Worklist.empty()) { + auto *I = cast<Instruction>(Worklist.pop_back_val()); + if (!Visited.insert(I).second) + continue; + + // Don't simplify instructions outside the loop. + if (!L->contains(I)) + continue; + + Value *Res = simplifyInstruction(I, I->getModule()->getDataLayout()); + if (Res && LI->replacementPreservesLCSSAForm(I, Res)) { + for (User *U : I->users()) + Worklist.push_back(cast<Instruction>(U)); + I->replaceAllUsesWith(Res); + DeadInsts.emplace_back(I); + } + } } static void replaceWithInvariantCond( @@ -1549,14 +1578,19 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) { if (!BI) return true; - // If already constant, nothing to do. - if (isa<Constant>(BI->getCondition())) - return true; - // Likewise, the loop latch must be dominated by the exiting BB. if (!DT->dominates(ExitingBB, L->getLoopLatch())) return true; + if (auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) { + // If already constant, nothing to do. However, if this is an + // unconditional exit, we can still replace header phis with their + // preheader value. + if (!L->contains(BI->getSuccessor(CI->isNullValue()))) + replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts); + return true; + } + return false; }); @@ -1640,7 +1674,7 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) { // the header PHIs with values coming from the preheader. if (ExitCount->isZero()) { foldExit(L, ExitingBB, true, DeadInsts); - replaceLoopPHINodesWithPreheaderValues(L, DeadInsts); + replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts); Changed = true; continue; } diff --git a/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp index 799669a19796..b54cf5e7cb20 100644 --- a/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp +++ b/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp @@ -1710,7 +1710,7 @@ IntersectSignedRange(ScalarEvolution &SE, return None; if (!R1) return R2; - auto &R1Value = R1.getValue(); + auto &R1Value = R1.value(); // We never return empty ranges from this function, and R1 is supposed to be // a result of intersection. Thus, R1 is never empty. assert(!R1Value.isEmpty(SE, /* IsSigned */ true) && @@ -1739,7 +1739,7 @@ IntersectUnsignedRange(ScalarEvolution &SE, return None; if (!R1) return R2; - auto &R1Value = R1.getValue(); + auto &R1Value = R1.value(); // We never return empty ranges from this function, and R1 is supposed to be // a result of intersection. Thus, R1 is never empty. 
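// Aside: the .getValue()/.hasValue() -> .value()/.has_value() renames that
// recur through the remainder of this commit track llvm::Optional's
// convergence with std::optional. A minimal sketch of the pattern, assuming
// the LLVM-15-era llvm/ADT/Optional.h (demo() is a hypothetical example,
// not commit code):
#include "llvm/ADT/Optional.h"
int demo(llvm::Optional<int> O) {
  if (!O.has_value())  // previously spelled O.hasValue()
    return 0;
  return O.value();    // previously spelled O.getValue()
}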
assert(!R1Value.isEmpty(SE, /* IsSigned */ false) && @@ -1950,13 +1950,12 @@ bool InductiveRangeCheckElimination::run( LS.IsSignedPredicate); if (Result) { auto MaybeSafeIterRange = - IntersectRange(SE, SafeIterRange, Result.getValue()); + IntersectRange(SE, SafeIterRange, Result.value()); if (MaybeSafeIterRange) { - assert( - !MaybeSafeIterRange.getValue().isEmpty(SE, LS.IsSignedPredicate) && - "We should never return empty ranges!"); + assert(!MaybeSafeIterRange.value().isEmpty(SE, LS.IsSignedPredicate) && + "We should never return empty ranges!"); RangeChecksToEliminate.push_back(IRC); - SafeIterRange = MaybeSafeIterRange.getValue(); + SafeIterRange = MaybeSafeIterRange.value(); } } } @@ -1964,8 +1963,7 @@ bool InductiveRangeCheckElimination::run( if (!SafeIterRange) return false; - LoopConstrainer LC(*L, LI, LPMAddNewLoop, LS, SE, DT, - SafeIterRange.getValue()); + LoopConstrainer LC(*L, LI, LPMAddNewLoop, LS, SE, DT, SafeIterRange.value()); bool Changed = LC.run(); if (Changed) { diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp index 5caefc422921..b31eab50c5ec 100644 --- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp +++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp @@ -1459,9 +1459,7 @@ bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) { // Add all the unavailable predecessors to the PredsToSplit list. for (BasicBlock *P : predecessors(LoadBB)) { // If the predecessor is an indirect goto, we can't split the edge. - // Same for CallBr. - if (isa<IndirectBrInst>(P->getTerminator()) || - isa<CallBrInst>(P->getTerminator())) + if (isa<IndirectBrInst>(P->getTerminator())) return false; if (!AvailablePredSet.count(P)) @@ -1685,9 +1683,8 @@ bool JumpThreadingPass::processThreadableEdges(Value *Cond, BasicBlock *BB, } // If the predecessor ends with an indirect goto, we can't change its - // destination. Same for CallBr. - if (isa<IndirectBrInst>(Pred->getTerminator()) || - isa<CallBrInst>(Pred->getTerminator())) + // destination. + if (isa<IndirectBrInst>(Pred->getTerminator())) continue; PredToDestList.emplace_back(Pred, DestBB); @@ -1924,10 +1921,9 @@ bool JumpThreadingPass::processBranchOnXOR(BinaryOperator *BO) { } // If any of predecessors end with an indirect goto, we can't change its - // destination. Same for CallBr. + // destination. if (any_of(BlocksToFoldInto, [](BasicBlock *Pred) { - return isa<IndirectBrInst>(Pred->getTerminator()) || - isa<CallBrInst>(Pred->getTerminator()); + return isa<IndirectBrInst>(Pred->getTerminator()); })) return false; @@ -2173,6 +2169,9 @@ bool JumpThreadingPass::maybethreadThroughTwoBasicBlocks(BasicBlock *BB, BasicBlock *ZeroPred = nullptr; BasicBlock *OnePred = nullptr; for (BasicBlock *P : predecessors(PredBB)) { + // If PredPred ends with IndirectBrInst, we can't handle it. 
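// Aside: the indirectbr guard above stays because indirectbr successors are
// reached through escaping blockaddress values, so the edge cannot be
// redirected. callbr, the IR form of asm goto, no longer needs the parallel
// treatment, which is why the CallBrInst checks are deleted throughout this
// commit: its edges can now be split like ordinary branches. An x86-flavored
// sketch of the construct that produces callbr, assuming the GCC/Clang
// asm-goto extension (callbr_demo is a hypothetical example):
int callbr_demo(int x) {
  asm goto("testl %0, %0; jne %l[error]" : : "r"(x) : "cc" : error);
  return x;   // fallthrough successor of the callbr
error:
  return -1;  // indirect successor; now splittable when critical
}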
+ if (isa<IndirectBrInst>(P->getTerminator())) + continue; if (ConstantInt *CI = dyn_cast_or_null<ConstantInt>( evaluateOnPredecessorEdge(BB, P, Cond))) { if (CI->isZero()) { diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp index 492f4e40395a..f54264b1dca6 100644 --- a/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/llvm/lib/Transforms/Scalar/LICM.cpp @@ -1508,8 +1508,7 @@ static bool canSplitPredecessors(PHINode *PN, LoopSafetyInfo *SafetyInfo) { if (!SafetyInfo->getBlockColors().empty() && BB->getFirstNonPHI()->isEHPad()) return false; for (BasicBlock *BBPred : predecessors(BB)) { - if (isa<IndirectBrInst>(BBPred->getTerminator()) || - isa<CallBrInst>(BBPred->getTerminator())) + if (isa<IndirectBrInst>(BBPred->getTerminator())) return false; } return true; diff --git a/llvm/lib/Transforms/Scalar/LoopDistribute.cpp b/llvm/lib/Transforms/Scalar/LoopDistribute.cpp index 03a10cb36bb6..b178bcae3b0e 100644 --- a/llvm/lib/Transforms/Scalar/LoopDistribute.cpp +++ b/llvm/lib/Transforms/Scalar/LoopDistribute.cpp @@ -602,7 +602,7 @@ private: : LLVMLoopDistributeFollowupCoincident}); if (PartitionID) { Loop *NewLoop = Part->getDistributedLoop(); - NewLoop->setLoopID(PartitionID.getValue()); + NewLoop->setLoopID(PartitionID.value()); } } }; @@ -826,7 +826,7 @@ public: {LLVMLoopDistributeFollowupAll, LLVMLoopDistributeFollowupFallback}, "llvm.loop.distribute.", true) - .getValue(); + .value(); LVer.getNonVersionedLoop()->setLoopID(UnversionedLoopID); } diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 88d6a7aff3c9..d908c151d9f2 100644 --- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -1483,7 +1483,7 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad( // anything where the alignment isn't at least the element size. assert((StoreAlign && LoadAlign) && "Expect unordered load/store to have align."); - if (StoreAlign.getValue() < StoreSize || LoadAlign.getValue() < StoreSize) + if (StoreAlign.value() < StoreSize || LoadAlign.value() < StoreSize) return Changed; // If the element.atomic memcpy is not lowered into explicit @@ -1497,7 +1497,7 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad( // Note that unordered atomic loads/stores are *required* by the spec to // have an alignment but non-atomic loads/stores may not. NewCall = Builder.CreateElementUnorderedAtomicMemCpy( - StoreBasePtr, StoreAlign.getValue(), LoadBasePtr, LoadAlign.getValue(), + StoreBasePtr, StoreAlign.value(), LoadBasePtr, LoadAlign.value(), NumBytes, StoreSize, AATags.TBAA, AATags.TBAAStruct, AATags.Scope, AATags.NoAlias); } diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 9959e408e2e2..4ef7809c6681 100644 --- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -5601,27 +5601,6 @@ void LSRInstance::Rewrite(const LSRUse &LU, const LSRFixup &LF, DeadInsts.emplace_back(OperandIsInstr); } -// Check if there are any loop exit values which are only used once within the -// loop which may potentially be optimized with a call to rewriteLoopExitValue. 
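// Aside: the single-use scan described above is deleted just below; the LSR
// hunk further down instead calls rewriteLoopExitValues with the new
// UnusedIndVarInLoop mode added in this commit. A source-level sketch of the
// loop shape both target (count_up_to is a hypothetical example):
int count_up_to(int n) {
  int i = 0;
  while (i < n)
    ++i;     // the update is the only in-loop user of i
  return i;  // exit value, rewritable to the SCEV-computed final value
}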
-static bool LoopExitValHasSingleUse(Loop *L) { - BasicBlock *ExitBB = L->getExitBlock(); - if (!ExitBB) - return false; - - for (PHINode &ExitPhi : ExitBB->phis()) { - if (ExitPhi.getNumIncomingValues() != 1) - break; - - BasicBlock *Pred = ExitPhi.getIncomingBlock(0); - Value *IVNext = ExitPhi.getIncomingValueForBlock(Pred); - // One use would be the exit phi node, and there should be only one other - // use for this to be considered. - if (IVNext->getNumUses() == 2) - return true; - } - return false; -} - /// Rewrite all the fixup locations with new values, following the chosen /// solution. void LSRInstance::ImplementSolution( @@ -6406,8 +6385,8 @@ static bool SalvageDVI(llvm::Loop *L, ScalarEvolution &SE, // less DWARF ops than an iteration count-based expression. if (Optional<APInt> Offset = SE.computeConstantDifference(DVIRec.SCEVs[i], SCEVInductionVar)) { - if (Offset.getValue().getMinSignedBits() <= 64) - SalvageExpr->createOffsetExpr(Offset.getValue().getSExtValue(), + if (Offset.value().getMinSignedBits() <= 64) + SalvageExpr->createOffsetExpr(Offset.value().getSExtValue(), LSRInductionVar); } else if (!SalvageExpr->createIterCountExpr(DVIRec.SCEVs[i], IterCountExpr, SE)) @@ -6627,12 +6606,12 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE, // When this is the case, if the exit value of the IV can be calculated using // SCEV, we can replace the exit block PHI with the final value of the IV and // skip the updates in each loop iteration. - if (L->isRecursivelyLCSSAForm(DT, LI) && LoopExitValHasSingleUse(L)) { + if (L->isRecursivelyLCSSAForm(DT, LI) && L->getExitBlock()) { SmallVector<WeakTrackingVH, 16> DeadInsts; const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); SCEVExpander Rewriter(SE, DL, "lsr", false); int Rewrites = rewriteLoopExitValues(L, &LI, &TLI, &SE, &TTI, Rewriter, &DT, - OnlyCheapRepl, DeadInsts); + UnusedIndVarInLoop, DeadInsts); if (Rewrites) { Changed = true; RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts, &TLI, diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp index 8c2868563227..64fcdfa15aa9 100644 --- a/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp @@ -373,7 +373,7 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, OrigOuterLoopID, {LLVMLoopUnrollAndJamFollowupAll, LLVMLoopUnrollAndJamFollowupRemainderInner}); if (NewInnerEpilogueLoopID) - SubLoop->setLoopID(NewInnerEpilogueLoopID.getValue()); + SubLoop->setLoopID(NewInnerEpilogueLoopID.value()); // Find trip count and trip multiple BasicBlock *Latch = L->getLoopLatch(); @@ -403,14 +403,14 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, OrigOuterLoopID, {LLVMLoopUnrollAndJamFollowupAll, LLVMLoopUnrollAndJamFollowupRemainderOuter}); if (NewOuterEpilogueLoopID) - EpilogueOuterLoop->setLoopID(NewOuterEpilogueLoopID.getValue()); + EpilogueOuterLoop->setLoopID(NewOuterEpilogueLoopID.value()); } Optional<MDNode *> NewInnerLoopID = makeFollowupLoopID(OrigOuterLoopID, {LLVMLoopUnrollAndJamFollowupAll, LLVMLoopUnrollAndJamFollowupInner}); if (NewInnerLoopID) - SubLoop->setLoopID(NewInnerLoopID.getValue()); + SubLoop->setLoopID(NewInnerLoopID.value()); else SubLoop->setLoopID(OrigSubLoopID); @@ -419,7 +419,7 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, OrigOuterLoopID, {LLVMLoopUnrollAndJamFollowupAll, LLVMLoopUnrollAndJamFollowupOuter}); if (NewOuterLoopID) { - 
L->setLoopID(NewOuterLoopID.getValue()); + L->setLoopID(NewOuterLoopID.value()); // Do not setLoopAlreadyUnrolled if a followup was given. return UnrollResult; diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp index fda86afe5f9d..de5833f60adc 100644 --- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -1324,7 +1324,7 @@ static LoopUnrollResult tryToUnrollLoop( makeFollowupLoopID(OrigLoopID, {LLVMLoopUnrollFollowupAll, LLVMLoopUnrollFollowupRemainder}); if (RemainderLoopID) - RemainderLoop->setLoopID(RemainderLoopID.getValue()); + RemainderLoop->setLoopID(RemainderLoopID.value()); } if (UnrollResult != LoopUnrollResult::FullyUnrolled) { @@ -1332,7 +1332,7 @@ static LoopUnrollResult tryToUnrollLoop( makeFollowupLoopID(OrigLoopID, {LLVMLoopUnrollFollowupAll, LLVMLoopUnrollFollowupUnrolled}); if (NewLoopID) { - L->setLoopID(NewLoopID.getValue()); + L->setLoopID(NewLoopID.value()); // Do not setLoopAlreadyUnrolled if loop attributes have been specified // explicitly. @@ -1645,15 +1645,15 @@ void LoopUnrollPass::printPipeline( OS, MapClassName2PassName); OS << "<"; if (UnrollOpts.AllowPartial != None) - OS << (UnrollOpts.AllowPartial.getValue() ? "" : "no-") << "partial;"; + OS << (UnrollOpts.AllowPartial.value() ? "" : "no-") << "partial;"; if (UnrollOpts.AllowPeeling != None) - OS << (UnrollOpts.AllowPeeling.getValue() ? "" : "no-") << "peeling;"; + OS << (UnrollOpts.AllowPeeling.value() ? "" : "no-") << "peeling;"; if (UnrollOpts.AllowRuntime != None) - OS << (UnrollOpts.AllowRuntime.getValue() ? "" : "no-") << "runtime;"; + OS << (UnrollOpts.AllowRuntime.value() ? "" : "no-") << "runtime;"; if (UnrollOpts.AllowUpperBound != None) - OS << (UnrollOpts.AllowUpperBound.getValue() ? "" : "no-") << "upperbound;"; + OS << (UnrollOpts.AllowUpperBound.value() ? "" : "no-") << "upperbound;"; if (UnrollOpts.AllowProfileBasedPeeling != None) - OS << (UnrollOpts.AllowProfileBasedPeeling.getValue() ? "" : "no-") + OS << (UnrollOpts.AllowProfileBasedPeeling.value() ? "" : "no-") << "profile-peeling;"; if (UnrollOpts.FullUnrollMaxCount != None) OS << "full-unroll-max=" << UnrollOpts.FullUnrollMaxCount << ";"; diff --git a/llvm/lib/Transforms/Scalar/Reassociate.cpp b/llvm/lib/Transforms/Scalar/Reassociate.cpp index da1737979305..75f0896d4845 100644 --- a/llvm/lib/Transforms/Scalar/Reassociate.cpp +++ b/llvm/lib/Transforms/Scalar/Reassociate.cpp @@ -29,6 +29,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Argument.h" @@ -1929,11 +1930,23 @@ Value *ReassociatePass::OptimizeExpression(BinaryOperator *I, SmallVectorImpl<ValueEntry> &Ops) { // Now that we have the linearized expression tree, try to optimize it. // Start by folding any constants that we found. + const DataLayout &DL = I->getModule()->getDataLayout(); Constant *Cst = nullptr; unsigned Opcode = I->getOpcode(); - while (!Ops.empty() && isa<Constant>(Ops.back().Op)) { - Constant *C = cast<Constant>(Ops.pop_back_val().Op); - Cst = Cst ? 
ConstantExpr::get(Opcode, C, Cst) : C; + while (!Ops.empty()) { + if (auto *C = dyn_cast<Constant>(Ops.back().Op)) { + if (!Cst) { + Ops.pop_back(); + Cst = C; + continue; + } + if (Constant *Res = ConstantFoldBinaryOpOperands(Opcode, C, Cst, DL)) { + Ops.pop_back(); + Cst = Res; + continue; + } + } + break; } // If there was nothing but constants then we are done. if (Ops.empty()) diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp index e9983ff82176..079b2fc973b9 100644 --- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -769,8 +769,7 @@ llvm::SplitAllCriticalEdges(Function &F, unsigned NumBroken = 0; for (BasicBlock &BB : F) { Instruction *TI = BB.getTerminator(); - if (TI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(TI) && - !isa<CallBrInst>(TI)) + if (TI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(TI)) for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) if (SplitCriticalEdge(TI, i, Options)) ++NumBroken; @@ -1132,9 +1131,7 @@ SplitBlockPredecessorsImpl(BasicBlock *BB, ArrayRef<BasicBlock *> Preds, // all BlockAddress uses would need to be updated. assert(!isa<IndirectBrInst>(Preds[i]->getTerminator()) && "Cannot split an edge from an IndirectBrInst"); - assert(!isa<CallBrInst>(Preds[i]->getTerminator()) && - "Cannot split an edge from a CallBrInst"); - Preds[i]->getTerminator()->replaceUsesOfWith(BB, NewBB); + Preds[i]->getTerminator()->replaceSuccessorWith(BB, NewBB); } // Insert a new PHI node into NewBB for every PHI node in BB and that new PHI diff --git a/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp b/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp index 0b36e8708a03..9c595401ce29 100644 --- a/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp +++ b/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp @@ -129,8 +129,7 @@ llvm::SplitKnownCriticalEdge(Instruction *TI, unsigned SuccNum, SmallVector<BasicBlock *, 4> LoopPreds; // Check if extra modifications will be required to preserve loop-simplify // form after splitting. If it would require splitting blocks with IndirectBr - // or CallBr terminators, bail out if preserving loop-simplify form is - // requested. + // terminators, bail out if preserving loop-simplify form is requested. if (LI) { if (Loop *TIL = LI->getLoopFor(TIBB)) { @@ -156,10 +155,7 @@ llvm::SplitKnownCriticalEdge(Instruction *TI, unsigned SuccNum, // Loop-simplify form can be preserved, if we can split all in-loop // predecessors. 
if (any_of(LoopPreds, [](BasicBlock *Pred) { - const Instruction *T = Pred->getTerminator(); - if (const auto *CBR = dyn_cast<CallBrInst>(T)) - return CBR->getDefaultDest() != Pred; - return isa<IndirectBrInst>(T); + return isa<IndirectBrInst>(Pred->getTerminator()); })) { if (Options.PreserveLoopSimplify) return nullptr; diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index f94d854f7ee8..421f1f329f07 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -927,6 +927,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, case Attribute::AlwaysInline: case Attribute::Cold: case Attribute::DisableSanitizerInstrumentation: + case Attribute::FnRetThunkExtern: case Attribute::Hot: case Attribute::NoRecurse: case Attribute::InlineHint: @@ -1777,7 +1778,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, auto Count = BFI->getProfileCountFromFreq(EntryFreq.getFrequency()); if (Count) newFunction->setEntryCount( - ProfileCount(Count.getValue(), Function::PCT_Real)); // FIXME + ProfileCount(Count.value(), Function::PCT_Real)); // FIXME BFI->setBlockFreq(codeReplacer, EntryFreq.getFrequency()); } diff --git a/llvm/lib/Transforms/Utils/Debugify.cpp b/llvm/lib/Transforms/Utils/Debugify.cpp index 205f7a7d9ed2..24126b5ab67b 100644 --- a/llvm/lib/Transforms/Utils/Debugify.cpp +++ b/llvm/lib/Transforms/Utils/Debugify.cpp @@ -961,8 +961,13 @@ createDebugifyFunctionPass(enum DebugifyMode Mode, } PreservedAnalyses NewPMDebugifyPass::run(Module &M, ModuleAnalysisManager &) { - applyDebugifyMetadata(M, M.functions(), - "ModuleDebugify: ", /*ApplyToMF*/ nullptr); + if (Mode == DebugifyMode::SyntheticDebugInfo) + applyDebugifyMetadata(M, M.functions(), + "ModuleDebugify: ", /*ApplyToMF*/ nullptr); + else + collectDebugInfoMetadata(M, M.functions(), *DebugInfoBeforePass, + "ModuleDebugify (original debuginfo)", + NameOfWrappedPass); return PreservedAnalyses::all(); } @@ -992,8 +997,14 @@ FunctionPass *createCheckDebugifyFunctionPass( PreservedAnalyses NewPMCheckDebugifyPass::run(Module &M, ModuleAnalysisManager &) { - checkDebugifyMetadata(M, M.functions(), "", "CheckModuleDebugify", false, - nullptr); + if (Mode == DebugifyMode::SyntheticDebugInfo) + checkDebugifyMetadata(M, M.functions(), NameOfWrappedPass, + "CheckModuleDebugify", Strip, StatsMap); + else + checkDebugInfoMetadata( + M, M.functions(), *DebugInfoBeforePass, + "CheckModuleDebugify (original debuginfo)", NameOfWrappedPass, + OrigDIVerifyBugsReportFilePath); return PreservedAnalyses::all(); } @@ -1006,13 +1017,15 @@ static bool isIgnoredPass(StringRef PassID) { void DebugifyEachInstrumentation::registerCallbacks( PassInstrumentationCallbacks &PIC) { - PIC.registerBeforeNonSkippedPassCallback([](StringRef P, Any IR) { + PIC.registerBeforeNonSkippedPassCallback([this](StringRef P, Any IR) { if (isIgnoredPass(P)) return; if (any_isa<const Function *>(IR)) - applyDebugify(*const_cast<Function *>(any_cast<const Function *>(IR))); + applyDebugify(*const_cast<Function *>(any_cast<const Function *>(IR)), + Mode, DebugInfoBeforePass, P); else if (any_isa<const Module *>(IR)) - applyDebugify(*const_cast<Module *>(any_cast<const Module *>(IR))); + applyDebugify(*const_cast<Module *>(any_cast<const Module *>(IR)), + Mode, DebugInfoBeforePass, P); }); PIC.registerAfterPassCallback([this](StringRef P, Any IR, const PreservedAnalyses &PassPA) { @@ -1022,12 +1035,24 @@ void 
DebugifyEachInstrumentation::registerCallbacks( auto &F = *const_cast<Function *>(any_cast<const Function *>(IR)); Module &M = *F.getParent(); auto It = F.getIterator(); - checkDebugifyMetadata(M, make_range(It, std::next(It)), P, - "CheckFunctionDebugify", /*Strip=*/true, &StatsMap); + if (Mode == DebugifyMode::SyntheticDebugInfo) + checkDebugifyMetadata(M, make_range(It, std::next(It)), P, + "CheckFunctionDebugify", /*Strip=*/true, DIStatsMap); + else + checkDebugInfoMetadata( + M, make_range(It, std::next(It)), *DebugInfoBeforePass, + "CheckModuleDebugify (original debuginfo)", + P, OrigDIVerifyBugsReportFilePath); } else if (any_isa<const Module *>(IR)) { auto &M = *const_cast<Module *>(any_cast<const Module *>(IR)); - checkDebugifyMetadata(M, M.functions(), P, "CheckModuleDebugify", - /*Strip=*/true, &StatsMap); + if (Mode == DebugifyMode::SyntheticDebugInfo) + checkDebugifyMetadata(M, M.functions(), P, "CheckModuleDebugify", + /*Strip=*/true, DIStatsMap); + else + checkDebugInfoMetadata( + M, M.functions(), *DebugInfoBeforePass, + "CheckModuleDebugify (original debuginfo)", + P, OrigDIVerifyBugsReportFilePath); } }); } diff --git a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp index cd3b6c1a095a..023a0afd329b 100644 --- a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -402,7 +402,7 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool UseEpilogRemainder, Optional<MDNode *> NewLoopID = makeFollowupLoopID( LoopID, {LLVMLoopUnrollFollowupAll, LLVMLoopUnrollFollowupRemainder}); if (NewLoopID) { - NewLoop->setLoopID(NewLoopID.getValue()); + NewLoop->setLoopID(NewLoopID.value()); // Do not setLoopAlreadyUnrolled if loop attributes have been defined // explicitly. diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index ec898c463574..82f993b4ceab 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -75,9 +75,6 @@ bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, if (isa<IndirectBrInst>(PredBB->getTerminator())) // We cannot rewrite exiting edges from an indirectbr. return false; - if (isa<CallBrInst>(PredBB->getTerminator())) - // We cannot rewrite exiting edges from a callbr. - return false; InLoopPredecessors.push_back(PredBB); } else { @@ -359,7 +356,7 @@ TransformationMode llvm::hasUnrollTransformation(const Loop *L) { Optional<int> Count = getOptionalIntLoopAttribute(L, "llvm.loop.unroll.count"); if (Count) - return Count.getValue() == 1 ? TM_SuppressedByUser : TM_ForcedByUser; + return Count.value() == 1 ? TM_SuppressedByUser : TM_ForcedByUser; if (getBooleanLoopAttribute(L, "llvm.loop.unroll.enable")) return TM_ForcedByUser; @@ -380,7 +377,7 @@ TransformationMode llvm::hasUnrollAndJamTransformation(const Loop *L) { Optional<int> Count = getOptionalIntLoopAttribute(L, "llvm.loop.unroll_and_jam.count"); if (Count) - return Count.getValue() == 1 ? TM_SuppressedByUser : TM_ForcedByUser; + return Count.value() == 1 ? TM_SuppressedByUser : TM_ForcedByUser; if (getBooleanLoopAttribute(L, "llvm.loop.unroll_and_jam.enable")) return TM_ForcedByUser; @@ -1246,6 +1243,20 @@ static bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) return true; } +/// Checks if it is safe to call InductionDescriptor::isInductionPHI for \p Phi, +/// and returns true if this Phi is an induction phi in the loop. 
When +/// isInductionPHI returns true, \p ID will be also be set by isInductionPHI. +static bool checkIsIndPhi(PHINode *Phi, Loop *L, ScalarEvolution *SE, + InductionDescriptor &ID) { + if (!Phi) + return false; + if (!L->getLoopPreheader()) + return false; + if (Phi->getParent() != L->getHeader()) + return false; + return InductionDescriptor::isInductionPHI(Phi, L, SE, ID); +} + int llvm::rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI, ScalarEvolution *SE, const TargetTransformInfo *TTI, @@ -1297,6 +1308,46 @@ int llvm::rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI, if (!L->contains(Inst)) continue; + // Find exit values which are induction variables in the loop, and are + // unused in the loop, with the only use being the exit block PhiNode, + // and the induction variable update binary operator. + // The exit value can be replaced with the final value when it is cheap + // to do so. + if (ReplaceExitValue == UnusedIndVarInLoop) { + InductionDescriptor ID; + PHINode *IndPhi = dyn_cast<PHINode>(Inst); + if (IndPhi) { + if (!checkIsIndPhi(IndPhi, L, SE, ID)) + continue; + // This is an induction PHI. Check that the only users are PHI + // nodes, and induction variable update binary operators. + if (llvm::any_of(Inst->users(), [&](User *U) { + if (!isa<PHINode>(U) && !isa<BinaryOperator>(U)) + return true; + BinaryOperator *B = dyn_cast<BinaryOperator>(U); + if (B && B != ID.getInductionBinOp()) + return true; + return false; + })) + continue; + } else { + // If it is not an induction phi, it must be an induction update + // binary operator with an induction phi user. + BinaryOperator *B = dyn_cast<BinaryOperator>(Inst); + if (!B) + continue; + if (llvm::any_of(Inst->users(), [&](User *U) { + PHINode *Phi = dyn_cast<PHINode>(U); + if (Phi != PN && !checkIsIndPhi(Phi, L, SE, ID)) + return true; + return false; + })) + continue; + if (B != ID.getInductionBinOp()) + continue; + } + } + // Okay, this instruction has a user outside of the current loop // and varies predictably *inside* the loop. Evaluate the value it // contains when the loop exits, if possible. We prefer to start with @@ -1362,7 +1413,9 @@ int llvm::rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI, // Only do the rewrite when the ExitValue can be expanded cheaply. // If LoopCanBeDel is true, rewrite exit value aggressively. 
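// Aside: a distilled restatement of the UnusedIndVarInLoop filter above
// (onlyIVSelfUsers is a hypothetical helper, not commit code): an exit value
// qualifies only when every user is a PHI node or the induction update
// binop that InductionDescriptor identified.
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/Instructions.h"
static bool onlyIVSelfUsers(llvm::Instruction *Inst,
                            llvm::BinaryOperator *IVUpdate) {
  return llvm::none_of(Inst->users(), [&](llvm::User *U) {
    if (auto *B = llvm::dyn_cast<llvm::BinaryOperator>(U))
      return B != IVUpdate;              // a different arithmetic user
    return !llvm::isa<llvm::PHINode>(U); // anything but a PHI disqualifies
  });
}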
- if (ReplaceExitValue == OnlyCheapRepl && !LoopCanBeDel && Phi.HighCost) + if ((ReplaceExitValue == OnlyCheapRepl || + ReplaceExitValue == UnusedIndVarInLoop) && + !LoopCanBeDel && Phi.HighCost) continue; Value *ExitVal = Rewriter.expandCodeFor( diff --git a/llvm/lib/Transforms/Utils/LowerAtomic.cpp b/llvm/lib/Transforms/Utils/LowerAtomic.cpp index 8641581c8039..9914a5ca6c5e 100644 --- a/llvm/lib/Transforms/Utils/LowerAtomic.cpp +++ b/llvm/lib/Transforms/Utils/LowerAtomic.cpp @@ -74,6 +74,10 @@ Value *llvm::buildAtomicRMWValue(AtomicRMWInst::BinOp Op, return Builder.CreateFAdd(Loaded, Inc, "new"); case AtomicRMWInst::FSub: return Builder.CreateFSub(Loaded, Inc, "new"); + case AtomicRMWInst::FMax: + return Builder.CreateMaxNum(Loaded, Inc); + case AtomicRMWInst::FMin: + return Builder.CreateMinNum(Loaded, Inc); default: llvm_unreachable("Unknown atomic op"); } diff --git a/llvm/lib/Transforms/Utils/MisExpect.cpp b/llvm/lib/Transforms/Utils/MisExpect.cpp index b73d68ebec7c..4414b04c7264 100644 --- a/llvm/lib/Transforms/Utils/MisExpect.cpp +++ b/llvm/lib/Transforms/Utils/MisExpect.cpp @@ -221,7 +221,7 @@ void checkBackendInstrumentation(Instruction &I, auto ExpectedWeightsOpt = extractWeights(&I, I.getContext()); if (!ExpectedWeightsOpt) return; - auto ExpectedWeights = ExpectedWeightsOpt.getValue(); + auto ExpectedWeights = ExpectedWeightsOpt.value(); verifyMisExpect(I, RealWeights, ExpectedWeights); } @@ -230,7 +230,7 @@ void checkFrontendInstrumentation(Instruction &I, auto RealWeightsOpt = extractWeights(&I, I.getContext()); if (!RealWeightsOpt) return; - auto RealWeights = RealWeightsOpt.getValue(); + auto RealWeights = RealWeightsOpt.value(); verifyMisExpect(I, RealWeights, ExpectedWeights); } diff --git a/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/llvm/lib/Transforms/Utils/ModuleUtils.cpp index 5120ade70e16..9e1492b97a86 100644 --- a/llvm/lib/Transforms/Utils/ModuleUtils.cpp +++ b/llvm/lib/Transforms/Utils/ModuleUtils.cpp @@ -255,7 +255,7 @@ void VFABI::setVectorVariantNames(CallInst *CI, LLVM_DEBUG(dbgs() << "VFABI: adding mapping '" << VariantMapping << "'\n"); Optional<VFInfo> VI = VFABI::tryDemangleForVFABI(VariantMapping, *M); assert(VI && "Cannot add an invalid VFABI name."); - assert(M->getNamedValue(VI.getValue().VectorName) && + assert(M->getNamedValue(VI.value().VectorName) && "Cannot add variant to attribute: " "vector function declaration is missing."); } @@ -275,5 +275,13 @@ void llvm::embedBufferInModule(Module &M, MemoryBufferRef Buf, GV->setSection(SectionName); GV->setAlignment(Alignment); + LLVMContext &Ctx = M.getContext(); + NamedMDNode *MD = M.getOrInsertNamedMetadata("llvm.embedded.objects"); + Metadata *MDVals[] = {ConstantAsMetadata::get(GV), + MDString::get(Ctx, SectionName)}; + + MD->addOperand(llvm::MDNode::get(Ctx, MDVals)); + GV->setMetadata(LLVMContext::MD_exclude, llvm::MDNode::get(Ctx, {})); + appendToCompilerUsed(M, GV); } diff --git a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index aff692b36288..bec1db896efb 100644 --- a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -488,31 +488,33 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, StoresByIndex, std::make_pair(LoadIdx, static_cast<StoreInst *>(nullptr)), less_first()); + Value *ReplVal; if (I == StoresByIndex.begin()) { if (StoresByIndex.empty()) // If there are no stores, the load takes the undef value. 
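// Aside, on the FMax/FMin cases added to buildAtomicRMWValue above:
// CreateMaxNum/CreateMinNum carry llvm.maxnum/llvm.minnum semantics, i.e. a
// NaN operand yields the other value, matching libm fmax/fmin. Once the
// LowerAtomic pass drops atomicity, an atomicrmw fmax reduces to this sketch
// (lowered_fmax_rmw is a hypothetical illustration):
#include <cmath>
double lowered_fmax_rmw(double *P, double Inc) {
  double Old = *P;
  *P = std::fmax(Old, Inc); // maxnum: a NaN operand returns the other value
  return Old;               // atomicrmw yields the value before the update
}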
- LI->replaceAllUsesWith(UndefValue::get(LI->getType())); + ReplVal = UndefValue::get(LI->getType()); else // There is no store before this load, bail out (load may be affected // by the following stores - see main comment). return false; } else { - // Otherwise, there was a store before this load, the load takes its value. - // Note, if the load was marked as nonnull we don't want to lose that - // information when we erase it. So we preserve it with an assume. - Value *ReplVal = std::prev(I)->second->getOperand(0); - if (AC && LI->getMetadata(LLVMContext::MD_nonnull) && - !isKnownNonZero(ReplVal, DL, 0, AC, LI, &DT)) - addAssumeNonNull(AC, LI); + // Otherwise, there was a store before this load, the load takes its + // value. + ReplVal = std::prev(I)->second->getOperand(0); + } - // If the replacement value is the load, this must occur in unreachable - // code. - if (ReplVal == LI) - ReplVal = PoisonValue::get(LI->getType()); + // Note, if the load was marked as nonnull we don't want to lose that + // information when we erase it. So we preserve it with an assume. + if (AC && LI->getMetadata(LLVMContext::MD_nonnull) && + !isKnownNonZero(ReplVal, DL, 0, AC, LI, &DT)) + addAssumeNonNull(AC, LI); - LI->replaceAllUsesWith(ReplVal); - } + // If the replacement value is the load, this must occur in unreachable + // code. + if (ReplVal == LI) + ReplVal = PoisonValue::get(LI->getType()); + LI->replaceAllUsesWith(ReplVal); LI->eraseFromParent(); LBI.deleteValue(LI); } diff --git a/llvm/lib/Transforms/Utils/SCCPSolver.cpp b/llvm/lib/Transforms/Utils/SCCPSolver.cpp index eee91e70292e..09a83f1ea094 100644 --- a/llvm/lib/Transforms/Utils/SCCPSolver.cpp +++ b/llvm/lib/Transforms/Utils/SCCPSolver.cpp @@ -208,8 +208,6 @@ private: if (!Elt) LV.markOverdefined(); // Unknown sort of constant. - else if (isa<UndefValue>(Elt)) - ; // Undef values remain unknown. else LV.markConstant(Elt); // Constants are constant. } @@ -356,8 +354,7 @@ public: // We only track the contents of scalar globals. if (GV->getValueType()->isSingleValueType()) { ValueLatticeElement &IV = TrackedGlobals[GV]; - if (!isa<UndefValue>(GV->getInitializer())) - IV.markConstant(GV->getInitializer()); + IV.markConstant(GV->getInitializer()); } } @@ -822,9 +819,6 @@ void SCCPInstVisitor::visitCastInst(CastInst &I) { if (Constant *OpC = getConstant(OpSt)) { // Fold the constant as we build. Constant *C = ConstantFoldCastOperand(I.getOpcode(), OpC, I.getType(), DL); - if (isa<UndefValue>(C)) - return; - // Propagate constant value markConstant(&I, C); } else if (I.getDestTy()->isIntegerTy()) { auto &LV = getValueState(&I); @@ -959,19 +953,15 @@ void SCCPInstVisitor::visitUnaryOperator(Instruction &I) { if (isOverdefined(IV)) return (void)markOverdefined(&I); - if (isConstant(V0State)) { - Constant *C = ConstantExpr::get(I.getOpcode(), getConstant(V0State)); - - // op Y -> undef. - if (isa<UndefValue>(C)) - return; - return (void)markConstant(IV, &I, C); - } - - // If something is undef, wait for it to resolve. - if (!isOverdefined(V0State)) + // If something is unknown/undef, wait for it to resolve. + if (V0State.isUnknownOrUndef()) return; + if (isConstant(V0State)) + if (Constant *C = ConstantFoldUnaryOpOperand(I.getOpcode(), + getConstant(V0State), DL)) + return (void)markConstant(IV, &I, C); + markOverdefined(&I); } @@ -999,9 +989,6 @@ void SCCPInstVisitor::visitBinaryOperator(Instruction &I) { Value *R = simplifyBinOp(I.getOpcode(), V1, V2, SimplifyQuery(DL)); auto *C = dyn_cast_or_null<Constant>(R); if (C) { - // X op Y -> undef. 
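// Aside: the early returns deleted here (and in the other SCCP visitors
// below) kept an undef folding result in the unknown state; after this
// change the result is handed to the lattice like any other constant. For
// example, folding 'zext i1 undef to i32' in visitCastInst now reaches
// markConstant through the common path instead of bailing out.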
- if (isa<UndefValue>(C)) - return; // Conservatively assume that the result may be based on operands that may // be undef. Note that we use mergeInValue to combine the constant with // the existing lattice value for I, as different constants might be found @@ -1050,6 +1037,7 @@ void SCCPInstVisitor::visitCmpInst(CmpInst &I) { Constant *C = V1State.getCompare(I.getPredicate(), I.getType(), V2State); if (C) { + // TODO: getCompare() currently has incorrect handling for unknown/undef. if (isa<UndefValue>(C)) return; ValueLatticeElement CV; @@ -1095,8 +1083,6 @@ void SCCPInstVisitor::visitGetElementPtrInst(GetElementPtrInst &I) { auto Indices = makeArrayRef(Operands.begin() + 1, Operands.end()); Constant *C = ConstantExpr::getGetElementPtr(I.getSourceElementType(), Ptr, Indices); - if (isa<UndefValue>(C)) - return; markConstant(&I, C); } @@ -1174,11 +1160,8 @@ void SCCPInstVisitor::visitLoadInst(LoadInst &I) { } // Transform load from a constant into a constant if possible. - if (Constant *C = ConstantFoldLoadFromConstPtr(Ptr, I.getType(), DL)) { - if (isa<UndefValue>(C)) - return; + if (Constant *C = ConstantFoldLoadFromConstPtr(Ptr, I.getType(), DL)) return (void)markConstant(IV, &I, C); - } } // Fall back to metadata. @@ -1223,12 +1206,8 @@ void SCCPInstVisitor::handleCallOverdefined(CallBase &CB) { // If we can constant fold this, mark the result of the call as a // constant. - if (Constant *C = ConstantFoldCall(&CB, F, Operands, &GetTLI(*F))) { - // call -> undef. - if (isa<UndefValue>(C)) - return; + if (Constant *C = ConstantFoldCall(&CB, F, Operands, &GetTLI(*F))) return (void)markConstant(&CB, C); - } } // Fall back to metadata. diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp index 401f1ee5a55d..0c8bf3827256 100644 --- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp +++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp @@ -220,7 +220,8 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode, // Fold a binop with constant operands. if (Constant *CLHS = dyn_cast<Constant>(LHS)) if (Constant *CRHS = dyn_cast<Constant>(RHS)) - return ConstantExpr::get(Opcode, CLHS, CRHS); + if (Constant *Res = ConstantFoldBinaryOpOperands(Opcode, CLHS, CRHS, DL)) + return Res; // Do a quick scan to see if we have this binop nearby. If so, reuse it. unsigned ScanLimit = 6; diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 567b866f7777..4b5ade99767b 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -377,18 +377,12 @@ static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, /// expensive. static InstructionCost computeSpeculationCost(const User *I, const TargetTransformInfo &TTI) { - assert(isSafeToSpeculativelyExecute(I) && + assert((!isa<Instruction>(I) || + isSafeToSpeculativelyExecute(cast<Instruction>(I))) && "Instruction is not safe to speculatively execute!"); return TTI.getUserCost(I, TargetTransformInfo::TCK_SizeAndLatency); } -/// Check whether this is a potentially trapping constant. -static bool canTrap(const Value *V) { - if (auto *C = dyn_cast<Constant>(V)) - return C->canTrap(); - return false; -} - /// If we have a merge point of an "if condition" as accepted above, /// return true if the specified value dominates the block. 
We /// don't handle the true generality of domination here, just a special case @@ -421,9 +415,9 @@ static bool dominatesMergePoint(Value *V, BasicBlock *BB, Instruction *I = dyn_cast<Instruction>(V); if (!I) { - // Non-instructions all dominate instructions, but not all constantexprs - // can be executed unconditionally. - return !canTrap(V); + // Non-instructions dominate all instructions and can be executed + // unconditionally. + return true; } BasicBlock *PBB = I->getParent(); @@ -1473,10 +1467,7 @@ bool SimplifyCFGOpt::HoistThenElseCodeToIf(BranchInst *BI, while (isa<DbgInfoIntrinsic>(I2)) I2 = &*BB2_Itr++; } - // FIXME: Can we define a safety predicate for CallBr? - if (isa<PHINode>(I1) || !I1->isIdenticalToWhenDefined(I2) || - (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2)) || - isa<CallBrInst>(I1)) + if (isa<PHINode>(I1) || !I1->isIdenticalToWhenDefined(I2)) return false; BasicBlock *BIParent = BI->getParent(); @@ -1609,11 +1600,6 @@ HoistTerminator: if (passingValueIsAlwaysUndefined(BB1V, &PN) || passingValueIsAlwaysUndefined(BB2V, &PN)) return Changed; - - if (isa<ConstantExpr>(BB1V) && !isSafeToSpeculativelyExecute(BB1V)) - return Changed; - if (isa<ConstantExpr>(BB2V) && !isSafeToSpeculativelyExecute(BB2V)) - return Changed; } } @@ -2679,9 +2665,6 @@ static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB, passingValueIsAlwaysUndefined(ThenV, &PN)) return false; - if (canTrap(OrigV) || canTrap(ThenV)) - return false; - HaveRewritablePHIs = true; ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(OrigV); ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(ThenV); @@ -2979,10 +2962,8 @@ static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) { return true; } -static ConstantInt * -getKnownValueOnEdge(Value *V, BasicBlock *From, BasicBlock *To, - SmallDenseMap<std::pair<BasicBlock *, BasicBlock *>, - ConstantInt *> &Visited) { +static ConstantInt *getKnownValueOnEdge(Value *V, BasicBlock *From, + BasicBlock *To) { // Don't look past the block defining the value, we might get the value from // a previous loop iteration. auto *I = dyn_cast<Instruction>(V); @@ -2996,23 +2977,7 @@ getKnownValueOnEdge(Value *V, BasicBlock *From, BasicBlock *To, return BI->getSuccessor(0) == To ? ConstantInt::getTrue(BI->getContext()) : ConstantInt::getFalse(BI->getContext()); - // Limit the amount of blocks we inspect. - if (Visited.size() >= 8) - return nullptr; - - auto Pair = Visited.try_emplace({From, To}, nullptr); - if (!Pair.second) - return Pair.first->second; - - // Check whether the known value is the same for all predecessors. 
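// Aside: the canTrap() helper and its call sites are deleted because
// division and remainder constant expressions, the only trapping kinds, have
// been removed from the IR; with no constant able to trap, non-instruction
// values really do dominate and speculate safely, as the rewritten comment
// above states. The hazard the guards covered (see the PR17073 reference
// removed further below) can now only come from a real instruction, e.g.
// (pr17073_shape is a hypothetical example):
int pr17073_shape(bool c, int x, int y) {
  // The sdiv may trap if y == 0, so the usual speculation checks keep it
  // behind the branch; since this commit no ConstantExpr form of it exists.
  return c ? x / y : 42;
}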
- ConstantInt *Common = nullptr; - for (BasicBlock *Pred : predecessors(From)) { - ConstantInt *C = getKnownValueOnEdge(V, Pred, From, Visited); - if (!C || (Common && Common != C)) - return nullptr; - Common = C; - } - return Visited[{From, To}] = Common; + return nullptr; } /// If we have a conditional branch on something for which we know the constant @@ -3022,7 +2987,7 @@ static Optional<bool> FoldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, AssumptionCache *AC) { - SmallMapVector<BasicBlock *, ConstantInt *, 8> KnownValues; + SmallMapVector<ConstantInt *, SmallSetVector<BasicBlock *, 2>, 2> KnownValues; BasicBlock *BB = BI->getParent(); Value *Cond = BI->getCondition(); PHINode *PN = dyn_cast<PHINode>(Cond); @@ -3035,12 +3000,11 @@ FoldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU, for (Use &U : PN->incoming_values()) if (auto *CB = dyn_cast<ConstantInt>(U)) - KnownValues.insert({PN->getIncomingBlock(U), CB}); + KnownValues[CB].insert(PN->getIncomingBlock(U)); } else { - SmallDenseMap<std::pair<BasicBlock *, BasicBlock *>, ConstantInt *> Visited; for (BasicBlock *Pred : predecessors(BB)) { - if (ConstantInt *CB = getKnownValueOnEdge(Cond, Pred, BB, Visited)) - KnownValues.insert({Pred, CB}); + if (ConstantInt *CB = getKnownValueOnEdge(Cond, Pred, BB)) + KnownValues[CB].insert(Pred); } } @@ -3056,29 +3020,34 @@ FoldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU, for (const auto &Pair : KnownValues) { // Okay, we now know that all edges from PredBB should be revectored to // branch to RealDest. - ConstantInt *CB = Pair.second; - BasicBlock *PredBB = Pair.first; + ConstantInt *CB = Pair.first; + ArrayRef<BasicBlock *> PredBBs = Pair.second.getArrayRef(); BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue()); if (RealDest == BB) continue; // Skip self loops. + // Skip if the predecessor's terminator is an indirect branch. - if (isa<IndirectBrInst>(PredBB->getTerminator())) + if (any_of(PredBBs, [](BasicBlock *PredBB) { + return isa<IndirectBrInst>(PredBB->getTerminator()); + })) continue; - SmallVector<DominatorTree::UpdateType, 3> Updates; + LLVM_DEBUG({ + dbgs() << "Condition " << *Cond << " in " << BB->getName() + << " has value " << *Pair.first << " in predecessors:\n"; + for (const BasicBlock *PredBB : Pair.second) + dbgs() << " " << PredBB->getName() << "\n"; + dbgs() << "Threading to destination " << RealDest->getName() << ".\n"; + }); + + // Split the predecessors we are threading into a new edge block. We'll + // clone the instructions into this block, and then redirect it to RealDest. + BasicBlock *EdgeBB = SplitBlockPredecessors(BB, PredBBs, ".critedge", DTU); - // The dest block might have PHI nodes, other predecessors and other - // difficult cases. Instead of being smart about this, just insert a new - // block that jumps to the destination block, effectively splitting - // the edge we are about to create. - BasicBlock *EdgeBB = - BasicBlock::Create(BB->getContext(), RealDest->getName() + ".critedge", - RealDest->getParent(), RealDest); - BranchInst *CritEdgeBranch = BranchInst::Create(RealDest, EdgeBB); - if (DTU) - Updates.push_back({DominatorTree::Insert, EdgeBB, RealDest}); - CritEdgeBranch->setDebugLoc(BI->getDebugLoc()); + // TODO: These just exist to reduce test diff, we can drop them if we like. + EdgeBB->setName(RealDest->getName() + ".critedge"); + EdgeBB->moveBefore(RealDest); // Update PHI nodes. 
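// Aside, sketching the restructured flow above: predecessors that agree on
// the condition value are now split off as a group via
// SplitBlockPredecessors, so a single EdgeBB per value receives the cloned
// instructions and is then retargeted at RealDest, instead of hand-building
// one edge block per predecessor:
//
//   P1, P2 (cond known true) --> EdgeBB(.critedge) --> RealDest
//   P3     (cond unknown)    ------------------------> BB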
AddPredecessorToBlock(RealDest, EdgeBB, BB); @@ -3086,12 +3055,12 @@ FoldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU, // BB may have instructions that are being threaded over. Clone these // instructions into EdgeBB. We know that there will be no uses of the // cloned instructions outside of EdgeBB. - BasicBlock::iterator InsertPt = EdgeBB->begin(); + BasicBlock::iterator InsertPt = EdgeBB->getFirstInsertionPt(); DenseMap<Value *, Value *> TranslateMap; // Track translated values. - TranslateMap[Cond] = Pair.second; + TranslateMap[Cond] = CB; for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) { if (PHINode *PN = dyn_cast<PHINode>(BBI)) { - TranslateMap[PN] = PN->getIncomingValueForBlock(PredBB); + TranslateMap[PN] = PN->getIncomingValueForBlock(EdgeBB); continue; } // Clone the instruction. @@ -3129,19 +3098,15 @@ FoldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU, } } - // Loop over all of the edges from PredBB to BB, changing them to branch - // to EdgeBB instead. - Instruction *PredBBTI = PredBB->getTerminator(); - for (unsigned i = 0, e = PredBBTI->getNumSuccessors(); i != e; ++i) - if (PredBBTI->getSuccessor(i) == BB) { - BB->removePredecessor(PredBB); - PredBBTI->setSuccessor(i, EdgeBB); - } + BB->removePredecessor(EdgeBB); + BranchInst *EdgeBI = cast<BranchInst>(EdgeBB->getTerminator()); + EdgeBI->setSuccessor(0, RealDest); + EdgeBI->setDebugLoc(BI->getDebugLoc()); if (DTU) { - Updates.push_back({DominatorTree::Insert, PredBB, EdgeBB}); - Updates.push_back({DominatorTree::Delete, PredBB, BB}); - + SmallVector<DominatorTree::UpdateType, 2> Updates; + Updates.push_back({DominatorTree::Delete, EdgeBB, BB}); + Updates.push_back({DominatorTree::Insert, EdgeBB, RealDest}); DTU->applyUpdates(Updates); } @@ -3599,13 +3564,6 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU, Cond->getParent() != BB || !Cond->hasOneUse()) return false; - // Cond is known to be a compare or binary operator. Check to make sure that - // neither operand is a potentially-trapping constant expression. - if (canTrap(Cond->getOperand(0))) - return false; - if (canTrap(Cond->getOperand(1))) - return false; - // Finally, don't infinitely unroll conditional loops. if (is_contained(successors(BB), BB)) return false; @@ -4113,9 +4071,6 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, if (tryWidenCondBranchToCondBranch(PBI, BI, DTU)) return true; - if (canTrap(BI->getCondition())) - return false; - // If both branches are conditional and both contain stores to the same // address, remove the stores from the conditionals and create a conditional // merged store at the end. @@ -4157,10 +4112,6 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, // insertion of a large number of select instructions. For targets // without predication/cmovs, this is a big pessimization. - // Also do not perform this transformation if any phi node in the common - // destination block can trap when reached by BB or PBB (PR17073). In that - // case, it would be unsafe to hoist the operation into a select instruction. - BasicBlock *CommonDest = PBI->getSuccessor(PBIOp); BasicBlock *RemovedDest = PBI->getSuccessor(PBIOp ^ 1); unsigned NumPhis = 0; @@ -4168,16 +4119,6 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, ++II, ++NumPhis) { if (NumPhis > 2) // Disable this xform. 
return false; - - PHINode *PN = cast<PHINode>(II); - Value *BIV = PN->getIncomingValueForBlock(BB); - if (canTrap(BIV)) - return false; - - unsigned PBBIdx = PN->getBasicBlockIndex(PBI->getParent()); - Value *PBIV = PN->getIncomingValue(PBBIdx); - if (canTrap(PBIV)) - return false; } // Finally, if everything is ok, fold the branches to logical ops. @@ -6174,6 +6115,23 @@ ShouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize, return isSwitchDense(SI->getNumCases(), TableSize); } +static bool ShouldUseSwitchConditionAsTableIndex( + ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal, + bool HasDefaultResults, const SmallDenseMap<PHINode *, Type *> &ResultTypes, + const DataLayout &DL, const TargetTransformInfo &TTI) { + if (MinCaseVal.isNullValue()) + return true; + if (MinCaseVal.isNegative() || + MaxCaseVal.getLimitedValue() == std::numeric_limits<uint64_t>::max() || + !HasDefaultResults) + return false; + return all_of(ResultTypes, [&](const auto &KV) { + return SwitchLookupTable::WouldFitInRegister( + DL, MaxCaseVal.getLimitedValue() + 1 /* TableSize */, + KV.second /* ResultType */); + }); +} + /// Try to reuse the switch table index compare. Following pattern: /// \code /// if (idx < tablesize) @@ -6329,9 +6287,6 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, } uint64_t NumResults = ResultLists[PHIs[0]].size(); - APInt RangeSpread = MaxCaseVal->getValue() - MinCaseVal->getValue(); - uint64_t TableSize = RangeSpread.getLimitedValue() + 1; - bool TableHasHoles = (NumResults < TableSize); // If the table has holes, we need a constant result for the default case // or a bitmask that fits in a register. @@ -6340,6 +6295,22 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest, DefaultResultsList, DL, TTI); + for (const auto &I : DefaultResultsList) { + PHINode *PHI = I.first; + Constant *Result = I.second; + DefaultResults[PHI] = Result; + } + + bool UseSwitchConditionAsTableIndex = ShouldUseSwitchConditionAsTableIndex( + *MinCaseVal, *MaxCaseVal, HasDefaultResults, ResultTypes, DL, TTI); + uint64_t TableSize; + if (UseSwitchConditionAsTableIndex) + TableSize = MaxCaseVal->getLimitedValue() + 1; + else + TableSize = + (MaxCaseVal->getValue() - MinCaseVal->getValue()).getLimitedValue() + 1; + + bool TableHasHoles = (NumResults < TableSize); bool NeedMask = (TableHasHoles && !HasDefaultResults); if (NeedMask) { // As an extra penalty for the validity test we require more cases. @@ -6349,12 +6320,6 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, return false; } - for (const auto &I : DefaultResultsList) { - PHINode *PHI = I.first; - Constant *Result = I.second; - DefaultResults[PHI] = Result; - } - if (!ShouldBuildLookupTable(SI, TableSize, TTI, DL, ResultTypes)) return false; @@ -6368,11 +6333,15 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, // Compute the table index value. 
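// Aside: the effect of ShouldUseSwitchConditionAsTableIndex above, sketched
// at source level (lookup_demo is a hypothetical example). With cases 3..6
// and a constant-foldable default, the lookup table can cover [0, 6] and be
// indexed by x directly, trading a few default-filled entries for dropping
// the 'x - 3' subtract:
int lookup_demo(int x) {
  switch (x) {
  case 3: return 10;
  case 4: return 20;
  case 5: return 30;
  case 6: return 40;
  default: return 0; // default result fills table slots 0..2
  }
}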
Builder.SetInsertPoint(SI); Value *TableIndex; - if (MinCaseVal->isNullValue()) + ConstantInt *TableIndexOffset; + if (UseSwitchConditionAsTableIndex) { + TableIndexOffset = ConstantInt::get(MaxCaseVal->getType(), 0); TableIndex = SI->getCondition(); - else - TableIndex = Builder.CreateSub(SI->getCondition(), MinCaseVal, - "switch.tableidx"); + } else { + TableIndexOffset = MinCaseVal; + TableIndex = + Builder.CreateSub(SI->getCondition(), TableIndexOffset, "switch.tableidx"); + } // Compute the maximum table size representable by the integer type we are // switching upon. @@ -6424,7 +6393,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, // Build bitmask; fill in a 1 bit for every case. const ResultListTy &ResultList = ResultLists[PHIs[0]]; for (size_t I = 0, E = ResultList.size(); I != E; ++I) { - uint64_t Idx = (ResultList[I].first->getValue() - MinCaseVal->getValue()) + uint64_t Idx = (ResultList[I].first->getValue() - TableIndexOffset->getValue()) .getLimitedValue(); MaskInt |= One << Idx; } @@ -6463,8 +6432,8 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, // If using a bitmask, use any value to fill the lookup table holes. Constant *DV = NeedMask ? ResultLists[PHI][0].second : DefaultResults[PHI]; StringRef FuncName = Fn->getName(); - SwitchLookupTable Table(Mod, TableSize, MinCaseVal, ResultList, DV, DL, - FuncName); + SwitchLookupTable Table(Mod, TableSize, TableIndexOffset, ResultList, DV, + DL, FuncName); Value *Result = Table.BuildLookup(TableIndex, Builder); diff --git a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp index dbef1ff2e739..af15e0c31b75 100644 --- a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -79,21 +79,23 @@ namespace { bool eliminateIdentitySCEV(Instruction *UseInst, Instruction *IVOperand); bool replaceIVUserWithLoopInvariant(Instruction *UseInst); + bool replaceFloatIVWithIntegerIV(Instruction *UseInst); bool eliminateOverflowIntrinsic(WithOverflowInst *WO); bool eliminateSaturatingIntrinsic(SaturatingInst *SI); bool eliminateTrunc(TruncInst *TI); bool eliminateIVUser(Instruction *UseInst, Instruction *IVOperand); - bool makeIVComparisonInvariant(ICmpInst *ICmp, Value *IVOperand); - void eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand); - void simplifyIVRemainder(BinaryOperator *Rem, Value *IVOperand, + bool makeIVComparisonInvariant(ICmpInst *ICmp, Instruction *IVOperand); + void eliminateIVComparison(ICmpInst *ICmp, Instruction *IVOperand); + void simplifyIVRemainder(BinaryOperator *Rem, Instruction *IVOperand, bool IsSigned); void replaceRemWithNumerator(BinaryOperator *Rem); void replaceRemWithNumeratorOrZero(BinaryOperator *Rem); void replaceSRemWithURem(BinaryOperator *Rem); bool eliminateSDiv(BinaryOperator *SDiv); - bool strengthenOverflowingOperation(BinaryOperator *OBO, Value *IVOperand); - bool strengthenRightShift(BinaryOperator *BO, Value *IVOperand); + bool strengthenOverflowingOperation(BinaryOperator *OBO, + Instruction *IVOperand); + bool strengthenRightShift(BinaryOperator *BO, Instruction *IVOperand); }; } @@ -192,7 +194,7 @@ Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand) } bool SimplifyIndvar::makeIVComparisonInvariant(ICmpInst *ICmp, - Value *IVOperand) { + Instruction *IVOperand) { unsigned IVOperIdx = 0; ICmpInst::Predicate Pred = ICmp->getPredicate(); if (IVOperand != ICmp->getOperand(0)) { @@ -261,7 +263,8 @@ bool 
SimplifyIndvar::makeIVComparisonInvariant(ICmpInst *ICmp, /// SimplifyIVUsers helper for eliminating useless /// comparisons against an induction variable. -void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) { +void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp, + Instruction *IVOperand) { unsigned IVOperIdx = 0; ICmpInst::Predicate Pred = ICmp->getPredicate(); ICmpInst::Predicate OriginalPred = Pred; @@ -372,7 +375,8 @@ void SimplifyIndvar::replaceRemWithNumeratorOrZero(BinaryOperator *Rem) { /// SimplifyIVUsers helper for eliminating useless remainder operations /// operating on an induction variable or replacing srem by urem. -void SimplifyIndvar::simplifyIVRemainder(BinaryOperator *Rem, Value *IVOperand, +void SimplifyIndvar::simplifyIVRemainder(BinaryOperator *Rem, + Instruction *IVOperand, bool IsSigned) { auto *NValue = Rem->getOperand(0); auto *DValue = Rem->getOperand(1); @@ -673,6 +677,35 @@ bool SimplifyIndvar::replaceIVUserWithLoopInvariant(Instruction *I) { return true; } +/// Eliminate redundant type cast between integer and float. +bool SimplifyIndvar::replaceFloatIVWithIntegerIV(Instruction *UseInst) { + if (UseInst->getOpcode() != CastInst::SIToFP) + return false; + + Value *IVOperand = UseInst->getOperand(0); + // Get the symbolic expression for this instruction. + ConstantRange IVRange = SE->getSignedRange(SE->getSCEV(IVOperand)); + unsigned DestNumSigBits = UseInst->getType()->getFPMantissaWidth(); + if (IVRange.getActiveBits() <= DestNumSigBits) { + for (User *U : UseInst->users()) { + // Match for fptosi of sitofp and with same type. + auto *CI = dyn_cast<FPToSIInst>(U); + if (!CI || IVOperand->getType() != CI->getType()) + continue; + + CI->replaceAllUsesWith(IVOperand); + DeadInsts.push_back(CI); + LLVM_DEBUG(dbgs() << "INDVARS: Replace IV user: " << *CI + << " with: " << *IVOperand << '\n'); + + ++NumFoldedUser; + Changed = true; + } + } + + return Changed; +} + /// Eliminate any operation that SCEV can prove is an identity function. bool SimplifyIndvar::eliminateIdentitySCEV(Instruction *UseInst, Instruction *IVOperand) { @@ -718,18 +751,16 @@ bool SimplifyIndvar::eliminateIdentitySCEV(Instruction *UseInst, /// Annotate BO with nsw / nuw if it provably does not signed-overflow / /// unsigned-overflow. Returns true if anything changed, false otherwise. bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO, - Value *IVOperand) { - SCEV::NoWrapFlags Flags; - bool Deduced; - std::tie(Flags, Deduced) = SE->getStrengthenedNoWrapFlagsFromBinOp( + Instruction *IVOperand) { + auto Flags = SE->getStrengthenedNoWrapFlagsFromBinOp( cast<OverflowingBinaryOperator>(BO)); - if (!Deduced) - return Deduced; + if (!Flags) + return false; - BO->setHasNoUnsignedWrap(ScalarEvolution::maskFlags(Flags, SCEV::FlagNUW) == + BO->setHasNoUnsignedWrap(ScalarEvolution::maskFlags(*Flags, SCEV::FlagNUW) == SCEV::FlagNUW); - BO->setHasNoSignedWrap(ScalarEvolution::maskFlags(Flags, SCEV::FlagNSW) == + BO->setHasNoSignedWrap(ScalarEvolution::maskFlags(*Flags, SCEV::FlagNSW) == SCEV::FlagNSW); // The getStrengthenedNoWrapFlagsFromBinOp() check inferred additional nowrap @@ -737,14 +768,14 @@ bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO, // forgetValue() here to make sure those flags also propagate to any other // SCEV expressions based on the addrec. However, this can have pathological // compile-time impact, see https://bugs.llvm.org/show_bug.cgi?id=50384. 
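// Aside: the loop shape targeted by replaceFloatIVWithIntegerIV above
// (float_iv_demo is a hypothetical example). When the IV's signed range fits
// in the destination mantissa, (int)(double)i == i, so fptosi-of-sitofp
// users of matching width collapse onto the integer IV and the float
// round-trip disappears:
void float_iv_demo(int n, void (*use)(int)) {
  for (int i = 0; i < n; ++i) {
    double d = (double)i; // sitofp; i's range fits double's 53-bit mantissa
    use((int)d);          // fptosi of the sitofp; folded to use(i)
  }
}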
- return Deduced; + return true; } /// Annotate the Shr in (X << IVOperand) >> C as exact using the /// information from the IV's range. Returns true if anything changed, false /// otherwise. bool SimplifyIndvar::strengthenRightShift(BinaryOperator *BO, - Value *IVOperand) { + Instruction *IVOperand) { using namespace llvm::PatternMatch; if (BO->getOpcode() == Instruction::Shl) { @@ -896,6 +927,13 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) { } } + // Try to use integer induction for FPToSI of float induction directly. + if (replaceFloatIVWithIntegerIV(UseInst)) { + // Re-queue the potentially new direct uses of IVOperand. + pushIVUsers(IVOperand, L, Simplified, SimpleIVUsers); + continue; + } + CastInst *Cast = dyn_cast<CastInst>(UseInst); if (V && Cast) { V->visitCast(Cast); diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp index f4306bb43dfd..b359717424a6 100644 --- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -75,7 +75,8 @@ static bool callHasFP128Argument(const CallInst *CI) { }); } -static Value *convertStrToNumber(CallInst *CI, StringRef &Str, int64_t Base) { +static Value *convertStrToNumber(CallInst *CI, StringRef &Str, Value *EndPtr, + int64_t Base, IRBuilderBase &B) { if (Base < 2 || Base > 36) // handle special zero base if (Base != 0) @@ -97,6 +98,15 @@ static Value *convertStrToNumber(CallInst *CI, StringRef &Str, int64_t Base) { if (!isIntN(CI->getType()->getPrimitiveSizeInBits(), Result)) return nullptr; + if (EndPtr) { + // Store the pointer to the end. + uint64_t ILen = End - nptr.c_str(); + Value *Off = B.getInt64(ILen); + Value *StrBeg = CI->getArgOperand(0); + Value *StrEnd = B.CreateInBoundsGEP(B.getInt8Ty(), StrBeg, Off, "endptr"); + B.CreateStore(StrEnd, EndPtr); + } + return ConstantInt::get(CI->getType(), Result); } @@ -295,31 +305,69 @@ Value *LibCallSimplifier::optimizeStrNCat(CallInst *CI, IRBuilderBase &B) { return copyFlags(*CI, emitStrLenMemCpy(Src, Dst, SrcLen, B)); } +// Helper to transform memchr(S, C, N) == S to N && *S == C and, when +// NBytes is null, strchr(S, C) to *S == C. A precondition of the function +// is that either S is dereferenceable or the value of N is nonzero. +static Value* memChrToCharCompare(CallInst *CI, Value *NBytes, + IRBuilderBase &B, const DataLayout &DL) +{ + Value *Src = CI->getArgOperand(0); + Value *CharVal = CI->getArgOperand(1); + + // Fold memchr(A, C, N) == A to N && *A == C. + Type *CharTy = B.getInt8Ty(); + Value *Char0 = B.CreateLoad(CharTy, Src); + CharVal = B.CreateTrunc(CharVal, CharTy); + Value *Cmp = B.CreateICmpEQ(Char0, CharVal, "char0cmp"); + + if (NBytes) { + Value *Zero = ConstantInt::get(NBytes->getType(), 0); + Value *And = B.CreateICmpNE(NBytes, Zero); + Cmp = B.CreateLogicalAnd(And, Cmp); + } + + Value *NullPtr = Constant::getNullValue(CI->getType()); + return B.CreateSelect(Cmp, Src, NullPtr); +} + Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilderBase &B) { - Function *Callee = CI->getCalledFunction(); - FunctionType *FT = Callee->getFunctionType(); Value *SrcStr = CI->getArgOperand(0); + Value *CharVal = CI->getArgOperand(1); annotateNonNullNoUndefBasedOnAccess(CI, 0); + if (isOnlyUsedInEqualityComparison(CI, SrcStr)) + return memChrToCharCompare(CI, nullptr, B, DL); + // If the second operand is non-constant, see if we can compute the length // of the input string and turn this into memchr. 
- ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1)); + ConstantInt *CharC = dyn_cast<ConstantInt>(CharVal); if (!CharC) { uint64_t Len = GetStringLength(SrcStr); if (Len) annotateDereferenceableBytes(CI, 0, Len); else return nullptr; + + Function *Callee = CI->getCalledFunction(); + FunctionType *FT = Callee->getFunctionType(); if (!FT->getParamType(1)->isIntegerTy(32)) // memchr needs i32. return nullptr; return copyFlags( *CI, - emitMemChr(SrcStr, CI->getArgOperand(1), // include nul. + emitMemChr(SrcStr, CharVal, // include nul. ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len), B, DL, TLI)); } + if (CharC->isZero()) { + Value *NullPtr = Constant::getNullValue(CI->getType()); + if (isOnlyUsedInEqualityComparison(CI, NullPtr)) + // Pre-empt the transformation to strlen below and fold + // strchr(A, '\0') == null to false. + return B.CreateIntToPtr(B.getTrue(), CI->getType()); + } + // Otherwise, the character is a constant, see if the first argument is // a string literal. If so, we can constant fold. StringRef Str; @@ -1008,8 +1056,12 @@ Value *LibCallSimplifier::optimizeMemRChr(CallInst *CI, IRBuilderBase &B) { Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilderBase &B) { Value *SrcStr = CI->getArgOperand(0); Value *Size = CI->getArgOperand(2); - if (isKnownNonZero(Size, DL)) + + if (isKnownNonZero(Size, DL)) { annotateNonNullNoUndefBasedOnAccess(CI, 0); + if (isOnlyUsedInEqualityComparison(CI, SrcStr)) + return memChrToCharCompare(CI, Size, B, DL); + } Value *CharVal = CI->getArgOperand(1); ConstantInt *CharC = dyn_cast<ConstantInt>(CharVal); @@ -1099,9 +1151,16 @@ Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilderBase &B) { return B.CreateSelect(And, SrcStr, Sel1, "memchr.sel2"); } - if (!LenC) + if (!LenC) { + if (isOnlyUsedInEqualityComparison(CI, SrcStr)) + // S is dereferenceable so it's safe to load from it and fold + // memchr(S, C, N) == S to N && *S == C for any C and N. + // TODO: This is safe even for nonconstant S. + return memChrToCharCompare(CI, Size, B, DL); + // From now on we need a constant length and constant array. return nullptr; + } // If the char is variable but the input str and length are not we can turn // this memchr call into a simple bit field test. Of course this only works @@ -1589,31 +1648,6 @@ static Value *optimizeTrigReflections(CallInst *Call, LibFunc Func, return nullptr; } -static Value *getPow(Value *InnerChain[33], unsigned Exp, IRBuilderBase &B) { - // Multiplications calculated using Addition Chains. - // Refer: http://wwwhomes.uni-bielefeld.de/achim/addition_chain.html - - assert(Exp != 0 && "Incorrect exponent 0 not handled"); - - if (InnerChain[Exp]) - return InnerChain[Exp]; - - static const unsigned AddChain[33][2] = { - {0, 0}, // Unused. - {0, 0}, // Unused (base case = pow1). - {1, 1}, // Unused (pre-computed). - {1, 2}, {2, 2}, {2, 3}, {3, 3}, {2, 5}, {4, 4}, - {1, 8}, {5, 5}, {1, 10}, {6, 6}, {4, 9}, {7, 7}, - {3, 12}, {8, 8}, {8, 9}, {2, 16}, {1, 18}, {10, 10}, - {6, 15}, {11, 11}, {3, 20}, {12, 12}, {8, 17}, {13, 13}, - {3, 24}, {14, 14}, {4, 25}, {15, 15}, {3, 28}, {16, 16}, - }; - - InnerChain[Exp] = B.CreateFMul(getPow(InnerChain, AddChain[Exp][0], B), - getPow(InnerChain, AddChain[Exp][1], B)); - return InnerChain[Exp]; -} - // Return a properly extended integer (DstWidth bits wide) if the operation is // an itofp.
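The memChrToCharCompare helper introduced above rests on a simple equivalence: provided S is dereferenceable, memchr(S, C, N) == S holds exactly when N is nonzero and the first byte of S equals (char)C. A small self-contained sanity check of that equivalence (plain C++, not part of the patch):

#include <cassert>
#include <cstddef>
#include <cstring>

static bool FoldedForm(const char *S, int C, size_t N) {
  return N != 0 && *S == (char)C;
}

int main() {
  const char *S = "hello";
  for (size_t N = 0; N <= 5; ++N)
    for (int C = 0; C < 256; ++C)
      assert((memchr(S, C, N) == S) == FoldedForm(S, C, N));
  return 0;
}

The same equivalence with the N test dropped justifies the strchr(S, C) == S fold, since strchr may inspect S[0] unconditionally.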
static Value *getIntToFPVal(Value *I2F, IRBuilderBase &B, unsigned DstWidth) { @@ -1914,70 +1948,52 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilderBase &B) { if (Value *Sqrt = replacePowWithSqrt(Pow, B)) return Sqrt; - // pow(x, n) -> x * x * x * ... + // pow(x, n) -> powi(x, n) * sqrt(x) if n has exactly a 0.5 fraction const APFloat *ExpoF; - if (AllowApprox && match(Expo, m_APFloat(ExpoF)) && - !ExpoF->isExactlyValue(0.5) && !ExpoF->isExactlyValue(-0.5)) { - // We limit to a max of 7 multiplications, thus the maximum exponent is 32. - // If the exponent is an integer+0.5 we generate a call to sqrt and an - // additional fmul. - // TODO: This whole transformation should be backend specific (e.g. some - // backends might prefer libcalls or the limit for the exponent might - // be different) and it should also consider optimizing for size. - APFloat LimF(ExpoF->getSemantics(), 33), - ExpoA(abs(*ExpoF)); - if (ExpoA < LimF) { - // This transformation applies to integer or integer+0.5 exponents only. - // For integer+0.5, we create a sqrt(Base) call. - Value *Sqrt = nullptr; - if (!ExpoA.isInteger()) { - APFloat Expo2 = ExpoA; - // To check if ExpoA is an integer + 0.5, we add it to itself. If there - // is no floating point exception and the result is an integer, then - // ExpoA == integer + 0.5 - if (Expo2.add(ExpoA, APFloat::rmNearestTiesToEven) != APFloat::opOK) - return nullptr; - - if (!Expo2.isInteger()) - return nullptr; - - Sqrt = getSqrtCall(Base, Pow->getCalledFunction()->getAttributes(), - Pow->doesNotAccessMemory(), M, B, TLI); - if (!Sqrt) - return nullptr; - } - - // We will memoize intermediate products of the Addition Chain. - Value *InnerChain[33] = {nullptr}; - InnerChain[1] = Base; - InnerChain[2] = B.CreateFMul(Base, Base, "square"); - - // We cannot readily convert a non-double type (like float) to a double. - // So we first convert it to something which could be converted to double. - ExpoA.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &Ignored); - Value *FMul = getPow(InnerChain, ExpoA.convertToDouble(), B); + if (match(Expo, m_APFloat(ExpoF)) && !ExpoF->isExactlyValue(0.5) && + !ExpoF->isExactlyValue(-0.5)) { + APFloat ExpoA(abs(*ExpoF)); + APFloat ExpoI(*ExpoF); + Value *Sqrt = nullptr; + if (AllowApprox && !ExpoA.isInteger()) { + APFloat Expo2 = ExpoA; + // To check if ExpoA is an integer + 0.5, we add it to itself. If there + // is no floating point exception and the result is an integer, then + // ExpoA == integer + 0.5 + if (Expo2.add(ExpoA, APFloat::rmNearestTiesToEven) != APFloat::opOK) + return nullptr; - // Expand pow(x, y+0.5) to pow(x, y) * sqrt(x). - if (Sqrt) - FMul = B.CreateFMul(FMul, Sqrt); + if (!Expo2.isInteger()) + return nullptr; - // If the exponent is negative, then get the reciprocal. 
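The rewritten optimizePow above drops the old addition-chain expansion in favor of a powi call plus an optional sqrt when the exponent has exactly a 0.5 fraction. A numeric sketch of the identity and of the fraction test the hunk performs by adding ExpoA to itself (plain C++, not LLVM code):

#include <cassert>
#include <cmath>

int main() {
  double X = 1.7, Expo = 3.5;

  // Detection trick: Expo has a fractional part of exactly 0.5 iff
  // Expo + Expo is an integer while Expo itself is not.
  double Twice = Expo + Expo;
  assert(std::trunc(Expo) != Expo && std::trunc(Twice) == Twice);

  // Identity: pow(x, n + 0.5) == pow(x, n) * sqrt(x), up to rounding.
  double Expanded = std::pow(X, 3.0) * std::sqrt(X);
  assert(std::fabs(std::pow(X, Expo) - Expanded) < 1e-12);
  return 0;
}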
- if (ExpoF->isNegative()) - FMul = B.CreateFDiv(ConstantFP::get(Ty, 1.0), FMul, "reciprocal"); + if (ExpoI.roundToIntegral(APFloat::rmTowardNegative) != + APFloat::opInexact) + return nullptr; + if (!ExpoI.isInteger()) + return nullptr; + ExpoF = &ExpoI; - return FMul; + Sqrt = getSqrtCall(Base, Pow->getCalledFunction()->getAttributes(), + Pow->doesNotAccessMemory(), M, B, TLI); + if (!Sqrt) + return nullptr; } + // pow(x, n) -> powi(x, n) if n is a constant signed integer value APSInt IntExpo(TLI->getIntSize(), /*isUnsigned=*/false); - // powf(x, n) -> powi(x, n) if n is a constant signed integer value if (ExpoF->isInteger() && ExpoF->convertToInteger(IntExpo, APFloat::rmTowardZero, &Ignored) == APFloat::opOK) { - return copyFlags( + Value *PowI = copyFlags( *Pow, createPowWithIntegerExponent( Base, ConstantInt::get(B.getIntNTy(TLI->getIntSize()), IntExpo), M, B)); + + if (PowI && Sqrt) + return B.CreateFMul(PowI, Sqrt); + + return PowI; } } @@ -2517,7 +2533,7 @@ Value *LibCallSimplifier::optimizeAtoi(CallInst *CI, IRBuilderBase &B) { if (!getConstantStringInfo(CI->getArgOperand(0), Str)) return nullptr; - return convertStrToNumber(CI, Str, 10); + return convertStrToNumber(CI, Str, nullptr, 10, B); } Value *LibCallSimplifier::optimizeStrtol(CallInst *CI, IRBuilderBase &B) { @@ -2525,11 +2541,14 @@ Value *LibCallSimplifier::optimizeStrtol(CallInst *CI, IRBuilderBase &B) { if (!getConstantStringInfo(CI->getArgOperand(0), Str)) return nullptr; - if (!isa<ConstantPointerNull>(CI->getArgOperand(1))) + Value *EndPtr = CI->getArgOperand(1); + if (isa<ConstantPointerNull>(EndPtr)) + EndPtr = nullptr; + else if (!isKnownNonZero(EndPtr, DL)) return nullptr; if (ConstantInt *CInt = dyn_cast<ConstantInt>(CI->getArgOperand(2))) { - return convertStrToNumber(CI, Str, CInt->getSExtValue()); + return convertStrToNumber(CI, Str, EndPtr, CInt->getSExtValue(), B); } return nullptr; diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp index 6242d9a93fc1..183ba86abcb4 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -386,20 +386,6 @@ static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) { return true; } -/// Check whether it is safe to if-convert this phi node. -/// -/// Phi nodes with constant expressions that can trap are not safe to if -/// convert. -static bool canIfConvertPHINodes(BasicBlock *BB) { - for (PHINode &Phi : BB->phis()) { - for (Value *V : Phi.incoming_values()) - if (auto *C = dyn_cast<Constant>(V)) - if (C->canTrap()) - return false; - } - return true; -} - static Type *convertPointerToIntegerType(const DataLayout &DL, Type *Ty) { if (Ty->isPointerTy()) return DL.getIntPtrType(Ty); @@ -993,7 +979,6 @@ bool LoopVectorizationLegality::canVectorizeMemory() { } } - Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks()); PSE.addPredicate(LAI->getPSE().getPredicate()); return true; } @@ -1098,13 +1083,6 @@ bool LoopVectorizationLegality::blockCanBePredicated( SmallPtrSetImpl<const Instruction *> &MaskedOp, SmallPtrSetImpl<Instruction *> &ConditionalAssumes) const { for (Instruction &I : *BB) { - // Check that we don't have a constant expression that can trap as operand. 
- for (Value *Operand : I.operands()) { - if (auto *C = dyn_cast<Constant>(Operand)) - if (C->canTrap()) - return false; - } - // We can predicate blocks with calls to assume, as long as we drop them in // case we flatten the CFG via predication. if (match(&I, m_Intrinsic<Intrinsic::assume>())) { @@ -1190,7 +1168,6 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() { } // Collect the blocks that need predication. - BasicBlock *Header = TheLoop->getHeader(); for (BasicBlock *BB : TheLoop->blocks()) { // We don't support switch statements inside loops. if (!isa<BranchInst>(BB->getTerminator())) { @@ -1212,13 +1189,6 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() { BB->getTerminator()); return false; } - } else if (BB != Header && !canIfConvertPHINodes(BB)) { - reportVectorizationFailure( - "Control flow cannot be substituted for a select", - "control flow cannot be substituted for a select", - "NoCFGForSelect", ORE, TheLoop, - BB->getTerminator()); - return false; } } diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h index 0cb2032fa45a..2e9a9fe0640e 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -33,7 +33,6 @@ class LoopInfo; class LoopVectorizationLegality; class LoopVectorizationCostModel; class PredicatedScalarEvolution; -class LoopVectorizationRequirements; class LoopVectorizeHints; class OptimizationRemarkEmitter; class TargetTransformInfo; @@ -46,8 +45,9 @@ class VPBuilder { VPBasicBlock::iterator InsertPt = VPBasicBlock::iterator(); VPInstruction *createInstruction(unsigned Opcode, - ArrayRef<VPValue *> Operands, DebugLoc DL) { - VPInstruction *Instr = new VPInstruction(Opcode, Operands, DL); + ArrayRef<VPValue *> Operands, DebugLoc DL, + const Twine &Name = "") { + VPInstruction *Instr = new VPInstruction(Opcode, Operands, DL, Name); if (BB) BB->insert(Instr, InsertPt); return Instr; @@ -55,8 +55,8 @@ class VPBuilder { VPInstruction *createInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands, - DebugLoc DL) { - return createInstruction(Opcode, ArrayRef<VPValue *>(Operands), DL); + DebugLoc DL, const Twine &Name = "") { + return createInstruction(Opcode, ArrayRef<VPValue *>(Operands), DL, Name); } public: @@ -124,34 +124,37 @@ public: /// Create an N-ary operation with \p Opcode, \p Operands and set \p Inst as /// its underlying Instruction. 
VPValue *createNaryOp(unsigned Opcode, ArrayRef<VPValue *> Operands, - Instruction *Inst = nullptr) { + Instruction *Inst = nullptr, const Twine &Name = "") { DebugLoc DL; if (Inst) DL = Inst->getDebugLoc(); - VPInstruction *NewVPInst = createInstruction(Opcode, Operands, DL); + VPInstruction *NewVPInst = createInstruction(Opcode, Operands, DL, Name); NewVPInst->setUnderlyingValue(Inst); return NewVPInst; } VPValue *createNaryOp(unsigned Opcode, ArrayRef<VPValue *> Operands, - DebugLoc DL) { - return createInstruction(Opcode, Operands, DL); + DebugLoc DL, const Twine &Name = "") { + return createInstruction(Opcode, Operands, DL, Name); } - VPValue *createNot(VPValue *Operand, DebugLoc DL) { - return createInstruction(VPInstruction::Not, {Operand}, DL); + VPValue *createNot(VPValue *Operand, DebugLoc DL, const Twine &Name = "") { + return createInstruction(VPInstruction::Not, {Operand}, DL, Name); } - VPValue *createAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL) { - return createInstruction(Instruction::BinaryOps::And, {LHS, RHS}, DL); + VPValue *createAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL, + const Twine &Name = "") { + return createInstruction(Instruction::BinaryOps::And, {LHS, RHS}, DL, Name); } - VPValue *createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL) { - return createInstruction(Instruction::BinaryOps::Or, {LHS, RHS}, DL); + VPValue *createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL, + const Twine &Name = "") { + return createInstruction(Instruction::BinaryOps::Or, {LHS, RHS}, DL, Name); } VPValue *createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal, - DebugLoc DL) { - return createNaryOp(Instruction::Select, {Cond, TrueVal, FalseVal}, DL); + DebugLoc DL, const Twine &Name = "") { + return createNaryOp(Instruction::Select, {Cond, TrueVal, FalseVal}, DL, + Name); } //===--------------------------------------------------------------------===// @@ -191,6 +194,10 @@ struct VectorizationFactor { /// Cost of the scalar loop. InstructionCost ScalarCost; + /// The minimum trip count required to make vectorization profitable, e.g. due + /// to runtime checks. + ElementCount MinProfitableTripCount; + VectorizationFactor(ElementCount Width, InstructionCost Cost, InstructionCost ScalarCost) : Width(Width), Cost(Cost), ScalarCost(ScalarCost) {} @@ -268,8 +275,6 @@ class LoopVectorizationPlanner { const LoopVectorizeHints &Hints; - LoopVectorizationRequirements &Requirements; - OptimizationRemarkEmitter *ORE; SmallVector<VPlanPtr, 4> VPlans; @@ -285,10 +290,9 @@ public: InterleavedAccessInfo &IAI, PredicatedScalarEvolution &PSE, const LoopVectorizeHints &Hints, - LoopVectorizationRequirements &Requirements, OptimizationRemarkEmitter *ORE) : OrigLoop(L), LI(LI), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM), IAI(IAI), - PSE(PSE), Hints(Hints), Requirements(Requirements), ORE(ORE) {} + PSE(PSE), Hints(Hints), ORE(ORE) {} /// Plan how to best vectorize, return the best VF and its cost, or None if /// vectorization and interleaving should be avoided up front. @@ -332,11 +336,6 @@ public: bool requiresTooManyRuntimeChecks() const; protected: - /// Collect the instructions from the original loop that would be trivially - /// dead in the vectorized loop if generated. - void collectTriviallyDeadInstructions( - SmallPtrSetImpl<Instruction *> &DeadInstructions); - /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive, /// according to the information gathered by Legal when it checked if it is /// legal to vectorize the loop. 
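The MinProfitableTripCount field added to VectorizationFactor above feeds the vector-loop entry guard emitted later in this patch (see the CreateStep lambda in LoopVectorize.cpp below). A minimal sketch of that interaction, with invented names rather than the actual LLVM helpers:

#include <algorithm>
#include <cstdint>

struct GuardInputs {
  uint64_t VF;                     // vectorization factor (fixed-width case)
  uint64_t UF;                     // unroll (interleave) factor
  uint64_t MinProfitableTripCount; // break-even TC for the runtime checks
};

// Returns true when the scalar fallback loop should run instead of the
// vector loop; this mirrors the "min.iters.check" branch.
static bool TakeScalarLoop(uint64_t TripCount, const GuardInputs &G) {
  uint64_t Step = std::max(G.VF * G.UF, G.MinProfitableTripCount);
  return TripCount < Step;
}

int main() {
  GuardInputs G{/*VF=*/4, /*UF=*/2, /*MinProfitableTripCount=*/16};
  // 12 iterations cannot amortize the runtime checks; 32 can.
  return (TakeScalarLoop(12, G) && !TakeScalarLoop(32, G)) ? 0 : 1;
}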
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index b637b2d5ddae..0777a1385916 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -196,10 +196,9 @@ static cl::opt<unsigned> TinyTripCountVectorThreshold( "value are vectorized only if no scalar iteration overheads " "are incurred.")); -static cl::opt<unsigned> PragmaVectorizeMemoryCheckThreshold( - "pragma-vectorize-memory-check-threshold", cl::init(128), cl::Hidden, - cl::desc("The maximum allowed number of runtime memory checks with a " - "vectorize(enable) pragma.")); +static cl::opt<unsigned> VectorizeMemoryCheckThreshold( + "vectorize-memory-check-threshold", cl::init(128), cl::Hidden, + cl::desc("The maximum allowed number of runtime memory checks")); // Option prefer-predicate-over-epilogue indicates that an epilogue is undesired, // that predication is preferred, and this lists all options. I.e., the @@ -442,6 +441,7 @@ public: const TargetLibraryInfo *TLI, const TargetTransformInfo *TTI, AssumptionCache *AC, OptimizationRemarkEmitter *ORE, ElementCount VecWidth, + ElementCount MinProfitableTripCount, unsigned UnrollFactor, LoopVectorizationLegality *LVL, LoopVectorizationCostModel *CM, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, GeneratedRTChecks &RTChecks) @@ -453,6 +453,11 @@ public: // of the original loop header may change as the transformation happens. OptForSizeBasedOnProfile = llvm::shouldOptimizeForSize( OrigLoop->getHeader(), PSI, BFI, PGSOQueryType::IRPass); + + if (MinProfitableTripCount.isZero()) + this->MinProfitableTripCount = VecWidth; + else + this->MinProfitableTripCount = MinProfitableTripCount; } virtual ~InnerLoopVectorizer() = default; @@ -656,6 +661,8 @@ protected: /// vector elements. ElementCount VF; + ElementCount MinProfitableTripCount; + /// The vectorization unroll factor to use. Each scalar is vectorized to this /// many different vector instructions. unsigned UF; @@ -735,6 +742,7 @@ public: LoopVectorizationCostModel *CM, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, GeneratedRTChecks &Check) : InnerLoopVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, AC, ORE, + ElementCount::getFixed(1), ElementCount::getFixed(1), UnrollFactor, LVL, CM, BFI, PSI, Check) {} @@ -783,8 +791,8 @@ public: BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, GeneratedRTChecks &Checks) : InnerLoopVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, AC, ORE, - EPI.MainLoopVF, EPI.MainLoopUF, LVL, CM, BFI, PSI, - Checks), + EPI.MainLoopVF, EPI.MainLoopVF, EPI.MainLoopUF, LVL, + CM, BFI, PSI, Checks), EPI(EPI) {} // Override this function to handle the more complex control flow around the @@ -1018,7 +1026,8 @@ void InnerLoopVectorizer::collectPoisonGeneratingRecipes( if (isa<VPWidenMemoryInstructionRecipe>(CurRec) || isa<VPInterleaveRecipe>(CurRec) || isa<VPScalarIVStepsRecipe>(CurRec) || - isa<VPCanonicalIVPHIRecipe>(CurRec)) + isa<VPCanonicalIVPHIRecipe>(CurRec) || + isa<VPActiveLaneMaskPHIRecipe>(CurRec)) continue; // This recipe contributes to the address computation of a widen @@ -1503,6 +1512,13 @@ public: /// Returns true if all loop blocks should be masked to fold tail loop. bool foldTailByMasking() const { return FoldTailByMasking; } + /// Returns true if we're tail-folding and want to use the active lane mask + /// for vector loop control flow.
+ bool useActiveLaneMaskForControlFlow() const { + return FoldTailByMasking && + TTI.emitGetActiveLaneMask() == PredicationStyle::DataAndControlFlow; + } + /// Returns true if the instructions in this block requires predication /// for any reason, e.g. because tail folding now requires a predicate /// or because the block in the original loop was predicated. @@ -1551,14 +1567,14 @@ public: Scalars.clear(); } -private: - unsigned NumPredStores = 0; - /// Convenience function that returns the value of vscale_range iff /// vscale_range.min == vscale_range.max or otherwise returns the value /// returned by the corresponding TLI method. Optional<unsigned> getVScaleForTuning() const; +private: + unsigned NumPredStores = 0; + /// \return An upper bound for the vectorization factors for both /// fixed and scalable vectorization, where the minimum-known number of /// elements is a power-of-2 larger than zero. If scalable vectorization is @@ -1661,7 +1677,8 @@ private: /// A set containing all BasicBlocks that are known to present after /// vectorization as a predicated block. - SmallPtrSet<BasicBlock *, 4> PredicatedBBsAfterVectorization; + DenseMap<ElementCount, SmallPtrSet<BasicBlock *, 4>> + PredicatedBBsAfterVectorization; /// Records whether it is allowed to have the original scalar loop execute at /// least once. This may be needed as a fallback loop in case runtime @@ -1849,14 +1866,17 @@ class GeneratedRTChecks { DominatorTree *DT; LoopInfo *LI; + TargetTransformInfo *TTI; SCEVExpander SCEVExp; SCEVExpander MemCheckExp; + bool CostTooHigh = false; + public: GeneratedRTChecks(ScalarEvolution &SE, DominatorTree *DT, LoopInfo *LI, - const DataLayout &DL) - : DT(DT), LI(LI), SCEVExp(SE, DL, "scev.check"), + TargetTransformInfo *TTI, const DataLayout &DL) + : DT(DT), LI(LI), TTI(TTI), SCEVExp(SE, DL, "scev.check"), MemCheckExp(SE, DL, "scev.check") {} /// Generate runtime checks in SCEVCheckBlock and MemCheckBlock, so we can @@ -1867,6 +1887,15 @@ public: void Create(Loop *L, const LoopAccessInfo &LAI, const SCEVPredicate &UnionPred, ElementCount VF, unsigned IC) { + // Hard cutoff to limit compile-time increase in case a very large number of + // runtime checks needs to be generated. + // TODO: Skip cutoff if the loop is guaranteed to execute, e.g. due to + // profile info. 
+ CostTooHigh = + LAI.getNumRuntimePointerChecks() > VectorizeMemoryCheckThreshold; + if (CostTooHigh) + return; + BasicBlock *LoopHeader = L->getHeader(); BasicBlock *Preheader = L->getLoopPreheader(); @@ -1938,6 +1967,44 @@ public: } } + InstructionCost getCost() { + if (SCEVCheckBlock || MemCheckBlock) + LLVM_DEBUG(dbgs() << "Calculating cost of runtime checks:\n"); + + if (CostTooHigh) { + InstructionCost Cost; + Cost.setInvalid(); + LLVM_DEBUG(dbgs() << " number of checks exceeded threshold\n"); + return Cost; + } + + InstructionCost RTCheckCost = 0; + if (SCEVCheckBlock) + for (Instruction &I : *SCEVCheckBlock) { + if (SCEVCheckBlock->getTerminator() == &I) + continue; + InstructionCost C = + TTI->getInstructionCost(&I, TTI::TCK_RecipThroughput); + LLVM_DEBUG(dbgs() << " " << C << " for " << I << "\n"); + RTCheckCost += C; + } + if (MemCheckBlock) + for (Instruction &I : *MemCheckBlock) { + if (MemCheckBlock->getTerminator() == &I) + continue; + InstructionCost C = + TTI->getInstructionCost(&I, TTI::TCK_RecipThroughput); + LLVM_DEBUG(dbgs() << " " << C << " for " << I << "\n"); + RTCheckCost += C; + } + + if (SCEVCheckBlock || MemCheckBlock) + LLVM_DEBUG(dbgs() << "Total cost of runtime checks: " << RTCheckCost + << "\n"); + + return RTCheckCost; + } + /// Remove the created SCEV & memory runtime check blocks & instructions, if /// unused. ~GeneratedRTChecks() { @@ -2880,9 +2947,16 @@ void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) { // If tail is to be folded, vector loop takes care of all iterations. Type *CountTy = Count->getType(); Value *CheckMinIters = Builder.getFalse(); - Value *Step = createStepForVF(Builder, CountTy, VF, UF); + auto CreateStep = [&]() { + // Create step with max(MinProTripCount, UF * VF). + if (UF * VF.getKnownMinValue() < MinProfitableTripCount.getKnownMinValue()) + return createStepForVF(Builder, CountTy, MinProfitableTripCount, 1); + return createStepForVF(Builder, CountTy, VF, UF); + }; + if (!Cost->foldTailByMasking()) - CheckMinIters = Builder.CreateICmp(P, Count, Step, "min.iters.check"); + CheckMinIters = + Builder.CreateICmp(P, Count, CreateStep(), "min.iters.check"); else if (VF.isScalable()) { // vscale is not necessarily a power-of-2, which means we cannot guarantee // an overflow to zero when updating induction variables and so an @@ -2894,8 +2968,9 @@ void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) { Value *LHS = Builder.CreateSub(MaxUIntTripCount, Count); // Don't execute the vector loop if (UMax - n) < (VF * UF). - CheckMinIters = Builder.CreateICmp(ICmpInst::ICMP_ULT, LHS, Step); + CheckMinIters = Builder.CreateICmp(ICmpInst::ICMP_ULT, LHS, CreateStep()); } + // Create new preheader for vector loop. LoopVectorPreHeader = SplitBlock(TCCheckBlock, TCCheckBlock->getTerminator(), DT, LI, nullptr, @@ -2920,7 +2995,6 @@ void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) { } BasicBlock *InnerLoopVectorizer::emitSCEVChecks(BasicBlock *Bypass) { - BasicBlock *const SCEVCheckBlock = RTChecks.emitSCEVChecks(Bypass, LoopVectorPreHeader, LoopExitBlock); if (!SCEVCheckBlock) @@ -4792,7 +4866,7 @@ LoopVectorizationCostModel::getMaxLegalScalableVF(unsigned MaxSafeElements) { MaxVScale = TheFunction->getFnAttribute(Attribute::VScaleRange).getVScaleRangeMax(); MaxScalableVF = ElementCount::getScalable( - MaxVScale ? (MaxSafeElements / MaxVScale.getValue()) : 0); + MaxVScale ? 
(MaxSafeElements / MaxVScale.value()) : 0); if (!MaxScalableVF) reportVectorizationInfo( "Max legal vector width too small, scalable vectorization " @@ -5187,9 +5261,9 @@ bool LoopVectorizationCostModel::isMoreProfitable( unsigned EstimatedWidthB = B.Width.getKnownMinValue(); if (Optional<unsigned> VScale = getVScaleForTuning()) { if (A.Width.isScalable()) - EstimatedWidthA *= VScale.getValue(); + EstimatedWidthA *= VScale.value(); if (B.Width.isScalable()) - EstimatedWidthB *= VScale.getValue(); + EstimatedWidthB *= VScale.value(); } // Assume vscale may be larger than 1 (or the value being tuned for), @@ -5872,10 +5946,11 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<ElementCount> VFs) { LLVM_DEBUG(dbgs() << "LV(REG): Calculating max register usage:\n"); - auto GetRegUsage = [&TTI = TTI](Type *Ty, ElementCount VF) -> unsigned { + const auto &TTICapture = TTI; + auto GetRegUsage = [&TTICapture](Type *Ty, ElementCount VF) -> unsigned { if (Ty->isTokenTy() || !VectorType::isValidElementType(Ty)) return 0; - return TTI.getRegUsageForType(VectorType::get(Ty, VF)); + return TTICapture.getRegUsageForType(VectorType::get(Ty, VF)); }; for (unsigned int i = 0, s = IdxToInstr.size(); i < s; ++i) { @@ -6014,6 +6089,8 @@ void LoopVectorizationCostModel::collectInstsToScalarize(ElementCount VF) { // map will indicate that we've analyzed it already. ScalarCostsTy &ScalarCostsVF = InstsToScalarize[VF]; + PredicatedBBsAfterVectorization[VF].clear(); + // Find all the instructions that are scalar with predication in the loop and // determine if it would be better to not if-convert the blocks they are in. // If so, we also record the instructions to scalarize. @@ -6031,7 +6108,7 @@ void LoopVectorizationCostModel::collectInstsToScalarize(ElementCount VF) { computePredInstDiscount(&I, ScalarCosts, VF) >= 0) ScalarCostsVF.insert(ScalarCosts.begin(), ScalarCosts.end()); // Remember that BB will remain after vectorization. - PredicatedBBsAfterVectorization.insert(BB); + PredicatedBBsAfterVectorization[VF].insert(BB); } } } @@ -6896,8 +6973,8 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF, bool ScalarPredicatedBB = false; BranchInst *BI = cast<BranchInst>(I); if (VF.isVector() && BI->isConditional() && - (PredicatedBBsAfterVectorization.count(BI->getSuccessor(0)) || - PredicatedBBsAfterVectorization.count(BI->getSuccessor(1)))) + (PredicatedBBsAfterVectorization[VF].count(BI->getSuccessor(0)) || + PredicatedBBsAfterVectorization[VF].count(BI->getSuccessor(1)))) ScalarPredicatedBB = true; if (ScalarPredicatedBB) { @@ -7363,14 +7440,6 @@ LoopVectorizationPlanner::planInVPlanNativePath(ElementCount UserVF) { return VectorizationFactor::Disabled(); } -bool LoopVectorizationPlanner::requiresTooManyRuntimeChecks() const { - unsigned NumRuntimePointerChecks = Requirements.getNumRuntimePointerChecks(); - return (NumRuntimePointerChecks > - VectorizerParams::RuntimeMemoryCheckThreshold && - !Hints.allowReordering()) || - NumRuntimePointerChecks > PragmaVectorizeMemoryCheckThreshold; -} - Optional<VectorizationFactor> LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) { assert(OrigLoop->isInnermost() && "Inner loop expected."); @@ -7439,7 +7508,9 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) { return VectorizationFactor::Disabled(); // Select the optimal vectorization factor. 
- return CM.selectVectorizationFactor(VFCandidates); + VectorizationFactor VF = CM.selectVectorizationFactor(VFCandidates); + assert((VF.Width.isScalar() || VF.ScalarCost > 0) && "when vectorizing, the scalar cost must be non-zero."); + return VF; } VPlan &LoopVectorizationPlanner::getBestPlanFor(ElementCount VF) const { @@ -7554,7 +7625,7 @@ void LoopVectorizationPlanner::executePlan(ElementCount BestVF, unsigned BestUF, BestVPlan.getVectorLoopRegion()->getEntryBasicBlock(); Loop *L = LI->getLoopFor(State.CFG.VPBB2IRBB[HeaderVPBB]); if (VectorizedLoopID) - L->setLoopID(VectorizedLoopID.getValue()); + L->setLoopID(VectorizedLoopID.value()); else { // Keep all loop hints from the original loop on the vector loop (we'll // replace the vectorizer-specific hints below). @@ -7585,51 +7656,6 @@ void LoopVectorizationPlanner::printPlans(raw_ostream &O) { } #endif -void LoopVectorizationPlanner::collectTriviallyDeadInstructions( - SmallPtrSetImpl<Instruction *> &DeadInstructions) { - - // We create new control-flow for the vectorized loop, so the original exit - // conditions will be dead after vectorization if it's only used by the - // terminator - SmallVector<BasicBlock*> ExitingBlocks; - OrigLoop->getExitingBlocks(ExitingBlocks); - for (auto *BB : ExitingBlocks) { - auto *Cmp = dyn_cast<Instruction>(BB->getTerminator()->getOperand(0)); - if (!Cmp || !Cmp->hasOneUse()) - continue; - - // TODO: we should introduce a getUniqueExitingBlocks on Loop - if (!DeadInstructions.insert(Cmp).second) - continue; - - // The operands of the icmp is often a dead trunc, used by IndUpdate. - // TODO: can recurse through operands in general - for (Value *Op : Cmp->operands()) { - if (isa<TruncInst>(Op) && Op->hasOneUse()) - DeadInstructions.insert(cast<Instruction>(Op)); - } - } - - // We create new "steps" for induction variable updates to which the original - // induction variables map. An original update instruction will be dead if - // all its users except the induction variable are dead. - auto *Latch = OrigLoop->getLoopLatch(); - for (auto &Induction : Legal->getInductionVars()) { - PHINode *Ind = Induction.first; - auto *IndUpdate = cast<Instruction>(Ind->getIncomingValueForBlock(Latch)); - - // If the tail is to be folded by masking, the primary induction variable, - // if exists, isn't dead: it will be used for masking. Don't kill it. - if (CM.foldTailByMasking() && IndUpdate == Legal->getPrimaryInduction()) - continue; - - if (llvm::all_of(IndUpdate->users(), [&](User *U) -> bool { - return U == Ind || DeadInstructions.count(cast<Instruction>(U)); - })) - DeadInstructions.insert(IndUpdate); - } -} - Value *InnerLoopUnroller::getBroadcastInstrs(Value *V) { return V; } //===--------------------------------------------------------------------===// @@ -8001,11 +8027,19 @@ VPValue *VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlanPtr &Plan) { if (!CM.blockNeedsPredicationForAnyReason(BB)) return BlockMaskCache[BB] = BlockMask; // Loop incoming mask is all-one. + assert(CM.foldTailByMasking() && "must fold the tail"); + + // If we're using the active lane mask for control flow, then we get the + // mask from the active lane mask PHI that is cached in the VPlan. + PredicationStyle EmitGetActiveLaneMask = CM.TTI.emitGetActiveLaneMask(); + if (EmitGetActiveLaneMask == PredicationStyle::DataAndControlFlow) + return BlockMaskCache[BB] = Plan->getActiveLaneMaskPhi(); + // Introduce the early-exit compare IV <= BTC to form header block mask. // This is used instead of IV < TC because TC may wrap, unlike BTC. 
Start by // constructing the desired canonical IV in the header block as its first // non-phi instructions. - assert(CM.foldTailByMasking() && "must fold the tail"); + VPBasicBlock *HeaderVPBB = Plan->getVectorLoopRegion()->getEntryBasicBlock(); auto NewInsertionPoint = HeaderVPBB->getFirstNonPhi(); @@ -8014,9 +8048,10 @@ VPValue *VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlanPtr &Plan) { VPBuilder::InsertPointGuard Guard(Builder); Builder.setInsertPoint(HeaderVPBB, NewInsertionPoint); - if (CM.TTI.emitGetActiveLaneMask()) { + if (EmitGetActiveLaneMask != PredicationStyle::None) { VPValue *TC = Plan->getOrCreateTripCount(); - BlockMask = Builder.createNaryOp(VPInstruction::ActiveLaneMask, {IV, TC}); + BlockMask = Builder.createNaryOp(VPInstruction::ActiveLaneMask, {IV, TC}, + nullptr, "active.lane.mask"); } else { VPValue *BTC = Plan->getOrCreateBackedgeTakenCount(); BlockMask = Builder.createNaryOp(VPInstruction::ICmpULE, {IV, BTC}); @@ -8409,9 +8444,8 @@ VPBasicBlock *VPRecipeBuilder::handleReplication( return RegSucc; } -VPRegionBlock *VPRecipeBuilder::createReplicateRegion(Instruction *Instr, - VPRecipeBase *PredRecipe, - VPlanPtr &Plan) { +VPRegionBlock *VPRecipeBuilder::createReplicateRegion( + Instruction *Instr, VPReplicateRecipe *PredRecipe, VPlanPtr &Plan) { // Instructions marked for predication are replicated and placed under an // if-then construct to prevent side-effects. @@ -8425,7 +8459,7 @@ VPRegionBlock *VPRecipeBuilder::createReplicateRegion(Instruction *Instr, auto *Entry = new VPBasicBlock(Twine(RegionName) + ".entry", BOMRecipe); auto *PHIRecipe = Instr->getType()->isVoidTy() ? nullptr - : new VPPredInstPHIRecipe(Plan->getOrAddVPValue(Instr)); + : new VPPredInstPHIRecipe(PredRecipe); if (PHIRecipe) { Plan->removeVPValueFor(Instr); Plan->addVPValue(Instr, PHIRecipe); @@ -8517,19 +8551,11 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF, ElementCount MaxVF) { assert(OrigLoop->isInnermost() && "Inner loop expected."); - // Collect instructions from the original loop that will become trivially dead - // in the vectorized loop. We don't need to vectorize these instructions. For - // example, original induction update instructions can become dead because we - // separately emit induction "steps" when generating code for the new loop. - // Similarly, we create a new latch condition when setting up the structure - // of the new loop, so the old one can become dead. - SmallPtrSet<Instruction *, 4> DeadInstructions; - collectTriviallyDeadInstructions(DeadInstructions); - // Add assume instructions we need to drop to DeadInstructions, to prevent // them from being added to the VPlan. // TODO: We only need to drop assumes in blocks that get flattened. If the // control flow is preserved, we should keep them. + SmallPtrSet<Instruction *, 4> DeadInstructions; auto &ConditionalAssumes = Legal->getConditionalAssumes(); DeadInstructions.insert(ConditionalAssumes.begin(), ConditionalAssumes.end()); @@ -8565,32 +8591,84 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF, } } -// Add a VPCanonicalIVPHIRecipe starting at 0 to the header, a -// CanonicalIVIncrement{NUW} VPInstruction to increment it by VF * UF and a -// BranchOnCount VPInstruction to the latch. +// Add the necessary canonical IV and branch recipes required to control the +// loop.
static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, DebugLoc DL, - bool HasNUW) { + bool HasNUW, + bool UseLaneMaskForLoopControlFlow) { Value *StartIdx = ConstantInt::get(IdxTy, 0); auto *StartV = Plan.getOrAddVPValue(StartIdx); + // Add a VPCanonicalIVPHIRecipe starting at 0 to the header. auto *CanonicalIVPHI = new VPCanonicalIVPHIRecipe(StartV, DL); VPRegionBlock *TopRegion = Plan.getVectorLoopRegion(); VPBasicBlock *Header = TopRegion->getEntryBasicBlock(); Header->insert(CanonicalIVPHI, Header->begin()); + // Add a CanonicalIVIncrement{NUW} VPInstruction to increment the scalar + // IV by VF * UF. auto *CanonicalIVIncrement = new VPInstruction(HasNUW ? VPInstruction::CanonicalIVIncrementNUW : VPInstruction::CanonicalIVIncrement, - {CanonicalIVPHI}, DL); + {CanonicalIVPHI}, DL, "index.next"); CanonicalIVPHI->addOperand(CanonicalIVIncrement); VPBasicBlock *EB = TopRegion->getExitingBasicBlock(); EB->appendRecipe(CanonicalIVIncrement); - auto *BranchOnCount = - new VPInstruction(VPInstruction::BranchOnCount, - {CanonicalIVIncrement, &Plan.getVectorTripCount()}, DL); - EB->appendRecipe(BranchOnCount); + if (UseLaneMaskForLoopControlFlow) { + // Create the active lane mask instruction in the vplan preheader. + VPBasicBlock *Preheader = Plan.getEntry()->getEntryBasicBlock(); + + // We can't use StartV directly in the ActiveLaneMask VPInstruction, since + // we have to take unrolling into account. Each part needs to start at + // Part * VF + auto *CanonicalIVIncrementParts = + new VPInstruction(HasNUW ? VPInstruction::CanonicalIVIncrementForPartNUW + : VPInstruction::CanonicalIVIncrementForPart, + {StartV}, DL, "index.part.next"); + Preheader->appendRecipe(CanonicalIVIncrementParts); + + // Create the ActiveLaneMask instruction using the correct start values. + VPValue *TC = Plan.getOrCreateTripCount(); + auto *EntryALM = new VPInstruction(VPInstruction::ActiveLaneMask, + {CanonicalIVIncrementParts, TC}, DL, + "active.lane.mask.entry"); + Preheader->appendRecipe(EntryALM); + + // Now create the ActiveLaneMaskPhi recipe in the main loop using the + // preheader ActiveLaneMask instruction. + auto *LaneMaskPhi = new VPActiveLaneMaskPHIRecipe(EntryALM, DebugLoc()); + Header->insert(LaneMaskPhi, Header->getFirstNonPhi()); + + // Create the active lane mask for the next iteration of the loop. + CanonicalIVIncrementParts = + new VPInstruction(HasNUW ? VPInstruction::CanonicalIVIncrementForPartNUW + : VPInstruction::CanonicalIVIncrementForPart, + {CanonicalIVIncrement}, DL); + EB->appendRecipe(CanonicalIVIncrementParts); + + auto *ALM = new VPInstruction(VPInstruction::ActiveLaneMask, + {CanonicalIVIncrementParts, TC}, DL, + "active.lane.mask.next"); + EB->appendRecipe(ALM); + LaneMaskPhi->addOperand(ALM); + + // We have to invert the mask here because a true condition means jumping + // to the exit block. + auto *NotMask = new VPInstruction(VPInstruction::Not, ALM, DL); + EB->appendRecipe(NotMask); + + VPInstruction *BranchBack = + new VPInstruction(VPInstruction::BranchOnCond, {NotMask}, DL); + EB->appendRecipe(BranchBack); + } else { + // Add the BranchOnCount VPInstruction to the latch. + VPInstruction *BranchBack = new VPInstruction( + VPInstruction::BranchOnCount, + {CanonicalIVIncrement, &Plan.getVectorTripCount()}, DL); + EB->appendRecipe(BranchBack); + } } // Add exit values to \p Plan. 
VPLiveOuts are added for each LCSSA phi in the @@ -8691,7 +8769,8 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()); addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(), DLInst ? DLInst->getDebugLoc() : DebugLoc(), - !CM.foldTailByMasking()); + !CM.foldTailByMasking(), + CM.useActiveLaneMaskForControlFlow()); // Scan the body of the loop in a topological order to visit each basic block // after having visited its predecessor basic blocks. @@ -8961,8 +9040,8 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( VPlanTransforms::optimizeInductions(*Plan, *PSE.getSE()); VPlanTransforms::sinkScalarOperands(*Plan); - VPlanTransforms::mergeReplicateRegions(*Plan); VPlanTransforms::removeDeadRecipes(*Plan); + VPlanTransforms::mergeReplicateRegions(*Plan); VPlanTransforms::removeRedundantExpandSCEVRecipes(*Plan); // Fold Exit block into its predecessor if possible. @@ -9006,7 +9085,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) { Term->eraseFromParent(); addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(), DebugLoc(), - true); + true, CM.useActiveLaneMaskForControlFlow()); return Plan; } @@ -9078,7 +9157,9 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( WidenRecipe->getVPSingleValue()->replaceAllUsesWith(RedRecipe); Plan->removeVPValueFor(R); Plan->addVPValue(R, RedRecipe); + // Append the recipe to the end of the VPBasicBlock because we need to + // ensure that it comes after all of its inputs, including CondOp. - WidenRecipe->getParent()->insert(RedRecipe, WidenRecipe->getIterator()); + WidenRecipe->getParent()->appendRecipe(RedRecipe); WidenRecipe->getVPSingleValue()->replaceAllUsesWith(RedRecipe); WidenRecipe->eraseFromParent(); @@ -9151,229 +9232,6 @@ void VPWidenCallRecipe::execute(VPTransformState &State) { *this, State); } -void VPWidenSelectRecipe::execute(VPTransformState &State) { - auto &I = *cast<SelectInst>(getUnderlyingInstr()); - State.setDebugLocFromInst(&I); - - // The condition can be loop invariant but still defined inside the - // loop. This means that we can't just use the original 'cond' value. - // We have to take the 'vectorized' value and pick the first lane. - // Instcombine will make this a no-op. - auto *InvarCond = - InvariantCond ? State.get(getOperand(0), VPIteration(0, 0)) : nullptr; - - for (unsigned Part = 0; Part < State.UF; ++Part) { - Value *Cond = InvarCond ?
InvarCond : State.get(getOperand(0), Part); - Value *Op0 = State.get(getOperand(1), Part); - Value *Op1 = State.get(getOperand(2), Part); - Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1); - State.set(this, Sel, Part); - State.addMetadata(Sel, &I); - } -} - -void VPWidenRecipe::execute(VPTransformState &State) { - auto &I = *cast<Instruction>(getUnderlyingValue()); - auto &Builder = State.Builder; - switch (I.getOpcode()) { - case Instruction::Call: - case Instruction::Br: - case Instruction::PHI: - case Instruction::GetElementPtr: - case Instruction::Select: - llvm_unreachable("This instruction is handled by a different recipe."); - case Instruction::UDiv: - case Instruction::SDiv: - case Instruction::SRem: - case Instruction::URem: - case Instruction::Add: - case Instruction::FAdd: - case Instruction::Sub: - case Instruction::FSub: - case Instruction::FNeg: - case Instruction::Mul: - case Instruction::FMul: - case Instruction::FDiv: - case Instruction::FRem: - case Instruction::Shl: - case Instruction::LShr: - case Instruction::AShr: - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: { - // Just widen unops and binops. - State.setDebugLocFromInst(&I); - - for (unsigned Part = 0; Part < State.UF; ++Part) { - SmallVector<Value *, 2> Ops; - for (VPValue *VPOp : operands()) - Ops.push_back(State.get(VPOp, Part)); - - Value *V = Builder.CreateNAryOp(I.getOpcode(), Ops); - - if (auto *VecOp = dyn_cast<Instruction>(V)) { - VecOp->copyIRFlags(&I); - - // If the instruction is vectorized and was in a basic block that needed - // predication, we can't propagate poison-generating flags (nuw/nsw, - // exact, etc.). The control flow has been linearized and the - // instruction is no longer guarded by the predicate, which could make - // the flag properties to no longer hold. - if (State.MayGeneratePoisonRecipes.contains(this)) - VecOp->dropPoisonGeneratingFlags(); - } - - // Use this vector value for all users of the original instruction. - State.set(this, V, Part); - State.addMetadata(V, &I); - } - - break; - } - case Instruction::Freeze: { - State.setDebugLocFromInst(&I); - - for (unsigned Part = 0; Part < State.UF; ++Part) { - Value *Op = State.get(getOperand(0), Part); - - Value *Freeze = Builder.CreateFreeze(Op); - State.set(this, Freeze, Part); - } - break; - } - case Instruction::ICmp: - case Instruction::FCmp: { - // Widen compares. Generate vector compares. - bool FCmp = (I.getOpcode() == Instruction::FCmp); - auto *Cmp = cast<CmpInst>(&I); - State.setDebugLocFromInst(Cmp); - for (unsigned Part = 0; Part < State.UF; ++Part) { - Value *A = State.get(getOperand(0), Part); - Value *B = State.get(getOperand(1), Part); - Value *C = nullptr; - if (FCmp) { - // Propagate fast math flags. - IRBuilder<>::FastMathFlagGuard FMFG(Builder); - Builder.setFastMathFlags(Cmp->getFastMathFlags()); - C = Builder.CreateFCmp(Cmp->getPredicate(), A, B); - } else { - C = Builder.CreateICmp(Cmp->getPredicate(), A, B); - } - State.set(this, C, Part); - State.addMetadata(C, &I); - } - - break; - } - - case Instruction::ZExt: - case Instruction::SExt: - case Instruction::FPToUI: - case Instruction::FPToSI: - case Instruction::FPExt: - case Instruction::PtrToInt: - case Instruction::IntToPtr: - case Instruction::SIToFP: - case Instruction::UIToFP: - case Instruction::Trunc: - case Instruction::FPTrunc: - case Instruction::BitCast: { - auto *CI = cast<CastInst>(&I); - State.setDebugLocFromInst(CI); - - /// Vectorize casts. - Type *DestTy = (State.VF.isScalar()) - ? 
CI->getType() - : VectorType::get(CI->getType(), State.VF); - - for (unsigned Part = 0; Part < State.UF; ++Part) { - Value *A = State.get(getOperand(0), Part); - Value *Cast = Builder.CreateCast(CI->getOpcode(), A, DestTy); - State.set(this, Cast, Part); - State.addMetadata(Cast, &I); - } - break; - } - default: - // This instruction is not vectorized by simple widening. - LLVM_DEBUG(dbgs() << "LV: Found an unhandled instruction: " << I); - llvm_unreachable("Unhandled instruction!"); - } // end of switch. -} - -void VPWidenGEPRecipe::execute(VPTransformState &State) { - auto *GEP = cast<GetElementPtrInst>(getUnderlyingInstr()); - // Construct a vector GEP by widening the operands of the scalar GEP as - // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP - // results in a vector of pointers when at least one operand of the GEP - // is vector-typed. Thus, to keep the representation compact, we only use - // vector-typed operands for loop-varying values. - - if (State.VF.isVector() && IsPtrLoopInvariant && IsIndexLoopInvariant.all()) { - // If we are vectorizing, but the GEP has only loop-invariant operands, - // the GEP we build (by only using vector-typed operands for - // loop-varying values) would be a scalar pointer. Thus, to ensure we - // produce a vector of pointers, we need to either arbitrarily pick an - // operand to broadcast, or broadcast a clone of the original GEP. - // Here, we broadcast a clone of the original. - // - // TODO: If at some point we decide to scalarize instructions having - // loop-invariant operands, this special case will no longer be - // required. We would add the scalarization decision to - // collectLoopScalars() and teach getVectorValue() to broadcast - // the lane-zero scalar value. - auto *Clone = State.Builder.Insert(GEP->clone()); - for (unsigned Part = 0; Part < State.UF; ++Part) { - Value *EntryPart = State.Builder.CreateVectorSplat(State.VF, Clone); - State.set(this, EntryPart, Part); - State.addMetadata(EntryPart, GEP); - } - } else { - // If the GEP has at least one loop-varying operand, we are sure to - // produce a vector of pointers. But if we are only unrolling, we want - // to produce a scalar GEP for each unroll part. Thus, the GEP we - // produce with the code below will be scalar (if VF == 1) or vector - // (otherwise). Note that for the unroll-only case, we still maintain - // values in the vector mapping with initVector, as we do for other - // instructions. - for (unsigned Part = 0; Part < State.UF; ++Part) { - // The pointer operand of the new GEP. If it's loop-invariant, we - // won't broadcast it. - auto *Ptr = IsPtrLoopInvariant - ? State.get(getOperand(0), VPIteration(0, 0)) - : State.get(getOperand(0), Part); - - // Collect all the indices for the new GEP. If any index is - // loop-invariant, we won't broadcast it. - SmallVector<Value *, 4> Indices; - for (unsigned I = 1, E = getNumOperands(); I < E; I++) { - VPValue *Operand = getOperand(I); - if (IsIndexLoopInvariant[I - 1]) - Indices.push_back(State.get(Operand, VPIteration(0, 0))); - else - Indices.push_back(State.get(Operand, Part)); - } - - // If the GEP instruction is vectorized and was in a basic block that - // needed predication, we can't propagate the poison-generating 'inbounds' - // flag. The control flow has been linearized and the GEP is no longer - // guarded by the predicate, which could make the 'inbounds' properties to - // no longer hold. 
- bool IsInBounds = - GEP->isInBounds() && State.MayGeneratePoisonRecipes.count(this) == 0; - - // Create the new GEP. Note that this GEP may be a scalar if VF == 1, - // but it should be a vector, otherwise. - auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ptr, - Indices, "", IsInBounds); - assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) && - "NewGEP is not a pointer vector"); - State.set(this, NewGEP, Part); - State.addMetadata(NewGEP, GEP); - } - } -} - void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) { assert(!State.Instance && "Int or FP induction being replicated."); @@ -9632,45 +9490,6 @@ void VPScalarIVStepsRecipe::execute(VPTransformState &State) { } } -void VPBlendRecipe::execute(VPTransformState &State) { - State.setDebugLocFromInst(Phi); - // We know that all PHIs in non-header blocks are converted into - // selects, so we don't have to worry about the insertion order and we - // can just use the builder. - // At this point we generate the predication tree. There may be - // duplications since this is a simple recursive scan, but future - // optimizations will clean it up. - - unsigned NumIncoming = getNumIncomingValues(); - - // Generate a sequence of selects of the form: - // SELECT(Mask3, In3, - // SELECT(Mask2, In2, - // SELECT(Mask1, In1, - // In0))) - // Note that Mask0 is never used: lanes for which no path reaches this phi and - // are essentially undef are taken from In0. - InnerLoopVectorizer::VectorParts Entry(State.UF); - for (unsigned In = 0; In < NumIncoming; ++In) { - for (unsigned Part = 0; Part < State.UF; ++Part) { - // We might have single edge PHIs (blocks) - use an identity - // 'select' for the first PHI operand. - Value *In0 = State.get(getIncomingValue(In), Part); - if (In == 0) - Entry[Part] = In0; // Initialize with the first incoming value. - else { - // Select between the current value and the previous incoming edge - // based on the incoming mask. - Value *Cond = State.get(getMask(In), Part); - Entry[Part] = - State.Builder.CreateSelect(Cond, In0, Entry[Part], "predphi"); - } - } - } - for (unsigned Part = 0; Part < State.UF; ++Part) - State.set(this, Entry[Part], Part); -} - void VPInterleaveRecipe::execute(VPTransformState &State) { assert(!State.Instance && "Interleave group being replicated."); State.ILV->vectorizeInterleaveGroup(IG, definedValues(), State, getAddr(), @@ -9758,32 +9577,6 @@ void VPReplicateRecipe::execute(VPTransformState &State) { State); } -void VPBranchOnMaskRecipe::execute(VPTransformState &State) { - assert(State.Instance && "Branch on Mask works only on single instance."); - - unsigned Part = State.Instance->Part; - unsigned Lane = State.Instance->Lane.getKnownLane(); - - Value *ConditionBit = nullptr; - VPValue *BlockInMask = getMask(); - if (BlockInMask) { - ConditionBit = State.get(BlockInMask, Part); - if (ConditionBit->getType()->isVectorTy()) - ConditionBit = State.Builder.CreateExtractElement( - ConditionBit, State.Builder.getInt32(Lane)); - } else // Block in mask is all-one. - ConditionBit = State.Builder.getTrue(); - - // Replace the temporary unreachable terminator with a new conditional branch, - // whose two destinations will be set later when they are created. 
- auto *CurrentTerminator = State.CFG.PrevBB->getTerminator(); - assert(isa<UnreachableInst>(CurrentTerminator) && - "Expected to replace unreachable terminator with conditional branch."); - auto *CondBr = BranchInst::Create(State.CFG.PrevBB, nullptr, ConditionBit); - CondBr->setSuccessor(0, nullptr); - ReplaceInstWithInst(CurrentTerminator, CondBr); -} - void VPPredInstPHIRecipe::execute(VPTransformState &State) { assert(State.Instance && "Predicated instruction PHI works per instance."); Instruction *ScalarPredInst = @@ -10103,8 +9896,7 @@ static bool processLoopInVPlanNativePath( // Use the planner for outer loop vectorization. // TODO: CM is not used at this point inside the planner. Turn CM into an // optional argument if we don't need it in the future. - LoopVectorizationPlanner LVP(L, LI, TLI, TTI, LVL, CM, IAI, PSE, Hints, - Requirements, ORE); + LoopVectorizationPlanner LVP(L, LI, TLI, TTI, LVL, CM, IAI, PSE, Hints, ORE); // Get user vectorization factor. ElementCount UserVF = Hints.getWidth(); @@ -10123,10 +9915,10 @@ static bool processLoopInVPlanNativePath( VPlan &BestPlan = LVP.getBestPlanFor(VF.Width); { - GeneratedRTChecks Checks(*PSE.getSE(), DT, LI, + GeneratedRTChecks Checks(*PSE.getSE(), DT, LI, TTI, F->getParent()->getDataLayout()); - InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width, 1, LVL, - &CM, BFI, PSI, Checks); + InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width, + VF.Width, 1, LVL, &CM, BFI, PSI, Checks); LLVM_DEBUG(dbgs() << "Vectorizing outer loop in \"" << L->getHeader()->getParent()->getName() << "\"\n"); LVP.executePlan(VF.Width, 1, BestPlan, LB, DT, false); @@ -10183,6 +9975,105 @@ static void checkMixedPrecision(Loop *L, OptimizationRemarkEmitter *ORE) { } } +static bool areRuntimeChecksProfitable(GeneratedRTChecks &Checks, + VectorizationFactor &VF, + Optional<unsigned> VScale, Loop *L, + ScalarEvolution &SE) { + InstructionCost CheckCost = Checks.getCost(); + if (!CheckCost.isValid()) + return false; + + // When interleaving only, scalar and vector cost will be equal, which in + // turn would lead to a divide by 0. Fall back to hard threshold. + if (VF.Width.isScalar()) { + if (CheckCost > VectorizeMemoryCheckThreshold) { + LLVM_DEBUG( + dbgs() + << "LV: Interleaving only is not profitable due to runtime checks\n"); + return false; + } + return true; + } + + // The scalar cost should only be 0 when vectorizing with a user-specified + // VF/IC. In those cases, runtime checks should always be generated. + double ScalarC = *VF.ScalarCost.getValue(); + if (ScalarC == 0) + return true; + + // First, compute the minimum iteration count required so that the vector + // loop outperforms the scalar loop. + // The total cost of the scalar loop is + // ScalarC * TC + // where + // * TC is the actual trip count of the loop. + // * ScalarC is the cost of a single scalar iteration. + // + // The total cost of the vector loop is + // RtC + VecC * (TC / VF) + EpiC + // where + // * RtC is the cost of the generated runtime checks + // * VecC is the cost of a single vector iteration. + // * TC is the actual trip count of the loop + // * VF is the vectorization factor + // * EpiC is the cost of the generated epilogue, including the cost + // of the remaining scalar operations.
+ // + // Vectorization is profitable once the total vector cost is less than the + // total scalar cost: + // RtC + VecC * (TC / VF) + EpiC < ScalarC * TC + // + // Now we can compute the minimum required trip count TC as + // (RtC + EpiC) / (ScalarC - (VecC / VF)) < TC + // + // For now we assume the epilogue cost EpiC = 0 for simplicity. Note that + // the computations are performed on doubles, not integers and the result + // is rounded up, hence we get an upper estimate of the TC. + unsigned IntVF = VF.Width.getKnownMinValue(); + if (VF.Width.isScalable()) { + unsigned AssumedMinimumVscale = 1; + if (VScale) + AssumedMinimumVscale = *VScale; + IntVF *= AssumedMinimumVscale; + } + double VecCOverVF = double(*VF.Cost.getValue()) / IntVF; + double RtC = *CheckCost.getValue(); + double MinTC1 = RtC / (ScalarC - VecCOverVF); + + // Second, compute a minimum iteration count so that the cost of the + // runtime checks is only a fraction of the total scalar loop cost. This + // adds a loop-dependent bound on the overhead incurred if the runtime + // checks fail. In case the runtime checks fail, the cost is RtC + ScalarC + // * TC. To bound the runtime check to be a fraction 1/X of the scalar + // cost, compute + // RtC < ScalarC * TC * (1 / X) ==> RtC * X / ScalarC < TC + double MinTC2 = RtC * 10 / ScalarC; + + // Now pick the larger minimum. If it is not a multiple of VF, choose the + // next closest multiple of VF. This should partly compensate for ignoring + // the epilogue cost. + uint64_t MinTC = std::ceil(std::max(MinTC1, MinTC2)); + VF.MinProfitableTripCount = ElementCount::getFixed(alignTo(MinTC, IntVF)); + + LLVM_DEBUG( + dbgs() << "LV: Minimum required TC for runtime checks to be profitable:" + << VF.MinProfitableTripCount << "\n"); + + // Skip vectorization if the expected trip count is less than the minimum + // required trip count. + if (auto ExpectedTC = getSmallBestKnownTC(SE, L)) { + if (ElementCount::isKnownLT(ElementCount::getFixed(*ExpectedTC), + VF.MinProfitableTripCount)) { + LLVM_DEBUG(dbgs() << "LV: Vectorization is not beneficial: expected " + "trip count < minimum profitable VF (" + << *ExpectedTC << " < " << VF.MinProfitableTripCount + << ")\n"); + + return false; + } + } + return true; +} + LoopVectorizePass::LoopVectorizePass(LoopVectorizeOptions Opts) : InterleaveOnlyWhenForced(Opts.InterleaveOnlyWhenForced || !EnableLoopInterleaving), @@ -10340,8 +10231,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { CM.collectElementTypesForWidening(); // Use the planner for vectorization. - LoopVectorizationPlanner LVP(L, LI, TLI, TTI, &LVL, CM, IAI, PSE, Hints, - Requirements, ORE); + LoopVectorizationPlanner LVP(L, LI, TLI, TTI, &LVL, CM, IAI, PSE, Hints, ORE); // Get user vectorization factor and interleave count. ElementCount UserVF = Hints.getWidth(); @@ -10353,10 +10243,25 @@ bool LoopVectorizePass::processLoop(Loop *L) { VectorizationFactor VF = VectorizationFactor::Disabled(); unsigned IC = 1; - GeneratedRTChecks Checks(*PSE.getSE(), DT, LI, + GeneratedRTChecks Checks(*PSE.getSE(), DT, LI, TTI, F->getParent()->getDataLayout()); if (MaybeVF) { - if (LVP.requiresTooManyRuntimeChecks()) { + VF = *MaybeVF; + // Select the interleave count. + IC = CM.selectInterleaveCount(VF.Width, *VF.Cost.getValue()); + + unsigned SelectedIC = std::max(IC, UserIC); + // Optimistically generate runtime checks if they are needed. Drop them if + // they turn out to not be profitable. 
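As a concrete check of the two bounds above: with illustrative costs ScalarC = 4, VecC = 10, RtC = 24 and VF = 4 (made-up numbers, not real cost-model output), the first bound gives 24 / (4 - 10/4) = 16 and the second gives 24 * 10 / 4 = 60, so the loop needs a trip count of at least 60 (already a multiple of VF). A minimal standalone sketch of the same arithmetic:

    #include <algorithm>
    #include <cmath>
    #include <cstdint>
    #include <iostream>

    int main() {
      // Illustrative costs only; the real values come from the cost model.
      double ScalarC = 4.0; // cost of one scalar iteration
      double VecC = 10.0;   // cost of one vector iteration
      double RtC = 24.0;    // cost of the generated runtime checks
      unsigned IntVF = 4;   // vectorization factor (vscale ignored)

      double MinTC1 = RtC / (ScalarC - VecC / IntVF); // vector beats scalar
      double MinTC2 = RtC * 10 / ScalarC; // checks cost <= 1/10 of scalar loop

      uint64_t MinTC = std::ceil(std::max(MinTC1, MinTC2));
      uint64_t Aligned = (MinTC + IntVF - 1) / IntVF * IntVF; // alignTo
      std::cout << Aligned << "\n"; // prints 60
    }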
+ if (VF.Width.isVector() || SelectedIC > 1) + Checks.Create(L, *LVL.getLAI(), PSE.getPredicate(), VF.Width, SelectedIC); + + // Check if it is profitable to vectorize with runtime checks. + bool ForceVectorization = + Hints.getForce() == LoopVectorizeHints::FK_Enabled; + if (!ForceVectorization && + !areRuntimeChecksProfitable(Checks, VF, CM.getVScaleForTuning(), L, + *PSE.getSE())) { ORE->emit([&]() { return OptimizationRemarkAnalysisAliasing( DEBUG_TYPE, "CantReorderMemOps", L->getStartLoc(), @@ -10368,15 +10273,6 @@ bool LoopVectorizePass::processLoop(Loop *L) { Hints.emitRemarkWithHints(); return false; } - VF = *MaybeVF; - // Select the interleave count. - IC = CM.selectInterleaveCount(VF.Width, *VF.Cost.getValue()); - - unsigned SelectedIC = std::max(IC, UserIC); - // Optimistically generate runtime checks if they are needed. Drop them if - // they turn out to not be profitable. - if (VF.Width.isVector() || SelectedIC > 1) - Checks.Create(L, *LVL.getLAI(), PSE.getPredicate(), VF.Width, SelectedIC); } // Identify the diagnostic messages that should be produced. @@ -10533,8 +10429,9 @@ bool LoopVectorizePass::processLoop(Loop *L) { if (!MainILV.areSafetyChecksAdded()) DisableRuntimeUnroll = true; } else { - InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width, IC, - &LVL, &CM, BFI, PSI, Checks); + InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width, + VF.MinProfitableTripCount, IC, &LVL, &CM, BFI, + PSI, Checks); VPlan &BestPlan = LVP.getBestPlanFor(VF.Width); LVP.executePlan(VF.Width, IC, BestPlan, LB, DT, false); @@ -10564,7 +10461,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { makeFollowupLoopID(OrigLoopID, {LLVMLoopVectorizeFollowupAll, LLVMLoopVectorizeFollowupEpilogue}); if (RemainderLoopID) { - L->setLoopID(RemainderLoopID.getValue()); + L->setLoopID(RemainderLoopID.value()); } else { if (DisableRuntimeUnroll) AddRuntimeUnrollDisableMetaData(L); diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 019a09665a67..e136cd9aedac 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -2637,7 +2637,7 @@ private: AliasCacheKey key = std::make_pair(Inst1, Inst2); Optional<bool> &result = AliasCache[key]; if (result) { - return result.getValue(); + return result.value(); } bool aliased = true; if (Loc1.Ptr && isSimple(Inst1)) @@ -4592,7 +4592,19 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth, }; InstructionsState S = getSameOpcode(VL); - if (Depth == RecursionMaxDepth) { + + // Gather if we hit the RecursionMaxDepth, unless this is a load (or z/sext of + // a load), in which case peek through to include it in the tree, without + // ballooning over-budget. 
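For orientation, the load/extend exception in the condition below admits source shapes like the following hypothetical kernel, whose operands at the depth limit are single-use sign-extended loads (function and array names are invented for illustration):

    #include <cstdint>

    // Each operand of the final addition tree is a one-use sext'ed load, the
    // exact pattern the depth check peeks through instead of gathering.
    int64_t sumWidened(const int16_t *A) {
      return (int64_t)A[0] + (int64_t)A[1] + (int64_t)A[2] + (int64_t)A[3];
    }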
+ if (Depth >= RecursionMaxDepth && + !(S.MainOp && isa<Instruction>(S.MainOp) && S.MainOp == S.AltOp && + VL.size() >= 4 && + (match(S.MainOp, m_Load(m_Value())) || all_of(VL, [&S](const Value *I) { + return match(I, + m_OneUse(m_ZExtOrSExt(m_OneUse(m_Load(m_Value()))))) && + cast<Instruction>(I)->getOpcode() == + cast<Instruction>(S.MainOp)->getOpcode(); + })))) { LLVM_DEBUG(dbgs() << "SLP: Gathering due to max recursion depth.\n"); if (TryToFindDuplicates(S)) newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, @@ -11217,7 +11229,7 @@ public: return OptimizationRemarkMissed( SV_NAME, "HorSLPNotBeneficial", ReducedValsToOps.find(VL[0])->second.front()) - << "Vectorizing horizontal reduction is possible" + << "Vectorizing horizontal reduction is possible " << "but not beneficial with cost " << ore::NV("Cost", Cost) << " and threshold " << ore::NV("Threshold", -SLPCostThreshold); diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h index 97f2b1a93815..c7949c42c03e 100644 --- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h +++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h @@ -159,7 +159,8 @@ public: /// Create a replicating region for instruction \p I that requires /// predication. \p PredRecipe is a VPReplicateRecipe holding \p I. - VPRegionBlock *createReplicateRegion(Instruction *I, VPRecipeBase *PredRecipe, + VPRegionBlock *createReplicateRegion(Instruction *I, + VPReplicateRecipe *PredRecipe, VPlanPtr &Plan); /// Build a VPReplicationRecipe for \p I and enclose it within a Region if it diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 4d709097c306..30032dda7f60 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -248,25 +248,27 @@ void VPTransformState::addMetadata(ArrayRef<Value *> To, Instruction *From) { } void VPTransformState::setDebugLocFromInst(const Value *V) { - if (const Instruction *Inst = dyn_cast_or_null<Instruction>(V)) { - const DILocation *DIL = Inst->getDebugLoc(); - - // When a FSDiscriminator is enabled, we don't need to add the multiply - // factors to the discriminators. - if (DIL && Inst->getFunction()->isDebugInfoForProfiling() && - !isa<DbgInfoIntrinsic>(Inst) && !EnableFSDiscriminator) { - // FIXME: For scalable vectors, assume vscale=1. - auto NewDIL = - DIL->cloneByMultiplyingDuplicationFactor(UF * VF.getKnownMinValue()); - if (NewDIL) - Builder.SetCurrentDebugLocation(*NewDIL); - else - LLVM_DEBUG(dbgs() << "Failed to create new discriminator: " - << DIL->getFilename() << " Line: " << DIL->getLine()); - } else - Builder.SetCurrentDebugLocation(DIL); - } else + const Instruction *Inst = dyn_cast<Instruction>(V); + if (!Inst) { Builder.SetCurrentDebugLocation(DebugLoc()); + return; + } + + const DILocation *DIL = Inst->getDebugLoc(); + // When a FSDiscriminator is enabled, we don't need to add the multiply + // factors to the discriminators. + if (DIL && Inst->getFunction()->isDebugInfoForProfiling() && + !isa<DbgInfoIntrinsic>(Inst) && !EnableFSDiscriminator) { + // FIXME: For scalable vectors, assume vscale=1. 
+ auto NewDIL = + DIL->cloneByMultiplyingDuplicationFactor(UF * VF.getKnownMinValue()); + if (NewDIL) + Builder.SetCurrentDebugLocation(*NewDIL); + else + LLVM_DEBUG(dbgs() << "Failed to create new discriminator: " + << DIL->getFilename() << " Line: " << DIL->getLine()); + } else + Builder.SetCurrentDebugLocation(DIL); } BasicBlock * @@ -566,6 +568,24 @@ void VPRegionBlock::print(raw_ostream &O, const Twine &Indent, } #endif +VPActiveLaneMaskPHIRecipe *VPlan::getActiveLaneMaskPhi() { + VPBasicBlock *Header = getVectorLoopRegion()->getEntryBasicBlock(); + for (VPRecipeBase &R : Header->phis()) { + if (isa<VPActiveLaneMaskPHIRecipe>(&R)) + return cast<VPActiveLaneMaskPHIRecipe>(&R); + } + return nullptr; +} + +static bool canSimplifyBranchOnCond(VPInstruction *Term) { + VPInstruction *Not = dyn_cast<VPInstruction>(Term->getOperand(0)); + if (!Not || Not->getOpcode() != VPInstruction::Not) + return false; + + VPInstruction *ALM = dyn_cast<VPInstruction>(Not->getOperand(0)); + return ALM && ALM->getOpcode() == VPInstruction::ActiveLaneMask; +} + void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV, Value *CanonicalIVStartValue, VPTransformState &State, @@ -573,11 +593,15 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV, VPBasicBlock *ExitingVPBB = getVectorLoopRegion()->getExitingBasicBlock(); auto *Term = dyn_cast<VPInstruction>(&ExitingVPBB->back()); - // Try to simplify BranchOnCount to 'BranchOnCond true' if TC <= VF * UF when - // preparing to execute the plan for the main vector loop. - if (!IsEpilogueVectorization && Term && - Term->getOpcode() == VPInstruction::BranchOnCount && - isa<ConstantInt>(TripCountV)) { + // Try to simplify the branch condition if TC <= VF * UF when preparing to + // execute the plan for the main vector loop. We only do this if the + // terminator is: + // 1. BranchOnCount, or + // 2. BranchOnCond where the input is Not(ActiveLaneMask). + if (!IsEpilogueVectorization && Term && isa<ConstantInt>(TripCountV) && + (Term->getOpcode() == VPInstruction::BranchOnCount || + (Term->getOpcode() == VPInstruction::BranchOnCond && + canSimplifyBranchOnCond(Term)))) { ConstantInt *C = cast<ConstantInt>(TripCountV); uint64_t TCVal = C->getZExtValue(); if (TCVal && TCVal <= State.VF.getKnownMinValue() * State.UF) { @@ -697,7 +721,8 @@ void VPlan::execute(VPTransformState *State) { // generated. bool SinglePartNeeded = isa<VPCanonicalIVPHIRecipe>(PhiR) || isa<VPFirstOrderRecurrencePHIRecipe>(PhiR) || - cast<VPReductionPHIRecipe>(PhiR)->isOrdered(); + (isa<VPReductionPHIRecipe>(PhiR) && + cast<VPReductionPHIRecipe>(PhiR)->isOrdered()); unsigned LastPartForNewPhi = SinglePartNeeded ? 1 : State->UF; for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) { diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 09da4a545d0d..f009a7ee6b4b 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -784,6 +784,10 @@ public: ActiveLaneMask, CanonicalIVIncrement, CanonicalIVIncrementNUW, + // The next two are similar to the above, but instead increment the + // canonical IV separately for each unrolled part. + CanonicalIVIncrementForPart, + CanonicalIVIncrementForPartNUW, BranchOnCount, BranchOnCond }; @@ -794,6 +798,9 @@ private: FastMathFlags FMF; DebugLoc DL; + /// An optional name that can be used for the generated IR instruction. + const std::string Name; + /// Utility method serving execute(): generates a single instance of the /// modeled instruction. 
void generateInstruction(VPTransformState &State, unsigned Part); @@ -802,14 +809,15 @@ protected: void setUnderlyingInstr(Instruction *I) { setUnderlyingValue(I); } public: - VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands, DebugLoc DL) + VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands, DebugLoc DL, + const Twine &Name = "") : VPRecipeBase(VPRecipeBase::VPInstructionSC, Operands), VPValue(VPValue::VPVInstructionSC, nullptr, this), Opcode(Opcode), - DL(DL) {} + DL(DL), Name(Name.str()) {} VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands, - DebugLoc DL = {}) - : VPInstruction(Opcode, ArrayRef<VPValue *>(Operands), DL) {} + DebugLoc DL = {}, const Twine &Name = "") + : VPInstruction(Opcode, ArrayRef<VPValue *>(Operands), DL, Name) {} /// Method to support type inquiry through isa, cast, and dyn_cast. static inline bool classof(const VPValue *V) { @@ -818,7 +826,7 @@ public: VPInstruction *clone() const { SmallVector<VPValue *, 2> Operands(operands()); - return new VPInstruction(Opcode, Operands, DL); + return new VPInstruction(Opcode, Operands, DL, Name); } /// Method to support type inquiry through isa, cast, and dyn_cast. @@ -897,6 +905,8 @@ public: case VPInstruction::ActiveLaneMask: case VPInstruction::CanonicalIVIncrement: case VPInstruction::CanonicalIVIncrementNUW: + case VPInstruction::CanonicalIVIncrementForPart: + case VPInstruction::CanonicalIVIncrementForPartNUW: case VPInstruction::BranchOnCount: return true; }; @@ -1125,6 +1135,7 @@ public: /// Method to support type inquiry through isa, cast, and dyn_cast. static inline bool classof(const VPRecipeBase *B) { return B->getVPDefID() == VPRecipeBase::VPCanonicalIVPHISC || + B->getVPDefID() == VPRecipeBase::VPActiveLaneMaskPHISC || B->getVPDefID() == VPRecipeBase::VPFirstOrderRecurrencePHISC || B->getVPDefID() == VPRecipeBase::VPReductionPHISC || B->getVPDefID() == VPRecipeBase::VPWidenIntOrFpInductionSC || @@ -1132,6 +1143,7 @@ public: } static inline bool classof(const VPValue *V) { return V->getVPValueID() == VPValue::VPVCanonicalIVPHISC || + V->getVPValueID() == VPValue::VPVActiveLaneMaskPHISC || V->getVPValueID() == VPValue::VPVFirstOrderRecurrencePHISC || V->getVPValueID() == VPValue::VPVReductionPHISC || V->getVPValueID() == VPValue::VPVWidenIntOrFpInductionSC || @@ -1861,6 +1873,42 @@ public: } }; +/// A recipe for generating the active lane mask for the vector loop that is +/// used to predicate the vector operations. +/// TODO: It would be good to use the existing VPWidenPHIRecipe instead and +/// remove VPActiveLaneMaskPHIRecipe. +class VPActiveLaneMaskPHIRecipe : public VPHeaderPHIRecipe { + DebugLoc DL; + +public: + VPActiveLaneMaskPHIRecipe(VPValue *StartMask, DebugLoc DL) + : VPHeaderPHIRecipe(VPValue::VPVActiveLaneMaskPHISC, + VPActiveLaneMaskPHISC, nullptr, StartMask), + DL(DL) {} + + ~VPActiveLaneMaskPHIRecipe() override = default; + + /// Method to support type inquiry through isa, cast, and dyn_cast. + static inline bool classof(const VPDef *D) { + return D->getVPDefID() == VPActiveLaneMaskPHISC; + } + static inline bool classof(const VPHeaderPHIRecipe *D) { + return D->getVPDefID() == VPActiveLaneMaskPHISC; + } + static inline bool classof(const VPValue *V) { + return V->getVPValueID() == VPValue::VPVActiveLaneMaskPHISC; + } + + /// Generate the active lane mask phi of the vector loop. + void execute(VPTransformState &State) override; + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + /// Print the recipe. 
+ void print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const override;
+#endif
+};
+
 /// A Recipe for widening the canonical induction variable of the vector loop.
 class VPWidenCanonicalIVRecipe : public VPRecipeBase, public VPValue {
 public:
@@ -2656,6 +2704,10 @@ public:
 return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
 }
+ /// Find and return the VPActiveLaneMaskPHIRecipe from the header - there
+ /// should be at most one. If there isn't one, then return nullptr.
+ VPActiveLaneMaskPHIRecipe *getActiveLaneMaskPhi();
+
 void addLiveOut(PHINode *PN, VPValue *V);
 void clearLiveOuts() {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 92422b17457c..fdd901a4a70d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -26,13 +26,19 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
 #include <cassert>
 using namespace llvm;
+using VectorParts = SmallVector<Value *, 2>;
+
 extern cl::opt<bool> EnableVPlanNativePath;
+#define LV_NAME "loop-vectorize"
+#define DEBUG_TYPE LV_NAME
+
 bool VPRecipeBase::mayWriteToMemory() const {
 switch (getVPDefID()) {
 case VPWidenMemoryInstructionSC: {
@@ -186,7 +192,8 @@ void VPInstruction::generateInstruction(VPTransformState &State,
 if (Instruction::isBinaryOp(getOpcode())) {
 Value *A = State.get(getOperand(0), Part);
 Value *B = State.get(getOperand(1), Part);
- Value *V = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B);
+ Value *V =
+ Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name);
 State.set(this, V, Part);
 return;
 }
@@ -194,14 +201,14 @@ void VPInstruction::generateInstruction(VPTransformState &State,
 switch (getOpcode()) {
 case VPInstruction::Not: {
 Value *A = State.get(getOperand(0), Part);
- Value *V = Builder.CreateNot(A);
+ Value *V = Builder.CreateNot(A, Name);
 State.set(this, V, Part);
 break;
 }
 case VPInstruction::ICmpULE: {
 Value *IV = State.get(getOperand(0), Part);
 Value *TC = State.get(getOperand(1), Part);
- Value *V = Builder.CreateICmpULE(IV, TC);
+ Value *V = Builder.CreateICmpULE(IV, TC, Name);
 State.set(this, V, Part);
 break;
 }
@@ -209,7 +216,7 @@ void VPInstruction::generateInstruction(VPTransformState &State,
 Value *Cond = State.get(getOperand(0), Part);
 Value *Op1 = State.get(getOperand(1), Part);
 Value *Op2 = State.get(getOperand(2), Part);
- Value *V = Builder.CreateSelect(Cond, Op1, Op2);
+ Value *V = Builder.CreateSelect(Cond, Op1, Op2, Name);
 State.set(this, V, Part);
 break;
 }
@@ -223,7 +230,7 @@ void VPInstruction::generateInstruction(VPTransformState &State,
 auto *PredTy = VectorType::get(Int1Ty, State.VF);
 Instruction *Call = Builder.CreateIntrinsic(
 Intrinsic::get_active_lane_mask, {PredTy, ScalarTC->getType()},
- {VIVElem0, ScalarTC}, nullptr, "active.lane.mask");
+ {VIVElem0, ScalarTC}, nullptr, Name);
 State.set(this, Call, Part);
 break;
 }
@@ -247,7 +254,8 @@ void VPInstruction::generateInstruction(VPTransformState &State,
 State.set(this, PartMinus1, Part);
 } else {
 Value *V2 = State.get(getOperand(1), Part);
- State.set(this, Builder.CreateVectorSplice(PartMinus1, V2, -1), Part);
+ State.set(this, Builder.CreateVectorSplice(PartMinus1, V2, -1, Name),
+ Part);
 }
 break;
 }
@@ -261,7 +269,7 @@ void VPInstruction::generateInstruction(VPTransformState &State,
 // elements) times
the unroll factor (num of SIMD instructions). Value *Step = createStepForVF(Builder, Phi->getType(), State.VF, State.UF); - Next = Builder.CreateAdd(Phi, Step, "index.next", IsNUW, false); + Next = Builder.CreateAdd(Phi, Step, Name, IsNUW, false); } else { Next = State.get(this, 0); } @@ -269,6 +277,23 @@ void VPInstruction::generateInstruction(VPTransformState &State, State.set(this, Next, Part); break; } + + case VPInstruction::CanonicalIVIncrementForPart: + case VPInstruction::CanonicalIVIncrementForPartNUW: { + bool IsNUW = getOpcode() == VPInstruction::CanonicalIVIncrementForPartNUW; + auto *IV = State.get(getOperand(0), VPIteration(0, 0)); + if (Part == 0) { + State.set(this, IV, Part); + break; + } + + // The canonical IV is incremented by the vectorization factor (num of SIMD + // elements) times the unroll part. + Value *Step = createStepForVF(Builder, IV->getType(), State.VF, Part); + Value *Next = Builder.CreateAdd(IV, Step, Name, IsNUW, false); + State.set(this, Next, Part); + break; + } case VPInstruction::BranchOnCond: { if (Part != 0) break; @@ -370,6 +395,12 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent, case VPInstruction::BranchOnCond: O << "branch-on-cond"; break; + case VPInstruction::CanonicalIVIncrementForPart: + O << "VF * Part + "; + break; + case VPInstruction::CanonicalIVIncrementForPartNUW: + O << "VF * Part +(nuw) "; + break; case VPInstruction::BranchOnCount: O << "branch-on-count "; break; @@ -431,7 +462,158 @@ void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent, getOperand(2)->printAsOperand(O, SlotTracker); O << (InvariantCond ? " (condition is loop invariant)" : ""); } +#endif + +void VPWidenSelectRecipe::execute(VPTransformState &State) { + auto &I = *cast<SelectInst>(getUnderlyingInstr()); + State.setDebugLocFromInst(&I); + + // The condition can be loop invariant but still defined inside the + // loop. This means that we can't just use the original 'cond' value. + // We have to take the 'vectorized' value and pick the first lane. + // Instcombine will make this a no-op. + auto *InvarCond = + InvariantCond ? State.get(getOperand(0), VPIteration(0, 0)) : nullptr; + + for (unsigned Part = 0; Part < State.UF; ++Part) { + Value *Cond = InvarCond ? InvarCond : State.get(getOperand(0), Part); + Value *Op0 = State.get(getOperand(1), Part); + Value *Op1 = State.get(getOperand(2), Part); + Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1); + State.set(this, Sel, Part); + State.addMetadata(Sel, &I); + } +} + +void VPWidenRecipe::execute(VPTransformState &State) { + auto &I = *cast<Instruction>(getUnderlyingValue()); + auto &Builder = State.Builder; + switch (I.getOpcode()) { + case Instruction::Call: + case Instruction::Br: + case Instruction::PHI: + case Instruction::GetElementPtr: + case Instruction::Select: + llvm_unreachable("This instruction is handled by a different recipe."); + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::SRem: + case Instruction::URem: + case Instruction::Add: + case Instruction::FAdd: + case Instruction::Sub: + case Instruction::FSub: + case Instruction::FNeg: + case Instruction::Mul: + case Instruction::FMul: + case Instruction::FDiv: + case Instruction::FRem: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: { + // Just widen unops and binops. 
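The per-part increment handled above leaves part 0 at the unincremented IV and advances part N by VF * N, rather than stepping every part by VF * UF; a minimal scalar sketch with made-up values:

    #include <cstdint>

    // Scalar model of CanonicalIVIncrementForPart: part 0 reuses the
    // canonical IV, part N (N > 0) sees IV advanced by VF * N.
    uint64_t ivForPart(uint64_t IV, unsigned VF, unsigned Part) {
      return Part == 0 ? IV : IV + uint64_t(VF) * Part;
    }
    // e.g. with IV = 100 and VF = 4, parts 0..3 see 100, 104, 108, 112.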
+ State.setDebugLocFromInst(&I); + + for (unsigned Part = 0; Part < State.UF; ++Part) { + SmallVector<Value *, 2> Ops; + for (VPValue *VPOp : operands()) + Ops.push_back(State.get(VPOp, Part)); + + Value *V = Builder.CreateNAryOp(I.getOpcode(), Ops); + + if (auto *VecOp = dyn_cast<Instruction>(V)) { + VecOp->copyIRFlags(&I); + + // If the instruction is vectorized and was in a basic block that needed + // predication, we can't propagate poison-generating flags (nuw/nsw, + // exact, etc.). The control flow has been linearized and the + // instruction is no longer guarded by the predicate, which could make + // the flag properties to no longer hold. + if (State.MayGeneratePoisonRecipes.contains(this)) + VecOp->dropPoisonGeneratingFlags(); + } + + // Use this vector value for all users of the original instruction. + State.set(this, V, Part); + State.addMetadata(V, &I); + } + + break; + } + case Instruction::Freeze: { + State.setDebugLocFromInst(&I); + + for (unsigned Part = 0; Part < State.UF; ++Part) { + Value *Op = State.get(getOperand(0), Part); + + Value *Freeze = Builder.CreateFreeze(Op); + State.set(this, Freeze, Part); + } + break; + } + case Instruction::ICmp: + case Instruction::FCmp: { + // Widen compares. Generate vector compares. + bool FCmp = (I.getOpcode() == Instruction::FCmp); + auto *Cmp = cast<CmpInst>(&I); + State.setDebugLocFromInst(Cmp); + for (unsigned Part = 0; Part < State.UF; ++Part) { + Value *A = State.get(getOperand(0), Part); + Value *B = State.get(getOperand(1), Part); + Value *C = nullptr; + if (FCmp) { + // Propagate fast math flags. + IRBuilder<>::FastMathFlagGuard FMFG(Builder); + Builder.setFastMathFlags(Cmp->getFastMathFlags()); + C = Builder.CreateFCmp(Cmp->getPredicate(), A, B); + } else { + C = Builder.CreateICmp(Cmp->getPredicate(), A, B); + } + State.set(this, C, Part); + State.addMetadata(C, &I); + } + break; + } + + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::FPExt: + case Instruction::PtrToInt: + case Instruction::IntToPtr: + case Instruction::SIToFP: + case Instruction::UIToFP: + case Instruction::Trunc: + case Instruction::FPTrunc: + case Instruction::BitCast: { + auto *CI = cast<CastInst>(&I); + State.setDebugLocFromInst(CI); + + /// Vectorize casts. + Type *DestTy = (State.VF.isScalar()) + ? CI->getType() + : VectorType::get(CI->getType(), State.VF); + + for (unsigned Part = 0; Part < State.UF; ++Part) { + Value *A = State.get(getOperand(0), Part); + Value *Cast = Builder.CreateCast(CI->getOpcode(), A, DestTy); + State.set(this, Cast, Part); + State.addMetadata(Cast, &I); + } + break; + } + default: + // This instruction is not vectorized by simple widening. + LLVM_DEBUG(dbgs() << "LV: Found an unhandled instruction: " << I); + llvm_unreachable("Unhandled instruction!"); + } // end of switch. +} +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { O << Indent << "WIDEN "; @@ -487,7 +669,82 @@ void VPScalarIVStepsRecipe::print(raw_ostream &O, const Twine &Indent, O << Indent << "= SCALAR-STEPS "; printOperands(O, SlotTracker); } +#endif + +void VPWidenGEPRecipe::execute(VPTransformState &State) { + auto *GEP = cast<GetElementPtrInst>(getUnderlyingInstr()); + // Construct a vector GEP by widening the operands of the scalar GEP as + // necessary. We mark the vector GEP 'inbounds' if appropriate. 
A GEP + // results in a vector of pointers when at least one operand of the GEP + // is vector-typed. Thus, to keep the representation compact, we only use + // vector-typed operands for loop-varying values. + + if (State.VF.isVector() && IsPtrLoopInvariant && IsIndexLoopInvariant.all()) { + // If we are vectorizing, but the GEP has only loop-invariant operands, + // the GEP we build (by only using vector-typed operands for + // loop-varying values) would be a scalar pointer. Thus, to ensure we + // produce a vector of pointers, we need to either arbitrarily pick an + // operand to broadcast, or broadcast a clone of the original GEP. + // Here, we broadcast a clone of the original. + // + // TODO: If at some point we decide to scalarize instructions having + // loop-invariant operands, this special case will no longer be + // required. We would add the scalarization decision to + // collectLoopScalars() and teach getVectorValue() to broadcast + // the lane-zero scalar value. + auto *Clone = State.Builder.Insert(GEP->clone()); + for (unsigned Part = 0; Part < State.UF; ++Part) { + Value *EntryPart = State.Builder.CreateVectorSplat(State.VF, Clone); + State.set(this, EntryPart, Part); + State.addMetadata(EntryPart, GEP); + } + } else { + // If the GEP has at least one loop-varying operand, we are sure to + // produce a vector of pointers. But if we are only unrolling, we want + // to produce a scalar GEP for each unroll part. Thus, the GEP we + // produce with the code below will be scalar (if VF == 1) or vector + // (otherwise). Note that for the unroll-only case, we still maintain + // values in the vector mapping with initVector, as we do for other + // instructions. + for (unsigned Part = 0; Part < State.UF; ++Part) { + // The pointer operand of the new GEP. If it's loop-invariant, we + // won't broadcast it. + auto *Ptr = IsPtrLoopInvariant + ? State.get(getOperand(0), VPIteration(0, 0)) + : State.get(getOperand(0), Part); + + // Collect all the indices for the new GEP. If any index is + // loop-invariant, we won't broadcast it. + SmallVector<Value *, 4> Indices; + for (unsigned I = 1, E = getNumOperands(); I < E; I++) { + VPValue *Operand = getOperand(I); + if (IsIndexLoopInvariant[I - 1]) + Indices.push_back(State.get(Operand, VPIteration(0, 0))); + else + Indices.push_back(State.get(Operand, Part)); + } + + // If the GEP instruction is vectorized and was in a basic block that + // needed predication, we can't propagate the poison-generating 'inbounds' + // flag. The control flow has been linearized and the GEP is no longer + // guarded by the predicate, which could make the 'inbounds' properties to + // no longer hold. + bool IsInBounds = + GEP->isInBounds() && State.MayGeneratePoisonRecipes.count(this) == 0; + + // Create the new GEP. Note that this GEP may be a scalar if VF == 1, + // but it should be a vector, otherwise. 
+ auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ptr, + Indices, "", IsInBounds); + assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) && + "NewGEP is not a pointer vector"); + State.set(this, NewGEP, Part); + State.addMetadata(NewGEP, GEP); + } + } +} +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { O << Indent << "WIDEN-GEP "; @@ -501,7 +758,48 @@ void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent, O << " = getelementptr "; printOperands(O, SlotTracker); } +#endif +void VPBlendRecipe::execute(VPTransformState &State) { + State.setDebugLocFromInst(Phi); + // We know that all PHIs in non-header blocks are converted into + // selects, so we don't have to worry about the insertion order and we + // can just use the builder. + // At this point we generate the predication tree. There may be + // duplications since this is a simple recursive scan, but future + // optimizations will clean it up. + + unsigned NumIncoming = getNumIncomingValues(); + + // Generate a sequence of selects of the form: + // SELECT(Mask3, In3, + // SELECT(Mask2, In2, + // SELECT(Mask1, In1, + // In0))) + // Note that Mask0 is never used: lanes for which no path reaches this phi and + // are essentially undef are taken from In0. + VectorParts Entry(State.UF); + for (unsigned In = 0; In < NumIncoming; ++In) { + for (unsigned Part = 0; Part < State.UF; ++Part) { + // We might have single edge PHIs (blocks) - use an identity + // 'select' for the first PHI operand. + Value *In0 = State.get(getIncomingValue(In), Part); + if (In == 0) + Entry[Part] = In0; // Initialize with the first incoming value. + else { + // Select between the current value and the previous incoming edge + // based on the incoming mask. + Value *Cond = State.get(getMask(In), Part); + Entry[Part] = + State.Builder.CreateSelect(Cond, In0, Entry[Part], "predphi"); + } + } + } + for (unsigned Part = 0; Part < State.UF; ++Part) + State.set(this, Entry[Part], Part); +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { O << Indent << "BLEND "; @@ -566,7 +864,35 @@ void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent, if (AlsoPack) O << " (S->V)"; } +#endif +void VPBranchOnMaskRecipe::execute(VPTransformState &State) { + assert(State.Instance && "Branch on Mask works only on single instance."); + + unsigned Part = State.Instance->Part; + unsigned Lane = State.Instance->Lane.getKnownLane(); + + Value *ConditionBit = nullptr; + VPValue *BlockInMask = getMask(); + if (BlockInMask) { + ConditionBit = State.get(BlockInMask, Part); + if (ConditionBit->getType()->isVectorTy()) + ConditionBit = State.Builder.CreateExtractElement( + ConditionBit, State.Builder.getInt32(Lane)); + } else // Block in mask is all-one. + ConditionBit = State.Builder.getTrue(); + + // Replace the temporary unreachable terminator with a new conditional branch, + // whose two destinations will be set later when they are created. 
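The select chain built by VPBlendRecipe::execute above has a straightforward scalar analogue; a sketch with placeholder incoming values and masks:

    #include <vector>

    // Scalar analogue of SELECT(Mask2, In2, SELECT(Mask1, In1, In0)): the
    // first incoming value is the identity, and Mask0 is never consulted.
    int blend(const std::vector<int> &In, const std::vector<bool> &Mask) {
      int Entry = In[0];
      for (size_t I = 1; I < In.size(); ++I)
        Entry = Mask[I] ? In[I] : Entry; // one "predphi" select per edge
      return Entry;
    }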
+ auto *CurrentTerminator = State.CFG.PrevBB->getTerminator(); + assert(isa<UnreachableInst>(CurrentTerminator) && + "Expected to replace unreachable terminator with conditional branch."); + auto *CondBr = BranchInst::Create(State.CFG.PrevBB, nullptr, ConditionBit); + CondBr->setSuccessor(0, nullptr); + ReplaceInstWithInst(CurrentTerminator, CondBr); +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { O << Indent << "PHI-PREDICATED-INSTRUCTION "; @@ -838,3 +1164,28 @@ void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent, printOperands(O, SlotTracker); } #endif + +// TODO: It would be good to use the existing VPWidenPHIRecipe instead and +// remove VPActiveLaneMaskPHIRecipe. +void VPActiveLaneMaskPHIRecipe::execute(VPTransformState &State) { + BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this); + for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) { + Value *StartMask = State.get(getOperand(0), Part); + PHINode *EntryPart = + State.Builder.CreatePHI(StartMask->getType(), 2, "active.lane.mask"); + EntryPart->addIncoming(StartMask, VectorPH); + EntryPart->setDebugLoc(DL); + State.set(this, EntryPart, Part); + } +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void VPActiveLaneMaskPHIRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { + O << Indent << "ACTIVE-LANE-MASK-PHI "; + + printAsOperand(O, SlotTracker); + O << " = phi "; + printOperands(O, SlotTracker); +} +#endif diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h index 5fc676834331..c99fae1b2ab4 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanValue.h +++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -103,6 +103,7 @@ public: // Phi-like VPValues. Need to be kept together. VPVBlendSC, VPVCanonicalIVPHISC, + VPVActiveLaneMaskPHISC, VPVFirstOrderRecurrencePHISC, VPVWidenPHISC, VPVWidenIntOrFpInductionSC, @@ -358,6 +359,7 @@ public: // Phi-like recipes. Need to be kept together. VPBlendSC, VPCanonicalIVPHISC, + VPActiveLaneMaskPHISC, VPFirstOrderRecurrencePHISC, VPWidenPHISC, VPWidenIntOrFpInductionSC, diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp index f917883145c0..3501de6ab38e 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp @@ -133,32 +133,48 @@ void VPlanVerifier::verifyHierarchicalCFG( verifyRegionRec(TopRegion); } -bool VPlanVerifier::verifyPlanIsValid(const VPlan &Plan) { - auto Iter = depth_first( - VPBlockRecursiveTraversalWrapper<const VPBlockBase *>(Plan.getEntry())); - for (const VPBasicBlock *VPBB : - VPBlockUtils::blocksOnly<const VPBasicBlock>(Iter)) { - // Verify that phi-like recipes are at the beginning of the block, with no - // other recipes in between. - auto RecipeI = VPBB->begin(); - auto End = VPBB->end(); - while (RecipeI != End && RecipeI->isPhi()) - RecipeI++; +static bool verifyVPBasicBlock(const VPBasicBlock *VPBB) { + // Verify that phi-like recipes are at the beginning of the block, with no + // other recipes in between. 
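The llvm.get.active.lane.mask calls feeding the recipes above reduce, per lane, to a bounds test against the trip count; a scalar model assuming a fixed VF:

    #include <array>
    #include <cstdint>

    // Lane I of get.active.lane.mask(Base, TC) is active iff Base + I < TC,
    // so only the tail iterations end up partially masked.
    template <unsigned VF>
    std::array<bool, VF> activeLaneMask(uint64_t Base, uint64_t TC) {
      std::array<bool, VF> Mask{};
      for (unsigned I = 0; I < VF; ++I)
        Mask[I] = Base + I < TC;
      return Mask;
    }
    // e.g. activeLaneMask<4>(6, 8) yields {true, true, false, false}.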
+ auto RecipeI = VPBB->begin(); + auto End = VPBB->end(); + unsigned NumActiveLaneMaskPhiRecipes = 0; + while (RecipeI != End && RecipeI->isPhi()) { + if (isa<VPActiveLaneMaskPHIRecipe>(RecipeI)) + NumActiveLaneMaskPhiRecipes++; + RecipeI++; + } - while (RecipeI != End) { - if (RecipeI->isPhi() && !isa<VPBlendRecipe>(&*RecipeI)) { - errs() << "Found phi-like recipe after non-phi recipe"; + if (NumActiveLaneMaskPhiRecipes > 1) { + errs() << "There should be no more than one VPActiveLaneMaskPHIRecipe"; + return false; + } + + while (RecipeI != End) { + if (RecipeI->isPhi() && !isa<VPBlendRecipe>(&*RecipeI)) { + errs() << "Found phi-like recipe after non-phi recipe"; #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - errs() << ": "; - RecipeI->dump(); - errs() << "after\n"; - std::prev(RecipeI)->dump(); + errs() << ": "; + RecipeI->dump(); + errs() << "after\n"; + std::prev(RecipeI)->dump(); #endif - return false; - } - RecipeI++; + return false; } + RecipeI++; + } + + return true; +} + +bool VPlanVerifier::verifyPlanIsValid(const VPlan &Plan) { + auto Iter = depth_first( + VPBlockRecursiveTraversalWrapper<const VPBlockBase *>(Plan.getEntry())); + for (const VPBasicBlock *VPBB : + VPBlockUtils::blocksOnly<const VPBasicBlock>(Iter)) { + if (!verifyVPBasicBlock(VPBB)) + return false; } const VPRegionBlock *TopRegion = Plan.getVectorLoopRegion(); @@ -181,15 +197,16 @@ bool VPlanVerifier::verifyPlanIsValid(const VPlan &Plan) { } if (Exiting->empty()) { - errs() << "VPlan vector loop exiting block must end with BranchOnCount " - "VPInstruction but is empty\n"; + errs() << "VPlan vector loop exiting block must end with BranchOnCount or " + "BranchOnCond VPInstruction but is empty\n"; return false; } auto *LastInst = dyn_cast<VPInstruction>(std::prev(Exiting->end())); - if (!LastInst || LastInst->getOpcode() != VPInstruction::BranchOnCount) { - errs() << "VPlan vector loop exit must end with BranchOnCount " - "VPInstruction\n"; + if (!LastInst || (LastInst->getOpcode() != VPInstruction::BranchOnCount && + LastInst->getOpcode() != VPInstruction::BranchOnCond)) { + errs() << "VPlan vector loop exit must end with BranchOnCount or " + "BranchOnCond VPInstruction\n"; return false; } diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index 90598937affc..d12624ffb824 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -414,6 +414,10 @@ static Value *createShiftShuffle(Value *Vec, unsigned OldIndex, static ExtractElementInst *translateExtract(ExtractElementInst *ExtElt, unsigned NewIndex, IRBuilder<> &Builder) { + // Shufflevectors can only be created for fixed-width vectors. + if (!isa<FixedVectorType>(ExtElt->getOperand(0)->getType())) + return nullptr; + // If the extract can be constant-folded, this code is unsimplified. Defer // to other passes to handle that. 
Value *X = ExtElt->getVectorOperand(); @@ -1249,14 +1253,20 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) { if (!Op0 || !Op1 || Op0 == Op1 || !Op0->isBinaryOp() || !Op1->isBinaryOp() || VT != Op0->getType()) return false; - auto *SVI0A = dyn_cast<ShuffleVectorInst>(Op0->getOperand(0)); - auto *SVI0B = dyn_cast<ShuffleVectorInst>(Op0->getOperand(1)); - auto *SVI1A = dyn_cast<ShuffleVectorInst>(Op1->getOperand(0)); - auto *SVI1B = dyn_cast<ShuffleVectorInst>(Op1->getOperand(1)); + auto *SVI0A = dyn_cast<Instruction>(Op0->getOperand(0)); + auto *SVI0B = dyn_cast<Instruction>(Op0->getOperand(1)); + auto *SVI1A = dyn_cast<Instruction>(Op1->getOperand(0)); + auto *SVI1B = dyn_cast<Instruction>(Op1->getOperand(1)); + SmallPtrSet<Instruction *, 4> InputShuffles({SVI0A, SVI0B, SVI1A, SVI1B}); auto checkSVNonOpUses = [&](Instruction *I) { if (!I || I->getOperand(0)->getType() != VT) return true; - return any_of(I->users(), [&](User *U) { return U != Op0 && U != Op1; }); + return any_of(I->users(), [&](User *U) { + return U != Op0 && U != Op1 && + !(isa<ShuffleVectorInst>(U) && + (InputShuffles.contains(cast<Instruction>(U)) || + isInstructionTriviallyDead(cast<Instruction>(U)))); + }); }; if (checkSVNonOpUses(SVI0A) || checkSVNonOpUses(SVI0B) || checkSVNonOpUses(SVI1A) || checkSVNonOpUses(SVI1B)) @@ -1271,6 +1281,9 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) { auto *SV = dyn_cast<ShuffleVectorInst>(U); if (!SV || SV->getType() != VT) return false; + if ((SV->getOperand(0) != Op0 && SV->getOperand(0) != Op1) || + (SV->getOperand(1) != Op0 && SV->getOperand(1) != Op1)) + return false; if (!llvm::is_contained(Shuffles, SV)) Shuffles.push_back(SV); } @@ -1283,13 +1296,25 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) { if (FromReduction && Shuffles.size() > 1) return false; + // Add any shuffle uses for the shuffles we have found, to include them in our + // cost calculations. + if (!FromReduction) { + for (ShuffleVectorInst *SV : Shuffles) { + for (auto U : SV->users()) { + ShuffleVectorInst *SSV = dyn_cast<ShuffleVectorInst>(U); + if (SSV && isa<UndefValue>(SSV->getOperand(1))) + Shuffles.push_back(SSV); + } + } + } + // For each of the output shuffles, we try to sort all the first vector // elements to the beginning, followed by the second array elements at the // end. If the binops are legalized to smaller vectors, this may reduce total // number of binops. We compute the ReconstructMask mask needed to convert // back to the original lane order. - SmallVector<int> V1, V2; - SmallVector<SmallVector<int>> ReconstructMasks; + SmallVector<std::pair<int, int>> V1, V2; + SmallVector<SmallVector<int>> OrigReconstructMasks; int MaxV1Elt = 0, MaxV2Elt = 0; unsigned NumElts = VT->getNumElements(); for (ShuffleVectorInst *SVN : Shuffles) { @@ -1300,6 +1325,16 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) { // case we need to commute the mask). Value *SVOp0 = SVN->getOperand(0); Value *SVOp1 = SVN->getOperand(1); + if (isa<UndefValue>(SVOp1)) { + auto *SSV = cast<ShuffleVectorInst>(SVOp0); + SVOp0 = SSV->getOperand(0); + SVOp1 = SSV->getOperand(1); + for (unsigned I = 0, E = Mask.size(); I != E; I++) { + if (Mask[I] >= static_cast<int>(SSV->getShuffleMask().size())) + return false; + Mask[I] = Mask[I] < 0 ? 
Mask[I] : SSV->getMaskValue(Mask[I]);
+ }
+ }
 if (SVOp0 == Op1 && SVOp1 == Op0) {
 std::swap(SVOp0, SVOp1);
 ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
@@ -1316,21 +1351,25 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
 ReconstructMask.push_back(-1);
 } else if (Mask[I] < static_cast<int>(NumElts)) {
 MaxV1Elt = std::max(MaxV1Elt, Mask[I]);
- auto It = find(V1, Mask[I]);
+ auto It = find_if(V1, [&](const std::pair<int, int> &A) {
+ return Mask[I] == A.first;
+ });
 if (It != V1.end())
 ReconstructMask.push_back(It - V1.begin());
 else {
 ReconstructMask.push_back(V1.size());
- V1.push_back(Mask[I]);
+ V1.emplace_back(Mask[I], V1.size());
 }
 } else {
 MaxV2Elt = std::max<int>(MaxV2Elt, Mask[I] - NumElts);
- auto It = find(V2, Mask[I] - NumElts);
+ auto It = find_if(V2, [&](const std::pair<int, int> &A) {
+ return Mask[I] - static_cast<int>(NumElts) == A.first;
+ });
 if (It != V2.end())
 ReconstructMask.push_back(NumElts + It - V2.begin());
 else {
 ReconstructMask.push_back(NumElts + V2.size());
- V2.push_back(Mask[I] - NumElts);
+ V2.emplace_back(Mask[I] - NumElts, NumElts + V2.size());
 }
 }
 }
@@ -1339,7 +1378,7 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
 // result. In-order can help simplify the shuffle away.
 if (FromReduction)
 sort(ReconstructMask);
- ReconstructMasks.push_back(ReconstructMask);
+ OrigReconstructMasks.push_back(std::move(ReconstructMask));
 }
 // If the Maximum element used from V1 and V2 are not larger than the new
@@ -1351,16 +1390,68 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
 MaxV2Elt == static_cast<int>(V2.size()) - 1))
 return false;
+ // GetBaseMaskValue takes one of the inputs, which may either be a shuffle, a
+ // shuffle of another shuffle, or not a shuffle (that is treated like an
+ // identity shuffle).
+ auto GetBaseMaskValue = [&](Instruction *I, int M) {
+ auto *SV = dyn_cast<ShuffleVectorInst>(I);
+ if (!SV)
+ return M;
+ if (isa<UndefValue>(SV->getOperand(1)))
+ if (auto *SSV = dyn_cast<ShuffleVectorInst>(SV->getOperand(0)))
+ if (InputShuffles.contains(SSV))
+ return SSV->getMaskValue(SV->getMaskValue(M));
+ return SV->getMaskValue(M);
+ };
+
+ // Attempt to sort the inputs by ascending mask values to make simpler input
+ // shuffles and push complex shuffles down to the uses. We sort on the first
+ // of the two input shuffle orders, to try and get at least one input into a
+ // nice order.
+ auto SortBase = [&](Instruction *A, std::pair<int, int> X,
+ std::pair<int, int> Y) {
+ int MXA = GetBaseMaskValue(A, X.first);
+ int MYA = GetBaseMaskValue(A, Y.first);
+ return MXA < MYA;
+ };
+ stable_sort(V1, [&](std::pair<int, int> A, std::pair<int, int> B) {
+ return SortBase(SVI0A, A, B);
+ });
+ stable_sort(V2, [&](std::pair<int, int> A, std::pair<int, int> B) {
+ return SortBase(SVI1A, A, B);
+ });
+ // Calculate our ReconstructMasks from the OrigReconstructMasks and the
+ // modified order of the input shuffles.
+ SmallVector<SmallVector<int>> ReconstructMasks; + for (auto Mask : OrigReconstructMasks) { + SmallVector<int> ReconstructMask; + for (int M : Mask) { + auto FindIndex = [](const SmallVector<std::pair<int, int>> &V, int M) { + auto It = find_if(V, [M](auto A) { return A.second == M; }); + assert(It != V.end() && "Expected all entries in Mask"); + return std::distance(V.begin(), It); + }; + if (M < 0) + ReconstructMask.push_back(-1); + else if (M < static_cast<int>(NumElts)) { + ReconstructMask.push_back(FindIndex(V1, M)); + } else { + ReconstructMask.push_back(NumElts + FindIndex(V2, M)); + } + } + ReconstructMasks.push_back(std::move(ReconstructMask)); + } + // Calculate the masks needed for the new input shuffles, which get padded // with undef SmallVector<int> V1A, V1B, V2A, V2B; for (unsigned I = 0; I < V1.size(); I++) { - V1A.push_back(SVI0A->getMaskValue(V1[I])); - V1B.push_back(SVI0B->getMaskValue(V1[I])); + V1A.push_back(GetBaseMaskValue(SVI0A, V1[I].first)); + V1B.push_back(GetBaseMaskValue(SVI0B, V1[I].first)); } for (unsigned I = 0; I < V2.size(); I++) { - V2A.push_back(SVI1A->getMaskValue(V2[I])); - V2B.push_back(SVI1B->getMaskValue(V2[I])); + V2A.push_back(GetBaseMaskValue(SVI1A, V2[I].first)); + V2B.push_back(GetBaseMaskValue(SVI1B, V2[I].first)); } while (V1A.size() < NumElts) { V1A.push_back(UndefMaskElem); @@ -1371,9 +1462,14 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) { V2B.push_back(UndefMaskElem); } - auto AddShuffleCost = [&](InstructionCost C, ShuffleVectorInst *SV) { - return C + - TTI.getShuffleCost(TTI::SK_PermuteTwoSrc, VT, SV->getShuffleMask()); + auto AddShuffleCost = [&](InstructionCost C, Instruction *I) { + auto *SV = dyn_cast<ShuffleVectorInst>(I); + if (!SV) + return C; + return C + TTI.getShuffleCost(isa<UndefValue>(SV->getOperand(1)) + ? TTI::SK_PermuteSingleSrc + : TTI::SK_PermuteTwoSrc, + VT, SV->getShuffleMask()); }; auto AddShuffleMaskCost = [&](InstructionCost C, ArrayRef<int> Mask) { return C + TTI.getShuffleCost(TTI::SK_PermuteTwoSrc, VT, Mask); @@ -1386,9 +1482,6 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) { TTI.getArithmeticInstrCost(Op1->getOpcode(), VT); CostBefore += std::accumulate(Shuffles.begin(), Shuffles.end(), InstructionCost(0), AddShuffleCost); - // This set helps us only cost each unique shuffle once. - SmallPtrSet<ShuffleVectorInst *, 4> InputShuffles( - {SVI0A, SVI0B, SVI1A, SVI1B}); CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(), InstructionCost(0), AddShuffleCost); @@ -1408,22 +1501,35 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) { std::accumulate(OutputShuffleMasks.begin(), OutputShuffleMasks.end(), InstructionCost(0), AddShuffleMaskCost); + LLVM_DEBUG(dbgs() << "Found a binop select shuffle pattern: " << I << "\n"); + LLVM_DEBUG(dbgs() << " CostBefore: " << CostBefore + << " vs CostAfter: " << CostAfter << "\n"); if (CostBefore <= CostAfter) return false; // The cost model has passed, create the new instructions. 
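GetBaseMaskValue above looks through a shuffle of a shuffle by composing the two masks; a standalone sketch of that composition, with -1 as the undef sentinel handled explicitly:

    #include <vector>

    // If the outer shuffle reads lane M from a source that is itself a
    // shuffle with mask Inner, the originating lane is Inner[Outer[M]].
    int composeShuffleMasks(const std::vector<int> &Inner,
                            const std::vector<int> &Outer, int M) {
      int OuterLane = Outer[M];
      return OuterLane < 0 ? -1 : Inner[OuterLane];
    }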
- Builder.SetInsertPoint(SVI0A); - Value *NSV0A = Builder.CreateShuffleVector(SVI0A->getOperand(0), - SVI0A->getOperand(1), V1A); - Builder.SetInsertPoint(SVI0B); - Value *NSV0B = Builder.CreateShuffleVector(SVI0B->getOperand(0), - SVI0B->getOperand(1), V1B); - Builder.SetInsertPoint(SVI1A); - Value *NSV1A = Builder.CreateShuffleVector(SVI1A->getOperand(0), - SVI1A->getOperand(1), V2A); - Builder.SetInsertPoint(SVI1B); - Value *NSV1B = Builder.CreateShuffleVector(SVI1B->getOperand(0), - SVI1B->getOperand(1), V2B); + auto GetShuffleOperand = [&](Instruction *I, unsigned Op) -> Value * { + auto *SV = dyn_cast<ShuffleVectorInst>(I); + if (!SV) + return I; + if (isa<UndefValue>(SV->getOperand(1))) + if (auto *SSV = dyn_cast<ShuffleVectorInst>(SV->getOperand(0))) + if (InputShuffles.contains(SSV)) + return SSV->getOperand(Op); + return SV->getOperand(Op); + }; + Builder.SetInsertPoint(SVI0A->getNextNode()); + Value *NSV0A = Builder.CreateShuffleVector(GetShuffleOperand(SVI0A, 0), + GetShuffleOperand(SVI0A, 1), V1A); + Builder.SetInsertPoint(SVI0B->getNextNode()); + Value *NSV0B = Builder.CreateShuffleVector(GetShuffleOperand(SVI0B, 0), + GetShuffleOperand(SVI0B, 1), V1B); + Builder.SetInsertPoint(SVI1A->getNextNode()); + Value *NSV1A = Builder.CreateShuffleVector(GetShuffleOperand(SVI1A, 0), + GetShuffleOperand(SVI1A, 1), V2A); + Builder.SetInsertPoint(SVI1B->getNextNode()); + Value *NSV1B = Builder.CreateShuffleVector(GetShuffleOperand(SVI1B, 0), + GetShuffleOperand(SVI1B, 1), V2B); Builder.SetInsertPoint(Op0); Value *NOp0 = Builder.CreateBinOp((Instruction::BinaryOps)Op0->getOpcode(), NSV0A, NSV0B); diff --git a/llvm/tools/bugpoint/bugpoint.cpp b/llvm/tools/bugpoint/bugpoint.cpp index 6e3f237d0a39..4dece12f75ef 100644 --- a/llvm/tools/bugpoint/bugpoint.cpp +++ b/llvm/tools/bugpoint/bugpoint.cpp @@ -24,7 +24,6 @@ #include "llvm/Passes/PassPlugin.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/InitLLVM.h" -#include "llvm/Support/ManagedStatic.h" #include "llvm/Support/PluginLoader.h" #include "llvm/Support/PrettyStackTrace.h" #include "llvm/Support/Process.h" diff --git a/llvm/tools/llc/llc.cpp b/llvm/tools/llc/llc.cpp index 8d82d78b15b5..853a0bd8eb54 100644 --- a/llvm/tools/llc/llc.cpp +++ b/llvm/tools/llc/llc.cpp @@ -47,7 +47,6 @@ #include "llvm/Support/FormattedStream.h" #include "llvm/Support/Host.h" #include "llvm/Support/InitLLVM.h" -#include "llvm/Support/ManagedStatic.h" #include "llvm/Support/PluginLoader.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetSelect.h" @@ -192,7 +191,11 @@ static cl::opt<std::string> RemarksFormat( cl::value_desc("format"), cl::init("yaml")); namespace { -static ManagedStatic<std::vector<std::string>> RunPassNames; + +std::vector<std::string> &getRunPassNames() { + static std::vector<std::string> RunPassNames; + return RunPassNames; +} struct RunPassOption { void operator=(const std::string &Val) const { @@ -201,7 +204,7 @@ struct RunPassOption { SmallVector<StringRef, 8> PassNames; StringRef(Val).split(PassNames, ',', -1, false); for (auto PassName : PassNames) - RunPassNames->push_back(std::string(PassName)); + getRunPassNames().push_back(std::string(PassName)); } }; } @@ -576,7 +579,7 @@ static int compileModule(char **argv, LLVMContext &Context) { Optional<CodeModel::Model> CM_IR = M->getCodeModel(); if (!CM && CM_IR) - Target->setCodeModel(CM_IR.getValue()); + Target->setCodeModel(CM_IR.value()); } else { TheTriple = Triple(Triple::normalize(TargetTriple)); if (TheTriple.getTriple().empty()) @@ -676,7 +679,7 @@ static 
int compileModule(char **argv, LLVMContext &Context) { // Construct a custom pass pipeline that starts after instruction // selection. - if (!RunPassNames->empty()) { + if (!getRunPassNames().empty()) { if (!MIR) { WithColor::warning(errs(), argv[0]) << "run-pass is for .mir file only.\n"; @@ -694,7 +697,7 @@ static int compileModule(char **argv, LLVMContext &Context) { PM.add(&TPC); PM.add(MMIWP); TPC.printAndVerify(""); - for (const std::string &RunPassName : *RunPassNames) { + for (const std::string &RunPassName : getRunPassNames()) { if (addPass(PM, argv0, RunPassName, TPC)) return 1; } diff --git a/llvm/tools/lli/lli.cpp b/llvm/tools/lli/lli.cpp index f2e3886bdf07..42bea1a6487f 100644 --- a/llvm/tools/lli/lli.cpp +++ b/llvm/tools/lli/lli.cpp @@ -57,7 +57,6 @@ #include "llvm/Support/DynamicLibrary.h" #include "llvm/Support/Format.h" #include "llvm/Support/InitLLVM.h" -#include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Memory.h" #include "llvm/Support/MemoryBuffer.h" @@ -536,9 +535,9 @@ int main(int argc, char **argv, char * const *envp) { builder.setMCPU(codegen::getCPUStr()); builder.setMAttrs(codegen::getFeatureList()); if (auto RM = codegen::getExplicitRelocModel()) - builder.setRelocationModel(RM.getValue()); + builder.setRelocationModel(RM.value()); if (auto CM = codegen::getExplicitCodeModel()) - builder.setCodeModel(CM.getValue()); + builder.setCodeModel(CM.value()); builder.setErrorStr(&ErrorMsg); builder.setEngineKind(ForceInterpreter ? EngineKind::Interpreter diff --git a/llvm/tools/llvm-as/llvm-as.cpp b/llvm/tools/llvm-as/llvm-as.cpp index 11dad0d9c369..b777f7271dc6 100644 --- a/llvm/tools/llvm-as/llvm-as.cpp +++ b/llvm/tools/llvm-as/llvm-as.cpp @@ -23,7 +23,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/InitLLVM.h" -#include "llvm/Support/ManagedStatic.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/SystemUtils.h" #include "llvm/Support/ToolOutputFile.h" diff --git a/llvm/tools/llvm-cov/CoverageExporterJson.cpp b/llvm/tools/llvm-cov/CoverageExporterJson.cpp index d341abe8dfc8..2e161f53b10d 100644 --- a/llvm/tools/llvm-cov/CoverageExporterJson.cpp +++ b/llvm/tools/llvm-cov/CoverageExporterJson.cpp @@ -291,8 +291,8 @@ void CoverageExporterJson::renderRoot(ArrayRef<std::string> SourceFiles) { const json::Object *ObjB = B.getAsObject(); assert(ObjA != nullptr && "Value A was not an Object"); assert(ObjB != nullptr && "Value B was not an Object"); - const StringRef FilenameA = ObjA->getString("filename").getValue(); - const StringRef FilenameB = ObjB->getString("filename").getValue(); + const StringRef FilenameA = ObjA->getString("filename").value(); + const StringRef FilenameB = ObjB->getString("filename").value(); return FilenameA.compare(FilenameB) < 0; }); auto Export = json::Object( diff --git a/llvm/tools/llvm-cov/llvm-cov.cpp b/llvm/tools/llvm-cov/llvm-cov.cpp index 0e320c0965f9..45de2afb0855 100644 --- a/llvm/tools/llvm-cov/llvm-cov.cpp +++ b/llvm/tools/llvm-cov/llvm-cov.cpp @@ -14,7 +14,6 @@ #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/InitLLVM.h" -#include "llvm/Support/ManagedStatic.h" #include "llvm/Support/Path.h" #include "llvm/Support/Process.h" #include "llvm/Support/raw_ostream.h" diff --git a/llvm/tools/llvm-lto2/llvm-lto2.cpp b/llvm/tools/llvm-lto2/llvm-lto2.cpp index f79db36d2d2d..87fe90a5225f 100644 --- a/llvm/tools/llvm-lto2/llvm-lto2.cpp +++ b/llvm/tools/llvm-lto2/llvm-lto2.cpp @@ -67,6 
+67,19 @@ static cl::opt<std::string> AAPipeline("aa-pipeline", static cl::opt<bool> SaveTemps("save-temps", cl::desc("Save temporary files")); +static cl::list<std::string> SelectSaveTemps( + "select-save-temps", + cl::value_desc("One, or multiple of: " + "resolution,preopt,promote,internalize,import,opt,precodegen" + ",combinedindex"), + cl::desc("Save selected temporary files. Cannot be specified together with " + "-save-temps"), + cl::CommaSeparated); + +constexpr const char *SaveTempsValues[] = { + "resolution", "preopt", "promote", "internalize", + "import", "opt", "precodegen", "combinedindex"}; + static cl::opt<bool> ThinLTODistributedIndexes("thinlto-distributed-indexes", cl::desc("Write out individual index and " @@ -258,9 +271,22 @@ static int run(int argc, char **argv) { Conf.DebugPassManager = DebugPassManager; - if (SaveTemps) - check(Conf.addSaveTemps(OutputFilename + "."), + if (SaveTemps && !SelectSaveTemps.empty()) { + llvm::errs() << "-save-temps cannot be specified with -select-save-temps\n"; + return 1; + } + if (SaveTemps || !SelectSaveTemps.empty()) { + DenseSet<StringRef> SaveTempsArgs; + for (auto &S : SelectSaveTemps) + if (is_contained(SaveTempsValues, S)) + SaveTempsArgs.insert(S); + else { + llvm::errs() << ("invalid -select-save-temps argument: " + S) << '\n'; + return 1; + } + check(Conf.addSaveTemps(OutputFilename + ".", false, SaveTempsArgs), "Config::addSaveTemps failed"); + } // Optimization remarks. Conf.RemarksFilename = RemarksFilename; diff --git a/llvm/tools/llvm-mc/llvm-mc.cpp b/llvm/tools/llvm-mc/llvm-mc.cpp index 2a525f53ec29..3e737b9fbaa0 100644 --- a/llvm/tools/llvm-mc/llvm-mc.cpp +++ b/llvm/tools/llvm-mc/llvm-mc.cpp @@ -403,7 +403,7 @@ int main(int argc, char **argv) { MAI->setRelaxELFRelocations(RelaxELFRel); if (CompressDebugSections != DebugCompressionType::None) { - if (!zlib::isAvailable()) { + if (!compression::zlib::isAvailable()) { WithColor::error(errs(), ProgName) << "build tools with zlib to enable -compress-debug-sections"; return 1; diff --git a/llvm/tools/llvm-mca/Views/InstructionInfoView.cpp b/llvm/tools/llvm-mca/Views/InstructionInfoView.cpp index 67b636737b97..d3f9738b6323 100644 --- a/llvm/tools/llvm-mca/Views/InstructionInfoView.cpp +++ b/llvm/tools/llvm-mca/Views/InstructionInfoView.cpp @@ -71,7 +71,7 @@ void InstructionInfoView::printView(raw_ostream &OS) const { TempStream << ' '; if (IIVDEntry.RThroughput) { - double RT = IIVDEntry.RThroughput.getValue(); + double RT = IIVDEntry.RThroughput.value(); TempStream << format("%.2f", RT) << ' '; if (RT < 10.0) TempStream << " "; diff --git a/llvm/tools/llvm-mca/llvm-mca.cpp b/llvm/tools/llvm-mca/llvm-mca.cpp index 409de283e5a1..6f7b74fd11ec 100644 --- a/llvm/tools/llvm-mca/llvm-mca.cpp +++ b/llvm/tools/llvm-mca/llvm-mca.cpp @@ -92,9 +92,10 @@ static cl::opt<std::string> cl::desc("Target a specific cpu type (-mcpu=help for details)"), cl::value_desc("cpu-name"), cl::cat(ToolOptions), cl::init("native")); -static cl::opt<std::string> MATTR("mattr", - cl::desc("Additional target features."), - cl::cat(ToolOptions)); +static cl::list<std::string> + MATTRS("mattr", cl::CommaSeparated, + cl::desc("Target specific attributes (-mattr=help for details)"), + cl::value_desc("a1,+a2,-a3,..."), cl::cat(ToolOptions)); static cl::opt<bool> PrintJson("json", cl::desc("Print the output in json format"), @@ -346,8 +347,17 @@ int main(int argc, char **argv) { if (MCPU == "native") MCPU = std::string(llvm::sys::getHostCPUName()); + // Package up features to be passed to target/subtarget + 
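The packaging step that follows folds the comma-separated -mattr list into the single feature string createMCSubtargetInfo expects; a standalone sketch, assuming (as SubtargetFeatures::AddFeature does) that a bare name is treated as enabled:

    #include <string>
    #include <vector>

    // Fold {"a1", "+a2", "-a3"} into "+a1,+a2,-a3"; names without an
    // explicit '+'/'-' flag are assumed to be enabled.
    std::string joinFeatures(const std::vector<std::string> &MAttrs) {
      std::string Out;
      for (const std::string &A : MAttrs) {
        if (A.empty())
          continue;
        if (!Out.empty())
          Out += ',';
        bool HasFlag = A[0] == '+' || A[0] == '-';
        Out += HasFlag ? A : "+" + A;
      }
      return Out;
    }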
std::string FeaturesStr; + if (MATTRS.size()) { + SubtargetFeatures Features; + for (std::string &MAttr : MATTRS) + Features.AddFeature(MAttr); + FeaturesStr = Features.getString(); + } + std::unique_ptr<MCSubtargetInfo> STI( - TheTarget->createMCSubtargetInfo(TripleName, MCPU, MATTR)); + TheTarget->createMCSubtargetInfo(TripleName, MCPU, FeaturesStr)); assert(STI && "Unable to create subtarget info!"); if (!STI->isCPUStringValid(MCPU)) return 1; diff --git a/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp b/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp index 5b2b4b5704d8..8a2b4855501b 100644 --- a/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp +++ b/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp @@ -236,23 +236,21 @@ static Expected<SectionRename> parseRenameSectionValue(StringRef FlagValue) { } static Expected<std::pair<StringRef, uint64_t>> -parseSetSectionAlignment(StringRef FlagValue) { +parseSetSectionAttribute(StringRef Option, StringRef FlagValue) { if (!FlagValue.contains('=')) - return createStringError( - errc::invalid_argument, - "bad format for --set-section-alignment: missing '='"); + return make_error<StringError>("bad format for " + Option + ": missing '='", + errc::invalid_argument); auto Split = StringRef(FlagValue).split('='); if (Split.first.empty()) - return createStringError( - errc::invalid_argument, - "bad format for --set-section-alignment: missing section name"); - uint64_t NewAlign; - if (Split.second.getAsInteger(0, NewAlign)) - return createStringError( - errc::invalid_argument, - "invalid alignment for --set-section-alignment: '%s'", - Split.second.str().c_str()); - return std::make_pair(Split.first, NewAlign); + return make_error<StringError>("bad format for " + Option + + ": missing section name", + errc::invalid_argument); + uint64_t Value; + if (Split.second.getAsInteger(0, Value)) + return make_error<StringError>("invalid value for " + Option + ": '" + + Split.second + "'", + errc::invalid_argument); + return std::make_pair(Split.first, Value); } static Expected<SectionFlagsUpdate> @@ -739,7 +737,7 @@ objcopy::parseObjcopyOptions(ArrayRef<const char *> RawArgsArr, .str() .c_str()); } - if (!zlib::isAvailable()) + if (!compression::zlib::isAvailable()) return createStringError( errc::invalid_argument, "LLVM was not compiled with LLVM_ENABLE_ZLIB: can not compress"); @@ -793,7 +791,7 @@ objcopy::parseObjcopyOptions(ArrayRef<const char *> RawArgsArr, } for (auto Arg : InputArgs.filtered(OBJCOPY_set_section_alignment)) { Expected<std::pair<StringRef, uint64_t>> NameAndAlign = - parseSetSectionAlignment(Arg->getValue()); + parseSetSectionAttribute("--set-section-alignment", Arg->getValue()); if (!NameAndAlign) return NameAndAlign.takeError(); Config.SetSectionAlignment[NameAndAlign->first] = NameAndAlign->second; @@ -809,22 +807,28 @@ objcopy::parseObjcopyOptions(ArrayRef<const char *> RawArgsArr, "--set-section-flags set multiple times for section '%s'", SFU->Name.str().c_str()); } - // Prohibit combinations of --set-section-flags when the section name is used - // by --rename-section, either as a source or a destination. + for (auto Arg : InputArgs.filtered(OBJCOPY_set_section_type)) { + Expected<std::pair<StringRef, uint64_t>> NameAndType = + parseSetSectionAttribute("--set-section-type", Arg->getValue()); + if (!NameAndType) + return NameAndType.takeError(); + Config.SetSectionType[NameAndType->first] = NameAndType->second; + } + // Prohibit combinations of --set-section-{flags,type} when the section name + // is used as the destination of a --rename-section. 
for (const auto &E : Config.SectionsToRename) { const SectionRename &SR = E.second; - if (Config.SetSectionFlags.count(SR.OriginalName)) - return createStringError( - errc::invalid_argument, - "--set-section-flags=%s conflicts with --rename-section=%s=%s", - SR.OriginalName.str().c_str(), SR.OriginalName.str().c_str(), - SR.NewName.str().c_str()); - if (Config.SetSectionFlags.count(SR.NewName)) + auto Err = [&](const char *Option) { return createStringError( errc::invalid_argument, - "--set-section-flags=%s conflicts with --rename-section=%s=%s", + "--set-section-%s=%s conflicts with --rename-section=%s=%s", Option, SR.NewName.str().c_str(), SR.OriginalName.str().c_str(), SR.NewName.str().c_str()); + }; + if (Config.SetSectionFlags.count(SR.NewName)) + return Err("flags"); + if (Config.SetSectionType.count(SR.NewName)) + return Err("type"); } for (auto Arg : InputArgs.filtered(OBJCOPY_remove_section)) @@ -998,7 +1002,7 @@ objcopy::parseObjcopyOptions(ArrayRef<const char *> RawArgsArr, "--decompress-debug-sections"); } - if (Config.DecompressDebugSections && !zlib::isAvailable()) + if (Config.DecompressDebugSections && !compression::zlib::isAvailable()) return createStringError( errc::invalid_argument, "LLVM was not compiled with LLVM_ENABLE_ZLIB: cannot decompress"); diff --git a/llvm/tools/llvm-objcopy/ObjcopyOpts.td b/llvm/tools/llvm-objcopy/ObjcopyOpts.td index ff73265989f3..962028da47a0 100644 --- a/llvm/tools/llvm-objcopy/ObjcopyOpts.td +++ b/llvm/tools/llvm-objcopy/ObjcopyOpts.td @@ -87,6 +87,11 @@ defm set_section_flags "data, rom, share, contents, merge, strings.">, MetaVarName<"section=flag1[,flag2,...]">; +defm set_section_type + : Eq<"set-section-type", + "Set the type of section <section> to the integer <type>">, + MetaVarName<"section=type">; + def S : Flag<["-"], "S">, Alias<strip_all>, HelpText<"Alias for --strip-all">; diff --git a/llvm/tools/llvm-objdump/COFFDump.cpp b/llvm/tools/llvm-objdump/COFFDump.cpp index e085e26c3cd0..e65762e02022 100644 --- a/llvm/tools/llvm-objdump/COFFDump.cpp +++ b/llvm/tools/llvm-objdump/COFFDump.cpp @@ -800,11 +800,11 @@ void objdump::printCOFFFileHeader(const COFFObjectFile &Obj) { printExportTable(&Obj); } -void objdump::printCOFFSymbolTable(const object::COFFImportFile *i) { +void objdump::printCOFFSymbolTable(const object::COFFImportFile &i) { unsigned Index = 0; - bool IsCode = i->getCOFFImportHeader()->getType() == COFF::IMPORT_CODE; + bool IsCode = i.getCOFFImportHeader()->getType() == COFF::IMPORT_CODE; - for (const object::BasicSymbolRef &Sym : i->symbols()) { + for (const object::BasicSymbolRef &Sym : i.symbols()) { std::string Name; raw_string_ostream NS(Name); @@ -823,15 +823,15 @@ void objdump::printCOFFSymbolTable(const object::COFFImportFile *i) { } } -void objdump::printCOFFSymbolTable(const COFFObjectFile *coff) { - for (unsigned SI = 0, SE = coff->getNumberOfSymbols(); SI != SE; ++SI) { - Expected<COFFSymbolRef> Symbol = coff->getSymbol(SI); +void objdump::printCOFFSymbolTable(const COFFObjectFile &coff) { + for (unsigned SI = 0, SE = coff.getNumberOfSymbols(); SI != SE; ++SI) { + Expected<COFFSymbolRef> Symbol = coff.getSymbol(SI); if (!Symbol) - reportError(Symbol.takeError(), coff->getFileName()); + reportError(Symbol.takeError(), coff.getFileName()); - Expected<StringRef> NameOrErr = coff->getSymbolName(*Symbol); + Expected<StringRef> NameOrErr = coff.getSymbolName(*Symbol); if (!NameOrErr) - reportError(NameOrErr.takeError(), coff->getFileName()); + reportError(NameOrErr.takeError(), coff.getFileName()); StringRef Name = 
*NameOrErr; outs() << "[" << format("%2d", SI) << "]" @@ -861,8 +861,8 @@ void objdump::printCOFFSymbolTable(const COFFObjectFile *coff) { if (Symbol->isSectionDefinition()) { const coff_aux_section_definition *asd; if (Error E = - coff->getAuxSymbol<coff_aux_section_definition>(SI + 1, asd)) - reportError(std::move(E), coff->getFileName()); + coff.getAuxSymbol<coff_aux_section_definition>(SI + 1, asd)) + reportError(std::move(E), coff.getFileName()); int32_t AuxNumber = asd->getNumber(Symbol->isBigObj()); @@ -877,19 +877,19 @@ void objdump::printCOFFSymbolTable(const COFFObjectFile *coff) { , unsigned(asd->Selection)); } else if (Symbol->isFileRecord()) { const char *FileName; - if (Error E = coff->getAuxSymbol<char>(SI + 1, FileName)) - reportError(std::move(E), coff->getFileName()); + if (Error E = coff.getAuxSymbol<char>(SI + 1, FileName)) + reportError(std::move(E), coff.getFileName()); StringRef Name(FileName, Symbol->getNumberOfAuxSymbols() * - coff->getSymbolTableEntrySize()); + coff.getSymbolTableEntrySize()); outs() << "AUX " << Name.rtrim(StringRef("\0", 1)) << '\n'; SI = SI + Symbol->getNumberOfAuxSymbols(); break; } else if (Symbol->isWeakExternal()) { const coff_aux_weak_external *awe; - if (Error E = coff->getAuxSymbol<coff_aux_weak_external>(SI + 1, awe)) - reportError(std::move(E), coff->getFileName()); + if (Error E = coff.getAuxSymbol<coff_aux_weak_external>(SI + 1, awe)) + reportError(std::move(E), coff.getFileName()); outs() << "AUX " << format("indx %d srch %d\n", static_cast<uint32_t>(awe->TagIndex), diff --git a/llvm/tools/llvm-objdump/COFFDump.h b/llvm/tools/llvm-objdump/COFFDump.h index ffd39671debe..33a841df5874 100644 --- a/llvm/tools/llvm-objdump/COFFDump.h +++ b/llvm/tools/llvm-objdump/COFFDump.h @@ -28,8 +28,8 @@ Error getCOFFRelocationValueString(const object::COFFObjectFile *Obj, void printCOFFUnwindInfo(const object::COFFObjectFile *O); void printCOFFFileHeader(const object::COFFObjectFile &Obj); -void printCOFFSymbolTable(const object::COFFImportFile *I); -void printCOFFSymbolTable(const object::COFFObjectFile *O); +void printCOFFSymbolTable(const object::COFFImportFile &I); +void printCOFFSymbolTable(const object::COFFObjectFile &O); } // namespace objdump } // namespace llvm diff --git a/llvm/tools/llvm-objdump/MachODump.cpp b/llvm/tools/llvm-objdump/MachODump.cpp index 60c34158941b..cdbecd5ec243 100644 --- a/llvm/tools/llvm-objdump/MachODump.cpp +++ b/llvm/tools/llvm-objdump/MachODump.cpp @@ -1979,7 +1979,7 @@ static void ProcessMachO(StringRef Name, MachOObjectFile *MachOOF, if (Relocations) PrintRelocations(MachOOF, Verbose); if (SectionHeaders) - printSectionHeaders(MachOOF); + printSectionHeaders(*MachOOF); if (SectionContents) printSectionContents(MachOOF); if (!FilterSections.empty()) @@ -1993,7 +1993,7 @@ static void ProcessMachO(StringRef Name, MachOObjectFile *MachOOF, if (DylibId) PrintDylibs(MachOOF, true); if (SymbolTable) - printSymbolTable(MachOOF, ArchiveName, ArchitectureName); + printSymbolTable(*MachOOF, ArchiveName, ArchitectureName); if (UnwindInfo) printMachOUnwindInfo(MachOOF); if (PrivateHeaders) { diff --git a/llvm/tools/llvm-objdump/OffloadDump.cpp b/llvm/tools/llvm-objdump/OffloadDump.cpp index 7d4461f0a70e..46334c249070 100644 --- a/llvm/tools/llvm-objdump/OffloadDump.cpp +++ b/llvm/tools/llvm-objdump/OffloadDump.cpp @@ -12,13 +12,13 @@ //===----------------------------------------------------------------------===// #include "OffloadDump.h" #include "llvm-objdump.h" +#include "llvm/Object/ELFObjectFile.h" +#include 
"llvm/Support/Alignment.h" using namespace llvm; using namespace llvm::object; using namespace llvm::objdump; -constexpr const char OffloadSectionString[] = ".llvm.offloading"; - /// Get the printable name of the image kind. static StringRef getImageName(const OffloadBinary &OB) { switch (OB.getImageKind()) { @@ -66,17 +66,27 @@ static Error visitAllBinaries(const OffloadBinary &OB) { /// Print the embedded offloading contents of an ObjectFile \p O. void llvm::dumpOffloadBinary(const ObjectFile &O) { - for (SectionRef Sec : O.sections()) { - Expected<StringRef> Name = Sec.getName(); - if (!Name || !Name->startswith(OffloadSectionString)) + if (!O.isELF()) { + reportWarning("--offloading is currently only supported for ELF targets", + O.getFileName()); + return; + } + + for (ELFSectionRef Sec : O.sections()) { + if (Sec.getType() != ELF::SHT_LLVM_OFFLOADING) continue; Expected<StringRef> Contents = Sec.getContents(); if (!Contents) reportError(Contents.takeError(), O.getFileName()); - MemoryBufferRef Buffer = MemoryBufferRef(*Contents, O.getFileName()); - auto BinaryOrErr = OffloadBinary::create(Buffer); + std::unique_ptr<MemoryBuffer> Buffer = + MemoryBuffer::getMemBuffer(*Contents, O.getFileName(), false); + if (!isAddrAligned(Align(OffloadBinary::getAlignment()), + Buffer->getBufferStart())) + Buffer = MemoryBuffer::getMemBufferCopy(Buffer->getBuffer(), + Buffer->getBufferIdentifier()); + auto BinaryOrErr = OffloadBinary::create(*Buffer); if (!BinaryOrErr) reportError(O.getFileName(), "while extracting offloading files: " + toString(BinaryOrErr.takeError())); diff --git a/llvm/tools/llvm-objdump/XCOFFDump.cpp b/llvm/tools/llvm-objdump/XCOFFDump.cpp index 159741bebb67..dd1570e1736c 100644 --- a/llvm/tools/llvm-objdump/XCOFFDump.cpp +++ b/llvm/tools/llvm-objdump/XCOFFDump.cpp @@ -19,11 +19,11 @@ using namespace llvm; using namespace llvm::object; -Error objdump::getXCOFFRelocationValueString(const XCOFFObjectFile *Obj, +Error objdump::getXCOFFRelocationValueString(const XCOFFObjectFile &Obj, const RelocationRef &Rel, SmallVectorImpl<char> &Result) { symbol_iterator SymI = Rel.getSymbol(); - if (SymI == Obj->symbol_end()) + if (SymI == Obj.symbol_end()) return make_error<GenericBinaryError>( "invalid symbol reference in relocation entry", object_error::parse_failed); @@ -44,9 +44,9 @@ Error objdump::getXCOFFRelocationValueString(const XCOFFObjectFile *Obj, } Optional<XCOFF::StorageMappingClass> -objdump::getXCOFFSymbolCsectSMC(const XCOFFObjectFile *Obj, +objdump::getXCOFFSymbolCsectSMC(const XCOFFObjectFile &Obj, const SymbolRef &Sym) { - const XCOFFSymbolRef SymRef = Obj->toSymbolRef(Sym.getRawDataRefImpl()); + const XCOFFSymbolRef SymRef = Obj.toSymbolRef(Sym.getRawDataRefImpl()); if (!SymRef.isCsectSymbol()) return None; @@ -59,10 +59,9 @@ objdump::getXCOFFSymbolCsectSMC(const XCOFFObjectFile *Obj, } Optional<object::SymbolRef> -objdump::getXCOFFSymbolContainingSymbolRef(const XCOFFObjectFile *Obj, +objdump::getXCOFFSymbolContainingSymbolRef(const XCOFFObjectFile &Obj, const SymbolRef &Sym) { - - const XCOFFSymbolRef SymRef = Obj->toSymbolRef(Sym.getRawDataRefImpl()); + const XCOFFSymbolRef SymRef = Obj.toSymbolRef(Sym.getRawDataRefImpl()); if (!SymRef.isCsectSymbol()) return None; @@ -72,14 +71,12 @@ objdump::getXCOFFSymbolContainingSymbolRef(const XCOFFObjectFile *Obj, uint32_t Idx = static_cast<uint32_t>(CsectAuxEntOrErr.get().getSectionOrLength()); DataRefImpl DRI; - DRI.p = Obj->getSymbolByIndex(Idx); - return SymbolRef(DRI, Obj); + DRI.p = Obj.getSymbolByIndex(Idx); + return 
SymbolRef(DRI, &Obj); } -bool objdump::isLabel(const XCOFFObjectFile *Obj, const SymbolRef &Sym) { - - const XCOFFSymbolRef SymRef = Obj->toSymbolRef(Sym.getRawDataRefImpl()); - +bool objdump::isLabel(const XCOFFObjectFile &Obj, const SymbolRef &Sym) { + const XCOFFSymbolRef SymRef = Obj.toSymbolRef(Sym.getRawDataRefImpl()); if (!SymRef.isCsectSymbol()) return false; @@ -97,8 +94,8 @@ std::string objdump::getXCOFFSymbolDescription(const SymbolInfoTy &SymbolInfo, std::string Result; // Dummy symbols have no symbol index. if (SymbolInfo.XCOFFSymInfo.Index) - Result = ("(idx: " + Twine(SymbolInfo.XCOFFSymInfo.Index.getValue()) + - ") " + SymbolName) + Result = ("(idx: " + Twine(SymbolInfo.XCOFFSymInfo.Index.value()) + ") " + + SymbolName) .str(); else Result.append(SymbolName.begin(), SymbolName.end()); diff --git a/llvm/tools/llvm-objdump/XCOFFDump.h b/llvm/tools/llvm-objdump/XCOFFDump.h index 6796f00aef6f..461605940946 100644 --- a/llvm/tools/llvm-objdump/XCOFFDump.h +++ b/llvm/tools/llvm-objdump/XCOFFDump.h @@ -17,19 +17,19 @@ struct SymbolInfoTy; namespace objdump { Optional<XCOFF::StorageMappingClass> -getXCOFFSymbolCsectSMC(const object::XCOFFObjectFile *Obj, +getXCOFFSymbolCsectSMC(const object::XCOFFObjectFile &Obj, const object::SymbolRef &Sym); Optional<object::SymbolRef> -getXCOFFSymbolContainingSymbolRef(const object::XCOFFObjectFile *Obj, +getXCOFFSymbolContainingSymbolRef(const object::XCOFFObjectFile &Obj, const object::SymbolRef &Sym); -bool isLabel(const object::XCOFFObjectFile *Obj, const object::SymbolRef &Sym); +bool isLabel(const object::XCOFFObjectFile &Obj, const object::SymbolRef &Sym); std::string getXCOFFSymbolDescription(const SymbolInfoTy &SymbolInfo, StringRef SymbolName); -Error getXCOFFRelocationValueString(const object::XCOFFObjectFile *Obj, +Error getXCOFFRelocationValueString(const object::XCOFFObjectFile &Obj, const object::RelocationRef &RelRef, llvm::SmallVectorImpl<char> &Result); } // namespace objdump diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp index 7cd47da9efd9..1245f9e18206 100644 --- a/llvm/tools/llvm-objdump/llvm-objdump.cpp +++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -391,7 +391,7 @@ static Error getRelocationValueString(const RelocationRef &Rel, if (auto *MachO = dyn_cast<MachOObjectFile>(Obj)) return getMachORelocationValueString(MachO, Rel, Result); if (auto *XCOFF = dyn_cast<XCOFFObjectFile>(Obj)) - return getXCOFFRelocationValueString(XCOFF, Rel, Result); + return getXCOFFRelocationValueString(*XCOFF, Rel, Result); llvm_unreachable("unknown object file format"); } @@ -435,22 +435,22 @@ unsigned getInstStartColumn(const MCSubtargetInfo &STI) { return !ShowRawInsn ? 16 : STI.getTargetTriple().isX86() ? 
40 : 24; } -static bool isAArch64Elf(const ObjectFile *Obj) { - const auto *Elf = dyn_cast<ELFObjectFileBase>(Obj); +static bool isAArch64Elf(const ObjectFile &Obj) { + const auto *Elf = dyn_cast<ELFObjectFileBase>(&Obj); return Elf && Elf->getEMachine() == ELF::EM_AARCH64; } -static bool isArmElf(const ObjectFile *Obj) { - const auto *Elf = dyn_cast<ELFObjectFileBase>(Obj); +static bool isArmElf(const ObjectFile &Obj) { + const auto *Elf = dyn_cast<ELFObjectFileBase>(&Obj); return Elf && Elf->getEMachine() == ELF::EM_ARM; } -static bool isCSKYElf(const ObjectFile *Obj) { - const auto *Elf = dyn_cast<ELFObjectFileBase>(Obj); +static bool isCSKYElf(const ObjectFile &Obj) { + const auto *Elf = dyn_cast<ELFObjectFileBase>(&Obj); return Elf && Elf->getEMachine() == ELF::EM_CSKY; } -static bool hasMappingSymbols(const ObjectFile *Obj) { +static bool hasMappingSymbols(const ObjectFile &Obj) { return isArmElf(Obj) || isAArch64Elf(Obj) || isCSKYElf(Obj) ; } @@ -679,50 +679,51 @@ PrettyPrinter &selectPrettyPrinter(Triple const &Triple) { } } -static uint8_t getElfSymbolType(const ObjectFile *Obj, const SymbolRef &Sym) { - assert(Obj->isELF()); - if (auto *Elf32LEObj = dyn_cast<ELF32LEObjectFile>(Obj)) +static uint8_t getElfSymbolType(const ObjectFile &Obj, const SymbolRef &Sym) { + assert(Obj.isELF()); + if (auto *Elf32LEObj = dyn_cast<ELF32LEObjectFile>(&Obj)) return unwrapOrError(Elf32LEObj->getSymbol(Sym.getRawDataRefImpl()), - Obj->getFileName()) + Obj.getFileName()) ->getType(); - if (auto *Elf64LEObj = dyn_cast<ELF64LEObjectFile>(Obj)) + if (auto *Elf64LEObj = dyn_cast<ELF64LEObjectFile>(&Obj)) return unwrapOrError(Elf64LEObj->getSymbol(Sym.getRawDataRefImpl()), - Obj->getFileName()) + Obj.getFileName()) ->getType(); - if (auto *Elf32BEObj = dyn_cast<ELF32BEObjectFile>(Obj)) + if (auto *Elf32BEObj = dyn_cast<ELF32BEObjectFile>(&Obj)) return unwrapOrError(Elf32BEObj->getSymbol(Sym.getRawDataRefImpl()), - Obj->getFileName()) + Obj.getFileName()) ->getType(); - if (auto *Elf64BEObj = cast<ELF64BEObjectFile>(Obj)) + if (auto *Elf64BEObj = cast<ELF64BEObjectFile>(&Obj)) return unwrapOrError(Elf64BEObj->getSymbol(Sym.getRawDataRefImpl()), - Obj->getFileName()) + Obj.getFileName()) ->getType(); llvm_unreachable("Unsupported binary format"); } -template <class ELFT> static void -addDynamicElfSymbols(const ELFObjectFile<ELFT> *Obj, +template <class ELFT> +static void +addDynamicElfSymbols(const ELFObjectFile<ELFT> &Obj, std::map<SectionRef, SectionSymbolsTy> &AllSymbols) { - for (auto Symbol : Obj->getDynamicSymbolIterators()) { + for (auto Symbol : Obj.getDynamicSymbolIterators()) { uint8_t SymbolType = Symbol.getELFType(); if (SymbolType == ELF::STT_SECTION) continue; - uint64_t Address = unwrapOrError(Symbol.getAddress(), Obj->getFileName()); + uint64_t Address = unwrapOrError(Symbol.getAddress(), Obj.getFileName()); // ELFSymbolRef::getAddress() returns size instead of value for common // symbols which is not desirable for disassembly output. Overriding. 
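// (Aside, not part of the commit: SymbolRef::getAddress() reports the size
// for STT_COMMON symbols, as the upstream comment above notes, so the code
// below re-reads the raw Elf_Sym and uses st_value directly when building
// the symbol table used for disassembly.)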
if (SymbolType == ELF::STT_COMMON) - Address = unwrapOrError(Obj->getSymbol(Symbol.getRawDataRefImpl()), - Obj->getFileName()) + Address = unwrapOrError(Obj.getSymbol(Symbol.getRawDataRefImpl()), + Obj.getFileName()) ->st_value; - StringRef Name = unwrapOrError(Symbol.getName(), Obj->getFileName()); + StringRef Name = unwrapOrError(Symbol.getName(), Obj.getFileName()); if (Name.empty()) continue; section_iterator SecI = - unwrapOrError(Symbol.getSection(), Obj->getFileName()); - if (SecI == Obj->section_end()) + unwrapOrError(Symbol.getSection(), Obj.getFileName()); + if (SecI == Obj.section_end()) continue; AllSymbols[*SecI].emplace_back(Address, Name, SymbolType); @@ -730,24 +731,23 @@ addDynamicElfSymbols(const ELFObjectFile<ELFT> *Obj, } static void -addDynamicElfSymbols(const ObjectFile *Obj, +addDynamicElfSymbols(const ELFObjectFileBase &Obj, std::map<SectionRef, SectionSymbolsTy> &AllSymbols) { - assert(Obj->isELF()); - if (auto *Elf32LEObj = dyn_cast<ELF32LEObjectFile>(Obj)) - addDynamicElfSymbols(Elf32LEObj, AllSymbols); - else if (auto *Elf64LEObj = dyn_cast<ELF64LEObjectFile>(Obj)) - addDynamicElfSymbols(Elf64LEObj, AllSymbols); - else if (auto *Elf32BEObj = dyn_cast<ELF32BEObjectFile>(Obj)) - addDynamicElfSymbols(Elf32BEObj, AllSymbols); - else if (auto *Elf64BEObj = cast<ELF64BEObjectFile>(Obj)) - addDynamicElfSymbols(Elf64BEObj, AllSymbols); + if (auto *Elf32LEObj = dyn_cast<ELF32LEObjectFile>(&Obj)) + addDynamicElfSymbols(*Elf32LEObj, AllSymbols); + else if (auto *Elf64LEObj = dyn_cast<ELF64LEObjectFile>(&Obj)) + addDynamicElfSymbols(*Elf64LEObj, AllSymbols); + else if (auto *Elf32BEObj = dyn_cast<ELF32BEObjectFile>(&Obj)) + addDynamicElfSymbols(*Elf32BEObj, AllSymbols); + else if (auto *Elf64BEObj = cast<ELF64BEObjectFile>(&Obj)) + addDynamicElfSymbols(*Elf64BEObj, AllSymbols); else llvm_unreachable("Unsupported binary format"); } -static Optional<SectionRef> getWasmCodeSection(const WasmObjectFile *Obj) { - for (auto SecI : Obj->sections()) { - const WasmSection &Section = Obj->getWasmSection(SecI); +static Optional<SectionRef> getWasmCodeSection(const WasmObjectFile &Obj) { + for (auto SecI : Obj.sections()) { + const WasmSection &Section = Obj.getWasmSection(SecI); if (Section.Type == wasm::WASM_SEC_CODE) return SecI; } @@ -755,7 +755,7 @@ static Optional<SectionRef> getWasmCodeSection(const WasmObjectFile *Obj) { } static void -addMissingWasmCodeSymbols(const WasmObjectFile *Obj, +addMissingWasmCodeSymbols(const WasmObjectFile &Obj, std::map<SectionRef, SectionSymbolsTy> &AllSymbols) { Optional<SectionRef> Section = getWasmCodeSection(Obj); if (!Section) @@ -766,7 +766,7 @@ addMissingWasmCodeSymbols(const WasmObjectFile *Obj, for (const auto &Sym : Symbols) SymbolAddresses.insert(Sym.Addr); - for (const wasm::WasmFunction &Function : Obj->functions()) { + for (const wasm::WasmFunction &Function : Obj.functions()) { uint64_t Address = Function.CodeSectionOffset; // Only add fallback symbols for functions not already present in the symbol // table. 
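Aside (not part of the commit): most of the llvm-objdump churn in this commit is one mechanical refactor: helpers that can never receive a null object now take `const ObjectFile &` instead of `const ObjectFile *`, with a single dereference at each call site. The shape of the change, using the isAArch64Elf helper from this very diff (dyn_cast still wants a pointer, hence the explicit &Obj):

    // Before: a pointer parameter, although callers never pass null.
    static bool isAArch64Elf(const ObjectFile *Obj) {
      const auto *Elf = dyn_cast<ELFObjectFileBase>(Obj);
      return Elf && Elf->getEMachine() == ELF::EM_AARCH64;
    }

    // After: the reference encodes the non-null invariant in the type;
    // dyn_cast is handed the address of the referenced object instead.
    static bool isAArch64Elf(const ObjectFile &Obj) {
      const auto *Elf = dyn_cast<ELFObjectFileBase>(&Obj);
      return Elf && Elf->getEMachine() == ELF::EM_AARCH64;
    }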
@@ -782,11 +782,11 @@ addMissingWasmCodeSymbols(const WasmObjectFile *Obj, } } -static void addPltEntries(const ObjectFile *Obj, +static void addPltEntries(const ObjectFile &Obj, std::map<SectionRef, SectionSymbolsTy> &AllSymbols, StringSaver &Saver) { Optional<SectionRef> Plt = None; - for (const SectionRef &Section : Obj->sections()) { + for (const SectionRef &Section : Obj.sections()) { Expected<StringRef> SecNameOrErr = Section.getName(); if (!SecNameOrErr) { consumeError(SecNameOrErr.takeError()); @@ -797,7 +797,7 @@ static void addPltEntries(const ObjectFile *Obj, } if (!Plt) return; - if (auto *ElfObj = dyn_cast<ELFObjectFileBase>(Obj)) { + if (auto *ElfObj = dyn_cast<ELFObjectFileBase>(&Obj)) { for (auto PltEntry : ElfObj->getPltAddresses()) { if (PltEntry.first) { SymbolRef Symbol(*PltEntry.first, ElfObj); @@ -815,7 +815,7 @@ static void addPltEntries(const ObjectFile *Obj, } reportWarning("PLT entry at 0x" + Twine::utohexstr(PltEntry.second) + " references an invalid symbol", - Obj->getFileName()); + Obj.getFileName()); } } } @@ -892,12 +892,12 @@ static char getMappingSymbolKind(ArrayRef<MappingSymbolPair> MappingSymbols, } static uint64_t dumpARMELFData(uint64_t SectionAddr, uint64_t Index, - uint64_t End, const ObjectFile *Obj, + uint64_t End, const ObjectFile &Obj, ArrayRef<uint8_t> Bytes, ArrayRef<MappingSymbolPair> MappingSymbols, raw_ostream &OS) { support::endianness Endian = - Obj->isLittleEndian() ? support::little : support::big; + Obj.isLittleEndian() ? support::little : support::big; OS << format("%8" PRIx64 ":\t", SectionAddr + Index); if (Index + 4 <= End) { dumpBytes(Bytes.slice(Index, 4), OS); @@ -952,34 +952,34 @@ static void dumpELFData(uint64_t SectionAddr, uint64_t Index, uint64_t End, } } -SymbolInfoTy objdump::createSymbolInfo(const ObjectFile *Obj, +SymbolInfoTy objdump::createSymbolInfo(const ObjectFile &Obj, const SymbolRef &Symbol) { - const StringRef FileName = Obj->getFileName(); + const StringRef FileName = Obj.getFileName(); const uint64_t Addr = unwrapOrError(Symbol.getAddress(), FileName); const StringRef Name = unwrapOrError(Symbol.getName(), FileName); - if (Obj->isXCOFF() && SymbolDescription) { - const auto *XCOFFObj = cast<XCOFFObjectFile>(Obj); + if (Obj.isXCOFF() && SymbolDescription) { + const auto &XCOFFObj = cast<XCOFFObjectFile>(Obj); DataRefImpl SymbolDRI = Symbol.getRawDataRefImpl(); - const uint32_t SymbolIndex = XCOFFObj->getSymbolIndex(SymbolDRI.p); + const uint32_t SymbolIndex = XCOFFObj.getSymbolIndex(SymbolDRI.p); Optional<XCOFF::StorageMappingClass> Smc = getXCOFFSymbolCsectSMC(XCOFFObj, Symbol); return SymbolInfoTy(Addr, Name, Smc, SymbolIndex, isLabel(XCOFFObj, Symbol)); - } else if (Obj->isXCOFF()) { + } else if (Obj.isXCOFF()) { const SymbolRef::Type SymType = unwrapOrError(Symbol.getType(), FileName); return SymbolInfoTy(Addr, Name, SymType, true); } else return SymbolInfoTy(Addr, Name, - Obj->isELF() ? getElfSymbolType(Obj, Symbol) - : (uint8_t)ELF::STT_NOTYPE); + Obj.isELF() ? 
getElfSymbolType(Obj, Symbol) + : (uint8_t)ELF::STT_NOTYPE); } -static SymbolInfoTy createDummySymbolInfo(const ObjectFile *Obj, +static SymbolInfoTy createDummySymbolInfo(const ObjectFile &Obj, const uint64_t Addr, StringRef &Name, uint8_t Type) { - if (Obj->isXCOFF() && SymbolDescription) + if (Obj.isXCOFF() && SymbolDescription) return SymbolInfoTy(Addr, Name, None, None, false); else return SymbolInfoTy(Addr, Name, Type); @@ -1131,14 +1131,14 @@ static void emitPostInstructionInfo(formatted_raw_ostream &FOS, FOS.flush(); } -static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, +static void disassembleObject(const Target *TheTarget, const ObjectFile &Obj, MCContext &Ctx, MCDisassembler *PrimaryDisAsm, MCDisassembler *SecondaryDisAsm, const MCInstrAnalysis *MIA, MCInstPrinter *IP, const MCSubtargetInfo *PrimarySTI, const MCSubtargetInfo *SecondarySTI, - PrettyPrinter &PIP, - SourcePrinter &SP, bool InlineRelocs) { + PrettyPrinter &PIP, SourcePrinter &SP, + bool InlineRelocs) { const MCSubtargetInfo *STI = PrimarySTI; MCDisassembler *DisAsm = PrimaryDisAsm; bool PrimaryIsThumb = false; @@ -1147,25 +1147,25 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, std::map<SectionRef, std::vector<RelocationRef>> RelocMap; if (InlineRelocs) - RelocMap = getRelocsMap(*Obj); - bool Is64Bits = Obj->getBytesInAddress() > 4; + RelocMap = getRelocsMap(Obj); + bool Is64Bits = Obj.getBytesInAddress() > 4; // Create a mapping from virtual address to symbol name. This is used to // pretty print the symbols while disassembling. std::map<SectionRef, SectionSymbolsTy> AllSymbols; SectionSymbolsTy AbsoluteSymbols; - const StringRef FileName = Obj->getFileName(); - const MachOObjectFile *MachO = dyn_cast<const MachOObjectFile>(Obj); - for (const SymbolRef &Symbol : Obj->symbols()) { + const StringRef FileName = Obj.getFileName(); + const MachOObjectFile *MachO = dyn_cast<const MachOObjectFile>(&Obj); + for (const SymbolRef &Symbol : Obj.symbols()) { Expected<StringRef> NameOrErr = Symbol.getName(); if (!NameOrErr) { reportWarning(toString(NameOrErr.takeError()), FileName); continue; } - if (NameOrErr->empty() && !(Obj->isXCOFF() && SymbolDescription)) + if (NameOrErr->empty() && !(Obj.isXCOFF() && SymbolDescription)) continue; - if (Obj->isELF() && getElfSymbolType(Obj, Symbol) == ELF::STT_SECTION) + if (Obj.isELF() && getElfSymbolType(Obj, Symbol) == ELF::STT_SECTION) continue; if (MachO) { @@ -1186,16 +1186,16 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, } section_iterator SecI = unwrapOrError(Symbol.getSection(), FileName); - if (SecI != Obj->section_end()) + if (SecI != Obj.section_end()) AllSymbols[*SecI].push_back(createSymbolInfo(Obj, Symbol)); else AbsoluteSymbols.push_back(createSymbolInfo(Obj, Symbol)); } - if (AllSymbols.empty() && Obj->isELF()) - addDynamicElfSymbols(Obj, AllSymbols); + if (AllSymbols.empty() && Obj.isELF()) + addDynamicElfSymbols(cast<ELFObjectFileBase>(Obj), AllSymbols); - if (Obj->isWasm()) + if (Obj.isWasm()) addMissingWasmCodeSymbols(cast<WasmObjectFile>(Obj), AllSymbols); BumpPtrAllocator A; @@ -1207,7 +1207,7 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, // before same-addressed non-empty sections so that symbol lookups prefer the // non-empty section. 
std::vector<std::pair<uint64_t, SectionRef>> SectionAddresses; - for (SectionRef Sec : Obj->sections()) + for (SectionRef Sec : Obj.sections()) SectionAddresses.emplace_back(Sec.getAddress(), Sec); llvm::stable_sort(SectionAddresses, [](const auto &LHS, const auto &RHS) { if (LHS.first != RHS.first) @@ -1217,17 +1217,17 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, // Linked executables (.exe and .dll files) typically don't include a real // symbol table but they might contain an export table. - if (const auto *COFFObj = dyn_cast<COFFObjectFile>(Obj)) { + if (const auto *COFFObj = dyn_cast<COFFObjectFile>(&Obj)) { for (const auto &ExportEntry : COFFObj->export_directories()) { StringRef Name; if (Error E = ExportEntry.getSymbolName(Name)) - reportError(std::move(E), Obj->getFileName()); + reportError(std::move(E), Obj.getFileName()); if (Name.empty()) continue; uint32_t RVA; if (Error E = ExportEntry.getExportRVA(RVA)) - reportError(std::move(E), Obj->getFileName()); + reportError(std::move(E), Obj.getFileName()); uint64_t VA = COFFObj->getImageBase() + RVA; auto Sec = partition_point( @@ -1254,14 +1254,14 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, LiveVariablePrinter LVP(*Ctx.getRegisterInfo(), *STI); if (DbgVariables != DVDisabled) { - DICtx = DWARFContext::create(*Obj); + DICtx = DWARFContext::create(Obj); for (const std::unique_ptr<DWARFUnit> &CU : DICtx->compile_units()) LVP.addCompileUnit(CU->getUnitDIE(false)); } LLVM_DEBUG(LVP.dump()); - for (const SectionRef &Section : ToolSectionFilter(*Obj)) { + for (const SectionRef &Section : ToolSectionFilter(Obj)) { if (FilterSections.empty() && !DisassembleAll && (!Section.isText() || Section.isVirtual())) continue; @@ -1273,12 +1273,12 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, std::unordered_map<uint64_t, BBAddrMap> AddrToBBAddrMap; if (SymbolizeOperands) { - if (auto *Elf = dyn_cast<ELFObjectFileBase>(Obj)) { + if (auto *Elf = dyn_cast<ELFObjectFileBase>(&Obj)) { // Read the BB-address-map corresponding to this section, if present. auto SectionBBAddrMapsOrErr = Elf->readBBAddrMap(Section.getIndex()); if (!SectionBBAddrMapsOrErr) reportWarning(toString(SectionBBAddrMapsOrErr.takeError()), - Obj->getFileName()); + Obj.getFileName()); for (auto &FunctionBBAddrMap : *SectionBBAddrMapsOrErr) AddrToBBAddrMap.emplace(FunctionBBAddrMap.Addr, std::move(FunctionBBAddrMap)); @@ -1306,17 +1306,17 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, llvm::sort(MappingSymbols); ArrayRef<uint8_t> Bytes = arrayRefFromStringRef( - unwrapOrError(Section.getContents(), Obj->getFileName())); + unwrapOrError(Section.getContents(), Obj.getFileName())); std::vector<std::unique_ptr<std::string>> SynthesizedLabelNames; - if (Obj->isELF() && Obj->getArch() == Triple::amdgcn) { + if (Obj.isELF() && Obj.getArch() == Triple::amdgcn) { // AMDGPU disassembler uses symbolizer for printing labels addSymbolizer(Ctx, TheTarget, TripleName, DisAsm, SectionAddr, Bytes, Symbols, SynthesizedLabelNames); } StringRef SegmentName = getSegmentName(MachO, Section); - StringRef SectionName = unwrapOrError(Section.getName(), Obj->getFileName()); + StringRef SectionName = unwrapOrError(Section.getName(), Obj.getFileName()); // If the section has no symbol at the start, just insert a dummy one. 
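// (Aside, not part of the commit: without a symbol at offset 0 the
// disassembly loop further down would have no label to attribute the
// section's leading bytes to; the dummy entry inserted below supplies one.)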
if (Symbols.empty() || Symbols[0].Addr != 0) { Symbols.insert(Symbols.begin(), @@ -1335,7 +1335,7 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, // In executable and shared objects, r_offset holds a virtual address. // Subtract SectionAddr from the r_offset field of a relocation to get // the section offset. - uint64_t RelAdjustment = Obj->isRelocatableObject() ? 0 : SectionAddr; + uint64_t RelAdjustment = Obj.isRelocatableObject() ? 0 : SectionAddr; uint64_t Size; uint64_t Index; bool PrintedSection = false; @@ -1381,7 +1381,7 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, if (LeadingAddr) outs() << format(Is64Bits ? "%016" PRIx64 " " : "%08" PRIx64 " ", SectionAddr + Start + VMAAdjustment); - if (Obj->isXCOFF() && SymbolDescription) { + if (Obj.isXCOFF() && SymbolDescription) { outs() << getXCOFFSymbolDescription(Symbols[SI], SymbolName) << ":\n"; } else outs() << '<' << SymbolName << ">:\n"; @@ -1412,7 +1412,7 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, // separately. But WebAssembly decodes preludes for some symbols. // if (Status) { - if (Status.getValue() == MCDisassembler::Fail) { + if (Status.value() == MCDisassembler::Fail) { outs() << "// Error in decoding " << SymbolName << " : Decoding failed region as bytes.\n"; for (uint64_t I = 0; I < Size; ++I) { @@ -1433,7 +1433,7 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, // If there is a data/common symbol inside an ELF text section and we are // only disassembling text (applicable all architectures), we are in a // situation where we must print the data and not disassemble it. - if (Obj->isELF() && !DisassembleAll && Section.isText()) { + if (Obj.isELF() && !DisassembleAll && Section.isText()) { uint8_t SymTy = Symbols[SI].Type; if (SymTy == ELF::STT_OBJECT || SymTy == ELF::STT_COMMON) { dumpELFData(SectionAddr, Index, End, Bytes); @@ -1525,7 +1525,7 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, PIP.printInst( *IP, Disassembled ? &Inst : nullptr, Bytes.slice(Index, Size), {SectionAddr + Index + VMAAdjustment, Section.getIndex()}, FOS, - "", *STI, &SP, Obj->getFileName(), &Rels, LVP); + "", *STI, &SP, Obj.getFileName(), &Rels, LVP); IP->setCommentStream(llvm::nulls()); @@ -1563,7 +1563,7 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, // // N.B. We don't walk the relocations in the relocatable case yet. std::vector<const SectionSymbolsTy *> TargetSectionSymbols; - if (!Obj->isRelocatableObject()) { + if (!Obj.isRelocatableObject()) { auto It = llvm::partition_point( SectionAddresses, [=](const std::pair<uint64_t, SectionRef> &O) { @@ -1644,7 +1644,7 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, Comments.clear(); // Hexagon does this in pretty printer - if (Obj->getArch() != Triple::hexagon) { + if (Obj.getArch() != Triple::hexagon) { // Print relocation for instruction and data. while (RelCur != RelEnd) { uint64_t Offset = RelCur->getOffset() - RelAdjustment; @@ -1663,15 +1663,15 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, break; // When --adjust-vma is used, update the address printed. 
- if (RelCur->getSymbol() != Obj->symbol_end()) { + if (RelCur->getSymbol() != Obj.symbol_end()) { Expected<section_iterator> SymSI = RelCur->getSymbol()->getSection(); - if (SymSI && *SymSI != Obj->section_end() && + if (SymSI && *SymSI != Obj.section_end() && shouldAdjustVA(**SymSI)) Offset += AdjustVMA; } - printRelocation(FOS, Obj->getFileName(), *RelCur, + printRelocation(FOS, Obj.getFileName(), *RelCur, SectionAddr + Offset, Is64Bits); LVP.printAfterOtherLine(FOS, true); ++RelCur; @@ -1743,7 +1743,7 @@ static void disassembleObject(const ObjectFile *Obj, bool InlineRelocs) { // appropriate. std::unique_ptr<MCDisassembler> SecondaryDisAsm; std::unique_ptr<const MCSubtargetInfo> SecondarySTI; - if (isArmElf(Obj) && !STI->checkFeatures("+mclass")) { + if (isArmElf(*Obj) && !STI->checkFeatures("+mclass")) { if (STI->checkFeatures("+thumb-mode")) Features.AddFeature("-thumb-mode"); else @@ -1775,9 +1775,9 @@ static void disassembleObject(const ObjectFile *Obj, bool InlineRelocs) { reportError(Obj->getFileName(), "Unrecognized disassembler option: " + Opt); - disassembleObject(TheTarget, Obj, Ctx, DisAsm.get(), SecondaryDisAsm.get(), - MIA.get(), IP.get(), STI.get(), SecondarySTI.get(), PIP, - SP, InlineRelocs); + disassembleObject(TheTarget, *Obj, Ctx, DisAsm.get(), SecondaryDisAsm.get(), + MIA.get(), IP.get(), STI.get(), SecondarySTI.get(), PIP, SP, + InlineRelocs); } void objdump::printRelocations(const ObjectFile *Obj) { @@ -1871,28 +1871,28 @@ void objdump::printDynamicRelocations(const ObjectFile *Obj) { // Returns true if we need to show LMA column when dumping section headers. We // show it only when the platform is ELF and either we have at least one section // whose VMA and LMA are different and/or when --show-lma flag is used. -static bool shouldDisplayLMA(const ObjectFile *Obj) { - if (!Obj->isELF()) +static bool shouldDisplayLMA(const ObjectFile &Obj) { + if (!Obj.isELF()) return false; - for (const SectionRef &S : ToolSectionFilter(*Obj)) + for (const SectionRef &S : ToolSectionFilter(Obj)) if (S.getAddress() != getELFSectionLMA(S)) return true; return ShowLMA; } -static size_t getMaxSectionNameWidth(const ObjectFile *Obj) { +static size_t getMaxSectionNameWidth(const ObjectFile &Obj) { // Default column width for names is 13 even if no names are that long. 
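// (Aside, not part of the commit: the 13-character floor keeps the name
// column aligned with the header row that printSectionHeaders emits just
// below, even when every section name is shorter than that.)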
size_t MaxWidth = 13; - for (const SectionRef &Section : ToolSectionFilter(*Obj)) { - StringRef Name = unwrapOrError(Section.getName(), Obj->getFileName()); + for (const SectionRef &Section : ToolSectionFilter(Obj)) { + StringRef Name = unwrapOrError(Section.getName(), Obj.getFileName()); MaxWidth = std::max(MaxWidth, Name.size()); } return MaxWidth; } -void objdump::printSectionHeaders(const ObjectFile *Obj) { +void objdump::printSectionHeaders(const ObjectFile &Obj) { size_t NameWidth = getMaxSectionNameWidth(Obj); - size_t AddressWidth = 2 * Obj->getBytesInAddress(); + size_t AddressWidth = 2 * Obj.getBytesInAddress(); bool HasLMAColumn = shouldDisplayLMA(Obj); outs() << "\nSections:\n"; if (HasLMAColumn) @@ -1904,8 +1904,8 @@ void objdump::printSectionHeaders(const ObjectFile *Obj) { << left_justify("VMA", AddressWidth) << " Type\n"; uint64_t Idx; - for (const SectionRef &Section : ToolSectionFilter(*Obj, &Idx)) { - StringRef Name = unwrapOrError(Section.getName(), Obj->getFileName()); + for (const SectionRef &Section : ToolSectionFilter(Obj, &Idx)) { + StringRef Name = unwrapOrError(Section.getName(), Obj.getFileName()); uint64_t VMA = Section.getAddress(); if (shouldAdjustVA(Section)) VMA += AdjustVMA; @@ -1983,33 +1983,33 @@ void objdump::printSectionContents(const ObjectFile *Obj) { } } -void objdump::printSymbolTable(const ObjectFile *O, StringRef ArchiveName, +void objdump::printSymbolTable(const ObjectFile &O, StringRef ArchiveName, StringRef ArchitectureName, bool DumpDynamic) { - if (O->isCOFF() && !DumpDynamic) { + if (O.isCOFF() && !DumpDynamic) { outs() << "\nSYMBOL TABLE:\n"; printCOFFSymbolTable(cast<const COFFObjectFile>(O)); return; } - const StringRef FileName = O->getFileName(); + const StringRef FileName = O.getFileName(); if (!DumpDynamic) { outs() << "\nSYMBOL TABLE:\n"; - for (auto I = O->symbol_begin(); I != O->symbol_end(); ++I) + for (auto I = O.symbol_begin(); I != O.symbol_end(); ++I) printSymbol(O, *I, {}, FileName, ArchiveName, ArchitectureName, DumpDynamic); return; } outs() << "\nDYNAMIC SYMBOL TABLE:\n"; - if (!O->isELF()) { + if (!O.isELF()) { reportWarning( "this operation is not currently supported for this file format", FileName); return; } - const ELFObjectFileBase *ELF = cast<const ELFObjectFileBase>(O); + const ELFObjectFileBase *ELF = cast<const ELFObjectFileBase>(&O); auto Symbols = ELF->getDynamicSymbolIterators(); Expected<std::vector<VersionEntry>> SymbolVersionsOrErr = ELF->readDynsymVersions(); @@ -2023,11 +2023,11 @@ void objdump::printSymbolTable(const ObjectFile *O, StringRef ArchiveName, ArchitectureName, DumpDynamic); } -void objdump::printSymbol(const ObjectFile *O, const SymbolRef &Symbol, +void objdump::printSymbol(const ObjectFile &O, const SymbolRef &Symbol, ArrayRef<VersionEntry> SymbolVersions, StringRef FileName, StringRef ArchiveName, StringRef ArchitectureName, bool DumpDynamic) { - const MachOObjectFile *MachO = dyn_cast<const MachOObjectFile>(O); + const MachOObjectFile *MachO = dyn_cast<const MachOObjectFile>(&O); uint64_t Address = unwrapOrError(Symbol.getAddress(), FileName, ArchiveName, ArchitectureName); if ((Address < StartAddress) || (Address > StopAddress)) @@ -2050,12 +2050,12 @@ void objdump::printSymbol(const ObjectFile *O, const SymbolRef &Symbol, IsSTAB = true; } section_iterator Section = IsSTAB - ? O->section_end() + ? 
O.section_end() : unwrapOrError(Symbol.getSection(), FileName, ArchiveName, ArchitectureName); StringRef Name; - if (Type == SymbolRef::ST_Debug && Section != O->section_end()) { + if (Type == SymbolRef::ST_Debug && Section != O.section_end()) { if (Expected<StringRef> NameOrErr = Section->getName()) Name = *NameOrErr; else @@ -2073,10 +2073,10 @@ void objdump::printSymbol(const ObjectFile *O, const SymbolRef &Symbol, bool Hidden = Flags & SymbolRef::SF_Hidden; char GlobLoc = ' '; - if ((Section != O->section_end() || Absolute) && !Weak) + if ((Section != O.section_end() || Absolute) && !Weak) GlobLoc = Global ? 'g' : 'l'; char IFunc = ' '; - if (O->isELF()) { + if (O.isELF()) { if (ELFSymbolRef(Symbol).getELFType() == ELF::STT_GNU_IFUNC) IFunc = 'i'; if (ELFSymbolRef(Symbol).getBinding() == ELF::STB_GNU_UNIQUE) @@ -2097,7 +2097,7 @@ void objdump::printSymbol(const ObjectFile *O, const SymbolRef &Symbol, else if (Type == SymbolRef::ST_Data) FileFunc = 'O'; - const char *Fmt = O->getBytesInAddress() > 4 ? "%016" PRIx64 : "%08" PRIx64; + const char *Fmt = O.getBytesInAddress() > 4 ? "%016" PRIx64 : "%08" PRIx64; outs() << format(Fmt, Address) << " " << GlobLoc // Local -> 'l', Global -> 'g', Neither -> ' ' @@ -2112,9 +2112,9 @@ void objdump::printSymbol(const ObjectFile *O, const SymbolRef &Symbol, outs() << "*ABS*"; } else if (Common) { outs() << "*COM*"; - } else if (Section == O->section_end()) { - if (O->isXCOFF()) { - XCOFFSymbolRef XCOFFSym = dyn_cast<const XCOFFObjectFile>(O)->toSymbolRef( + } else if (Section == O.section_end()) { + if (O.isXCOFF()) { + XCOFFSymbolRef XCOFFSym = cast<const XCOFFObjectFile>(O).toSymbolRef( Symbol.getRawDataRefImpl()); if (XCOFF::N_DEBUG == XCOFFSym.getSectionNumber()) outs() << "*DEBUG*"; @@ -2128,9 +2128,9 @@ void objdump::printSymbol(const ObjectFile *O, const SymbolRef &Symbol, outs() << SegmentName << ","; StringRef SectionName = unwrapOrError(Section->getName(), FileName); outs() << SectionName; - if (O->isXCOFF()) { - Optional<SymbolRef> SymRef = getXCOFFSymbolContainingSymbolRef( - dyn_cast<const XCOFFObjectFile>(O), Symbol); + if (O.isXCOFF()) { + Optional<SymbolRef> SymRef = + getXCOFFSymbolContainingSymbolRef(cast<XCOFFObjectFile>(O), Symbol); if (SymRef) { Expected<StringRef> NameOrErr = SymRef->getName(); @@ -2144,7 +2144,7 @@ void objdump::printSymbol(const ObjectFile *O, const SymbolRef &Symbol, if (SymbolDescription) SymName = getXCOFFSymbolDescription( - createSymbolInfo(O, SymRef.getValue()), SymName); + createSymbolInfo(O, SymRef.value()), SymName); outs() << ' ' << SymName; outs() << ") "; @@ -2156,14 +2156,14 @@ void objdump::printSymbol(const ObjectFile *O, const SymbolRef &Symbol, if (Common) outs() << '\t' << format(Fmt, static_cast<uint64_t>(Symbol.getAlignment())); - else if (O->isXCOFF()) + else if (O.isXCOFF()) outs() << '\t' - << format(Fmt, dyn_cast<const XCOFFObjectFile>(O)->getSymbolSize( + << format(Fmt, cast<XCOFFObjectFile>(O).getSymbolSize( Symbol.getRawDataRefImpl())); - else if (O->isELF()) + else if (O.isELF()) outs() << '\t' << format(Fmt, ELFSymbolRef(Symbol).getSize()); - if (O->isELF()) { + if (O.isELF()) { if (!SymbolVersions.empty()) { const VersionEntry &Ver = SymbolVersions[Symbol.getRawDataRefImpl().d.b - 1]; @@ -2198,7 +2198,7 @@ void objdump::printSymbol(const ObjectFile *O, const SymbolRef &Symbol, if (Demangle) SymName = demangle(SymName); - if (O->isXCOFF() && SymbolDescription) + if (O.isXCOFF() && SymbolDescription) SymName = getXCOFFSymbolDescription(createSymbolInfo(O, Symbol), SymName); outs() 
<< ' ' << SymName << '\n'; @@ -2251,8 +2251,8 @@ static void printRawClangAST(const ObjectFile *Obj) { if (!ClangASTSection) return; - StringRef ClangASTContents = unwrapOrError( - ClangASTSection.getValue().getContents(), Obj->getFileName()); + StringRef ClangASTContents = + unwrapOrError(ClangASTSection.value().getContents(), Obj->getFileName()); outs().write(ClangASTContents.data(), ClangASTContents.size()); } @@ -2442,11 +2442,11 @@ static void dumpObject(ObjectFile *O, const Archive *A = nullptr, if (PrivateHeaders || FirstPrivateHeader) printPrivateFileHeaders(O, FirstPrivateHeader); if (SectionHeaders) - printSectionHeaders(O); + printSectionHeaders(*O); if (SymbolTable) - printSymbolTable(O, ArchiveName); + printSymbolTable(*O, ArchiveName); if (DynamicSymbolTable) - printSymbolTable(O, ArchiveName, /*ArchitectureName=*/"", + printSymbolTable(*O, ArchiveName, /*ArchitectureName=*/"", /*DumpDynamic=*/true); if (DwarfDumpType != DIDT_Null) { std::unique_ptr<DIContext> DICtx = DWARFContext::create(*O); @@ -2501,7 +2501,7 @@ static void dumpObject(const COFFImportFile *I, const Archive *A, if (ArchiveHeaders && !MachOOpt && C) printArchiveChild(ArchiveName, *C); if (SymbolTable) - printCOFFSymbolTable(I); + printCOFFSymbolTable(*I); } /// Dump each object file in \a a; diff --git a/llvm/tools/llvm-objdump/llvm-objdump.h b/llvm/tools/llvm-objdump/llvm-objdump.h index 61b6215aa5f5..dd9f58aa3308 100644 --- a/llvm/tools/llvm-objdump/llvm-objdump.h +++ b/llvm/tools/llvm-objdump/llvm-objdump.h @@ -124,12 +124,12 @@ SectionFilter ToolSectionFilter(llvm::object::ObjectFile const &O, bool isRelocAddressLess(object::RelocationRef A, object::RelocationRef B); void printRelocations(const object::ObjectFile *O); void printDynamicRelocations(const object::ObjectFile *O); -void printSectionHeaders(const object::ObjectFile *O); +void printSectionHeaders(const object::ObjectFile &O); void printSectionContents(const object::ObjectFile *O); -void printSymbolTable(const object::ObjectFile *O, StringRef ArchiveName, +void printSymbolTable(const object::ObjectFile &O, StringRef ArchiveName, StringRef ArchitectureName = StringRef(), bool DumpDynamic = false); -void printSymbol(const object::ObjectFile *O, const object::SymbolRef &Symbol, +void printSymbol(const object::ObjectFile &O, const object::SymbolRef &Symbol, ArrayRef<object::VersionEntry> SymbolVersions, StringRef FileName, StringRef ArchiveName, StringRef ArchitectureName, bool DumpDynamic); @@ -148,7 +148,7 @@ T unwrapOrError(Expected<T> EO, Ts &&... 
Args) { std::string getFileNameForError(const object::Archive::Child &C, unsigned Index); -SymbolInfoTy createSymbolInfo(const object::ObjectFile *Obj, +SymbolInfoTy createSymbolInfo(const object::ObjectFile &Obj, const object::SymbolRef &Symbol); } // namespace objdump diff --git a/llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp b/llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp index 3b922a7bea21..3859558b32ff 100644 --- a/llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp +++ b/llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp @@ -84,7 +84,6 @@ #include "llvm/Support/Format.h" #include "llvm/Support/InitLLVM.h" #include "llvm/Support/LineIterator.h" -#include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" #include "llvm/Support/PrettyStackTrace.h" diff --git a/llvm/tools/llvm-sim/llvm-sim.cpp b/llvm/tools/llvm-sim/llvm-sim.cpp index 6879d73c4434..2b717d72e48a 100644 --- a/llvm/tools/llvm-sim/llvm-sim.cpp +++ b/llvm/tools/llvm-sim/llvm-sim.cpp @@ -90,8 +90,8 @@ exportToFile(const StringRef FilePath, assert(End && "Could not find instruction number for last instruction"); J.object([&] { - J.attribute("start", Start.getValue()); - J.attribute("end", End.getValue()); + J.attribute("start", Start.value()); + J.attribute("end", End.value()); }); } J.arrayEnd(); diff --git a/llvm/tools/llvm-xray/xray-registry.cpp b/llvm/tools/llvm-xray/xray-registry.cpp index e5c253d2e8f1..34ac07ebe45c 100644 --- a/llvm/tools/llvm-xray/xray-registry.cpp +++ b/llvm/tools/llvm-xray/xray-registry.cpp @@ -11,7 +11,6 @@ //===----------------------------------------------------------------------===// #include "xray-registry.h" -#include "llvm/Support/ManagedStatic.h" #include <unordered_map> namespace llvm { @@ -19,19 +18,22 @@ namespace xray { using HandlerType = std::function<Error()>; -ManagedStatic<std::unordered_map<cl::SubCommand *, HandlerType>> Commands; +static std::unordered_map<cl::SubCommand *, HandlerType> &getCommands() { + static std::unordered_map<cl::SubCommand *, HandlerType> Commands; + return Commands; +} CommandRegistration::CommandRegistration(cl::SubCommand *SC, HandlerType Command) { - assert(Commands->count(SC) == 0 && + assert(getCommands().count(SC) == 0 && "Attempting to overwrite a command handler"); assert(Command && "Attempting to register an empty std::function<Error()>"); - (*Commands)[SC] = Command; + getCommands()[SC] = Command; } HandlerType dispatch(cl::SubCommand *SC) { - auto It = Commands->find(SC); - assert(It != Commands->end() && + auto It = getCommands().find(SC); + assert(It != getCommands().end() && "Attempting to dispatch on un-registered SubCommand."); return It->second; } diff --git a/llvm/tools/opt/NewPMDriver.cpp b/llvm/tools/opt/NewPMDriver.cpp index 17c5da408560..61d0e121fc5a 100644 --- a/llvm/tools/opt/NewPMDriver.cpp +++ b/llvm/tools/opt/NewPMDriver.cpp @@ -49,6 +49,19 @@ cl::opt<std::string> DebugifyExport("debugify-export", cl::desc("Export per-pass debugify statistics to this file"), cl::value_desc("filename")); + +cl::opt<bool> VerifyEachDebugInfoPreserve( + "verify-each-debuginfo-preserve", + cl::desc("Start each pass with collecting and end it with checking of " + "debug info preservation.")); + +cl::opt<std::string> + VerifyDIPreserveExport("verify-di-preserve-export", + cl::desc("Export debug info preservation failures into " + "specified (JSON) file (should be abs path as we use" + " append mode to insert new JSON objects)"), + cl::value_desc("filename"), cl::init("")); + } // namespace llvm enum class DebugLogging { None, Normal, 
Verbose, Quiet }; @@ -280,7 +293,7 @@ bool llvm::runPassPipeline(StringRef Arg0, Module &M, TargetMachine *TM, bool ShouldPreserveAssemblyUseListOrder, bool ShouldPreserveBitcodeUseListOrder, bool EmitSummaryIndex, bool EmitModuleHash, - bool EnableDebugify) { + bool EnableDebugify, bool VerifyDIPreserve) { bool VerifyEachPass = VK == VK_VerifyEachPass; Optional<PGOOptions> P; @@ -337,8 +350,19 @@ bool llvm::runPassPipeline(StringRef Arg0, Module &M, TargetMachine *TM, PrintPassOpts); SI.registerCallbacks(PIC, &FAM); DebugifyEachInstrumentation Debugify; - if (DebugifyEach) + DebugifyStatsMap DIStatsMap; + DebugInfoPerPass DebugInfoBeforePass; + if (DebugifyEach) { + Debugify.setDIStatsMap(DIStatsMap); + Debugify.setDebugifyMode(DebugifyMode::SyntheticDebugInfo); + Debugify.registerCallbacks(PIC); + } else if (VerifyEachDebugInfoPreserve) { + Debugify.setDebugInfoBeforePass(DebugInfoBeforePass); + Debugify.setDebugifyMode(DebugifyMode::OriginalDebugInfo); + Debugify.setOrigDIVerifyBugsReportFilePath( + VerifyDIPreserveExport); Debugify.registerCallbacks(PIC); + } PipelineTuningOptions PTO; // LoopUnrolling defaults on to true and DisableLoopUnrolling is initialized @@ -417,6 +441,9 @@ bool llvm::runPassPipeline(StringRef Arg0, Module &M, TargetMachine *TM, MPM.addPass(VerifierPass()); if (EnableDebugify) MPM.addPass(NewPMDebugifyPass()); + if (VerifyDIPreserve) + MPM.addPass(NewPMDebugifyPass(DebugifyMode::OriginalDebugInfo, "", + &DebugInfoBeforePass)); // Add passes according to the -passes options. if (!PassPipeline.empty()) { @@ -456,7 +483,11 @@ bool llvm::runPassPipeline(StringRef Arg0, Module &M, TargetMachine *TM, if (VK > VK_NoVerifier) MPM.addPass(VerifierPass()); if (EnableDebugify) - MPM.addPass(NewPMCheckDebugifyPass()); + MPM.addPass(NewPMCheckDebugifyPass(false, "", &DIStatsMap)); + if (VerifyDIPreserve) + MPM.addPass(NewPMCheckDebugifyPass( + false, "", nullptr, DebugifyMode::OriginalDebugInfo, &DebugInfoBeforePass, + VerifyDIPreserveExport)); // Add any relevant output pass at the end of the pipeline. 
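// (Aside, not part of the commit: OK is the opt_tool::OutputKind chosen by
// the opt driver; the enum is declared in NewPMDriver.h further down in
// this diff, starting with OK_NoOutput.)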
switch (OK) { @@ -504,7 +535,7 @@ bool llvm::runPassPipeline(StringRef Arg0, Module &M, TargetMachine *TM, OptRemarkFile->keep(); if (DebugifyEach && !DebugifyExport.empty()) - exportDebugifyStats(DebugifyExport, Debugify.StatsMap); + exportDebugifyStats(DebugifyExport, Debugify.getDebugifyStatsMap()); return true; } diff --git a/llvm/tools/opt/NewPMDriver.h b/llvm/tools/opt/NewPMDriver.h index 16bb205afdca..543f91c383f2 100644 --- a/llvm/tools/opt/NewPMDriver.h +++ b/llvm/tools/opt/NewPMDriver.h @@ -33,6 +33,9 @@ class TargetLibraryInfoImpl; extern cl::opt<bool> DebugifyEach; extern cl::opt<std::string> DebugifyExport; +extern cl::opt<bool> VerifyEachDebugInfoPreserve; +extern cl::opt<std::string> VerifyDIPreserveExport; + namespace opt_tool { enum OutputKind { OK_NoOutput, @@ -74,7 +77,7 @@ bool runPassPipeline(StringRef Arg0, Module &M, TargetMachine *TM, bool ShouldPreserveAssemblyUseListOrder, bool ShouldPreserveBitcodeUseListOrder, bool EmitSummaryIndex, bool EmitModuleHash, - bool EnableDebugify); + bool EnableDebugify, bool VerifyDIPreserve); } // namespace llvm #endif diff --git a/llvm/tools/opt/opt.cpp b/llvm/tools/opt/opt.cpp index 0e013ef3b9fd..1160412e37af 100644 --- a/llvm/tools/opt/opt.cpp +++ b/llvm/tools/opt/opt.cpp @@ -206,18 +206,6 @@ static cl::opt<bool> VerifyDebugInfoPreserve( cl::desc("Start the pipeline with collecting and end it with checking of " "debug info preservation.")); -static cl::opt<bool> VerifyEachDebugInfoPreserve( - "verify-each-debuginfo-preserve", - cl::desc("Start each pass with collecting and end it with checking of " - "debug info preservation.")); - -static cl::opt<std::string> - VerifyDIPreserveExport("verify-di-preserve-export", - cl::desc("Export debug info preservation failures into " - "specified (JSON) file (should be abs path as we use" - " append mode to insert new JSON objects)"), - cl::value_desc("filename"), cl::init("")); - static cl::opt<bool> PrintBreakpoints("print-breakpoints-for-testing", cl::desc("Print select breakpoints location for testing")); @@ -823,7 +811,8 @@ int main(int argc, char **argv) { ThinLinkOut.get(), RemarksFile.get(), Pipeline, Passes, PluginList, OK, VK, PreserveAssemblyUseListOrder, PreserveBitcodeUseListOrder, EmitSummaryIndex, - EmitModuleHash, EnableDebugify) + EmitModuleHash, EnableDebugify, + VerifyDebugInfoPreserve) ? 0 : 1; } diff --git a/llvm/utils/TableGen/CodeEmitterGen.cpp b/llvm/utils/TableGen/CodeEmitterGen.cpp index 2b9931b23c11..1d00c3cfd069 100644 --- a/llvm/utils/TableGen/CodeEmitterGen.cpp +++ b/llvm/utils/TableGen/CodeEmitterGen.cpp @@ -332,14 +332,6 @@ std::string CodeEmitterGen::getInstructionCaseForEncoding(Record *R, Record *Enc return Case; } -static std::string -getNameForFeatureBitset(const std::vector<Record *> &FeatureBitset) { - std::string Name = "CEFBS"; - for (const auto &Feature : FeatureBitset) - Name += ("_" + Feature->getName()).str(); - return Name; -} - static void emitInstBits(raw_ostream &OS, const APInt &Bits) { for (unsigned I = 0; I < Bits.getNumWords(); ++I) OS << ((I > 0) ? 
", " : "") << "UINT64_C(" << utostr(Bits.getRawData()[I]) @@ -530,131 +522,6 @@ void CodeEmitterGen::run(raw_ostream &o) { o << " return Value;\n"; o << "}\n\n"; } - - const auto &All = SubtargetFeatureInfo::getAll(Records); - std::map<Record *, SubtargetFeatureInfo, LessRecordByID> SubtargetFeatures; - SubtargetFeatures.insert(All.begin(), All.end()); - - o << "#ifdef ENABLE_INSTR_PREDICATE_VERIFIER\n" - << "#undef ENABLE_INSTR_PREDICATE_VERIFIER\n" - << "#include <sstream>\n\n"; - - // Emit the subtarget feature enumeration. - SubtargetFeatureInfo::emitSubtargetFeatureBitEnumeration(SubtargetFeatures, - o); - - // Emit the name table for error messages. - o << "#ifndef NDEBUG\n"; - SubtargetFeatureInfo::emitNameTable(SubtargetFeatures, o); - o << "#endif // NDEBUG\n"; - - // Emit the available features compute function. - SubtargetFeatureInfo::emitComputeAssemblerAvailableFeatures( - Target.getName(), "MCCodeEmitter", "computeAvailableFeatures", - SubtargetFeatures, o); - - std::vector<std::vector<Record *>> FeatureBitsets; - for (const CodeGenInstruction *Inst : Target.getInstructionsByEnumValue()) { - FeatureBitsets.emplace_back(); - for (Record *Predicate : Inst->TheDef->getValueAsListOfDefs("Predicates")) { - const auto &I = SubtargetFeatures.find(Predicate); - if (I != SubtargetFeatures.end()) - FeatureBitsets.back().push_back(I->second.TheDef); - } - } - - llvm::sort(FeatureBitsets, [&](const std::vector<Record *> &A, - const std::vector<Record *> &B) { - if (A.size() < B.size()) - return true; - if (A.size() > B.size()) - return false; - for (auto Pair : zip(A, B)) { - if (std::get<0>(Pair)->getName() < std::get<1>(Pair)->getName()) - return true; - if (std::get<0>(Pair)->getName() > std::get<1>(Pair)->getName()) - return false; - } - return false; - }); - FeatureBitsets.erase( - std::unique(FeatureBitsets.begin(), FeatureBitsets.end()), - FeatureBitsets.end()); - o << "#ifndef NDEBUG\n" - << "// Feature bitsets.\n" - << "enum : " << getMinimalTypeForRange(FeatureBitsets.size()) << " {\n" - << " CEFBS_None,\n"; - for (const auto &FeatureBitset : FeatureBitsets) { - if (FeatureBitset.empty()) - continue; - o << " " << getNameForFeatureBitset(FeatureBitset) << ",\n"; - } - o << "};\n\n" - << "static constexpr FeatureBitset FeatureBitsets[] = {\n" - << " {}, // CEFBS_None\n"; - for (const auto &FeatureBitset : FeatureBitsets) { - if (FeatureBitset.empty()) - continue; - o << " {"; - for (const auto &Feature : FeatureBitset) { - const auto &I = SubtargetFeatures.find(Feature); - assert(I != SubtargetFeatures.end() && "Didn't import predicate?"); - o << I->second.getEnumBitName() << ", "; - } - o << "},\n"; - } - o << "};\n" - << "#endif // NDEBUG\n\n"; - - - // Emit the predicate verifier. 
- o << "void " << Target.getName() - << "MCCodeEmitter::verifyInstructionPredicates(\n" - << " const MCInst &Inst, const FeatureBitset &AvailableFeatures) const {\n" - << "#ifndef NDEBUG\n" - << " static " << getMinimalTypeForRange(FeatureBitsets.size()) - << " RequiredFeaturesRefs[] = {\n"; - unsigned InstIdx = 0; - for (const CodeGenInstruction *Inst : Target.getInstructionsByEnumValue()) { - o << " CEFBS"; - unsigned NumPredicates = 0; - for (Record *Predicate : Inst->TheDef->getValueAsListOfDefs("Predicates")) { - const auto &I = SubtargetFeatures.find(Predicate); - if (I != SubtargetFeatures.end()) { - o << '_' << I->second.TheDef->getName(); - NumPredicates++; - } - } - if (!NumPredicates) - o << "_None"; - o << ", // " << Inst->TheDef->getName() << " = " << InstIdx << "\n"; - InstIdx++; - } - o << " };\n\n"; - o << " assert(Inst.getOpcode() < " << InstIdx << ");\n"; - o << " const FeatureBitset &RequiredFeatures = " - "FeatureBitsets[RequiredFeaturesRefs[Inst.getOpcode()]];\n"; - o << " FeatureBitset MissingFeatures =\n" - << " (AvailableFeatures & RequiredFeatures) ^\n" - << " RequiredFeatures;\n" - << " if (MissingFeatures.any()) {\n" - << " std::ostringstream Msg;\n" - << " Msg << \"Attempting to emit \" << " - "MCII.getName(Inst.getOpcode()).str()\n" - << " << \" instruction but the \";\n" - << " for (unsigned i = 0, e = MissingFeatures.size(); i != e; ++i)\n" - << " if (MissingFeatures.test(i))\n" - << " Msg << SubtargetFeatureNames[i] << \" \";\n" - << " Msg << \"predicate(s) are not met\";\n" - << " report_fatal_error(Msg.str().c_str());\n" - << " }\n" - << "#else\n" - << " // Silence unused variable warning on targets that don't use MCII for " - "other purposes (e.g. BPF).\n" - << " (void)MCII;\n" - << "#endif // NDEBUG\n"; - o << "}\n"; - o << "#endif\n"; } } // end anonymous namespace diff --git a/llvm/utils/TableGen/CodeGenDAGPatterns.cpp b/llvm/utils/TableGen/CodeGenDAGPatterns.cpp index 9d6adb6d2c37..c15728ac7d23 100644 --- a/llvm/utils/TableGen/CodeGenDAGPatterns.cpp +++ b/llvm/utils/TableGen/CodeGenDAGPatterns.cpp @@ -46,6 +46,9 @@ static inline bool isVector(MVT VT) { static inline bool isScalar(MVT VT) { return !VT.isVector(); } +static inline bool isScalarInteger(MVT VT) { + return VT.isScalarInteger(); +} template <typename Predicate> static bool berase_if(MachineValueTypeSet &S, Predicate P) { @@ -61,6 +64,17 @@ static bool berase_if(MachineValueTypeSet &S, Predicate P) { return Erased; } +void MachineValueTypeSet::writeToStream(raw_ostream &OS) const { + SmallVector<MVT, 4> Types(begin(), end()); + array_pod_sort(Types.begin(), Types.end()); + + OS << '['; + ListSeparator LS(" "); + for (const MVT &T : Types) + OS << LS << ValueTypeByHwMode::getMVTName(T); + OS << ']'; +} + // --- TypeSetByHwMode // This is a parameterized type-set class. For each mode there is a list @@ -193,22 +207,11 @@ void TypeSetByHwMode::writeToStream(raw_ostream &OS) const { OS << '{'; for (unsigned M : Modes) { OS << ' ' << getModeName(M) << ':'; - writeToStream(get(M), OS); + get(M).writeToStream(OS); } OS << " }"; } -void TypeSetByHwMode::writeToStream(const SetType &S, raw_ostream &OS) { - SmallVector<MVT, 4> Types(S.begin(), S.end()); - array_pod_sort(Types.begin(), Types.end()); - - OS << '['; - ListSeparator LS(" "); - for (const MVT &T : Types) - OS << LS << ValueTypeByHwMode::getMVTName(T); - OS << ']'; -} - bool TypeSetByHwMode::operator==(const TypeSetByHwMode &VTS) const { // The isSimple call is much quicker than hasDefault - check this first. 
bool IsSimple = isSimple(); @@ -253,6 +256,10 @@ bool TypeSetByHwMode::operator==(const TypeSetByHwMode &VTS) const { } namespace llvm { + raw_ostream &operator<<(raw_ostream &OS, const MachineValueTypeSet &T) { + T.writeToStream(OS); + return OS; + } raw_ostream &operator<<(raw_ostream &OS, const TypeSetByHwMode &T) { T.writeToStream(OS); return OS; @@ -266,10 +273,11 @@ void TypeSetByHwMode::dump() const { bool TypeSetByHwMode::intersect(SetType &Out, const SetType &In) { bool OutP = Out.count(MVT::iPTR), InP = In.count(MVT::iPTR); - auto Int = [&In](MVT T) -> bool { return !In.count(T); }; + // Complement of In. + auto CompIn = [&In](MVT T) -> bool { return !In.count(T); }; if (OutP == InP) - return berase_if(Out, Int); + return berase_if(Out, CompIn); // Compute the intersection of scalars separately to account for only // one set containing iPTR. @@ -285,42 +293,64 @@ bool TypeSetByHwMode::intersect(SetType &Out, const SetType &In) { // { iPTR i32 } * { i32 i64 } -> { i32 i64 } // { iPTR i32 } * { i32 i64 i128 } -> { iPTR i32 } - // Compute the difference between the two sets in such a way that the - // iPTR is in the set that is being subtracted. This is to see if there - // are any extra scalars in the set without iPTR that are not in the - // set containing iPTR. Then the iPTR could be considered a "wildcard" - // matching these scalars. If there is only one such scalar, it would - // replace the iPTR, if there are more, the iPTR would be retained. - SetType Diff; + // Let In' = elements only in In, Out' = elements only in Out, and + // IO = elements common to both. Normally IO would be returned as the result + // of the intersection, but we need to account for iPTR being a "wildcard" of + // sorts. Since elements in IO are those that match both sets exactly, they + // will all belong to the output. If any of the "leftovers" (i.e. In' or + // Out') contain iPTR, it means that the other set doesn't have it, but it + // could have (1) a more specific type, or (2) a set of types that is less + // specific. The "leftovers" from the other set is what we want to examine + // more closely. + + auto subtract = [](const SetType &A, const SetType &B) { + SetType Diff = A; + berase_if(Diff, [&B](MVT T) { return B.count(T); }); + return Diff; + }; + if (InP) { - Diff = Out; - berase_if(Diff, [&In](MVT T) { return In.count(T); }); - // Pre-remove these elements and rely only on InP/OutP to determine - // whether a change has been made. - berase_if(Out, [&Diff](MVT T) { return Diff.count(T); }); - } else { - Diff = In; - berase_if(Diff, [&Out](MVT T) { return Out.count(T); }); - Out.erase(MVT::iPTR); - } - - // The actual intersection. - bool Changed = berase_if(Out, Int); - unsigned NumD = Diff.size(); - if (NumD == 0) - return Changed; - - if (NumD == 1) { - Out.insert(*Diff.begin()); - // This is a change only if Out was the one with iPTR (which is now - // being replaced). - Changed |= OutP; - } else { - // Multiple elements from Out are now replaced with iPTR. - Out.insert(MVT::iPTR); - Changed |= !OutP; + SetType OutOnly = subtract(Out, In); + if (OutOnly.empty()) { + // This means that Out \subset In, so no change to Out. + return false; + } + unsigned NumI = llvm::count_if(OutOnly, isScalarInteger); + if (NumI == 1 && OutOnly.size() == 1) { + // There is only one element in Out', and it happens to be a scalar + // integer that should be kept as a match for iPTR in In. + return false; + } + berase_if(Out, CompIn); + if (NumI == 1) { + // Replace the iPTR with the leftover scalar integer. 
+ Out.insert(*llvm::find_if(OutOnly, isScalarInteger)); + } else if (NumI > 1) { + Out.insert(MVT::iPTR); + } + return true; } - return Changed; + + // OutP == true + SetType InOnly = subtract(In, Out); + unsigned SizeOut = Out.size(); + berase_if(Out, CompIn); // This will remove at least the iPTR. + unsigned NumI = llvm::count_if(InOnly, isScalarInteger); + if (NumI == 0) { + // iPTR deleted from Out. + return true; + } + if (NumI == 1) { + // Replace the iPTR with the leftover scalar integer. + Out.insert(*llvm::find_if(InOnly, isScalarInteger)); + return true; + } + + // NumI > 1: Keep the iPTR in Out. + Out.insert(MVT::iPTR); + // If iPTR was the only element initially removed from Out, then Out + // has not changed. + return SizeOut != Out.size(); } bool TypeSetByHwMode::validate() const { @@ -902,7 +932,7 @@ TreePredicateFn::TreePredicateFn(TreePattern *N) : PatFragRec(N) { } bool TreePredicateFn::hasPredCode() const { - return isLoad() || isStore() || isAtomic() || + return isLoad() || isStore() || isAtomic() || hasNoUse() || !PatFragRec->getRecord()->getValueAsString("PredicateCode").empty(); } @@ -947,12 +977,15 @@ std::string TreePredicateFn::getPredCode() const { if (isAnyExtLoad()) PrintFatalError(getOrigPatFragRecord()->getRecord()->getLoc(), "IsAnyExtLoad requires IsLoad"); - if (isSignExtLoad()) - PrintFatalError(getOrigPatFragRecord()->getRecord()->getLoc(), - "IsSignExtLoad requires IsLoad"); - if (isZeroExtLoad()) - PrintFatalError(getOrigPatFragRecord()->getRecord()->getLoc(), - "IsZeroExtLoad requires IsLoad"); + + if (!isAtomic()) { + if (isSignExtLoad()) + PrintFatalError(getOrigPatFragRecord()->getRecord()->getLoc(), + "IsSignExtLoad requires IsLoad or IsAtomic"); + if (isZeroExtLoad()) + PrintFatalError(getOrigPatFragRecord()->getRecord()->getLoc(), + "IsZeroExtLoad requires IsLoad or IsAtomic"); + } } if (isStore()) { @@ -973,8 +1006,9 @@ std::string TreePredicateFn::getPredCode() const { if (isAtomic()) { if (getMemoryVT() == nullptr && !isAtomicOrderingMonotonic() && getAddressSpaces() == nullptr && - !isAtomicOrderingAcquire() && !isAtomicOrderingRelease() && - !isAtomicOrderingAcquireRelease() && + // FIXME: Should atomic loads be IsLoad, IsAtomic, or both? + !isZeroExtLoad() && !isSignExtLoad() && !isAtomicOrderingAcquire() && + !isAtomicOrderingRelease() && !isAtomicOrderingAcquireRelease() && !isAtomicOrderingSequentiallyConsistent() && !isAtomicOrderingAcquireOrStronger() && !isAtomicOrderingReleaseOrStronger() && @@ -1075,6 +1109,10 @@ std::string TreePredicateFn::getPredCode() const { Code += "if (isReleaseOrStronger(cast<AtomicSDNode>(N)->getMergedOrdering())) " "return false;\n"; + // TODO: Handle atomic sextload/zextload normally when ATOMIC_LOAD is removed. + if (isAtomic() && (isZeroExtLoad() || isSignExtLoad())) + Code += "return false;\n"; + if (isLoad() || isStore()) { StringRef SDNodeName = isLoad() ? 
"LoadSDNode" : "StoreSDNode"; @@ -1124,6 +1162,9 @@ std::string TreePredicateFn::getPredCode() const { .str(); } + if (hasNoUse()) + Code += "if (!SDValue(N, 0).use_empty()) return false;\n"; + std::string PredicateCode = std::string(PatFragRec->getRecord()->getValueAsString("PredicateCode")); @@ -1167,6 +1208,9 @@ bool TreePredicateFn::isPredefinedPredicateEqualTo(StringRef Field, bool TreePredicateFn::usesOperands() const { return isPredefinedPredicateEqualTo("PredicateCodeUsesOperands", true); } +bool TreePredicateFn::hasNoUse() const { + return isPredefinedPredicateEqualTo("HasNoUse", true); +} bool TreePredicateFn::isLoad() const { return isPredefinedPredicateEqualTo("IsLoad", true); } diff --git a/llvm/utils/TableGen/CodeGenDAGPatterns.h b/llvm/utils/TableGen/CodeGenDAGPatterns.h index 94694a96eb90..dbdc72f0873a 100644 --- a/llvm/utils/TableGen/CodeGenDAGPatterns.h +++ b/llvm/utils/TableGen/CodeGenDAGPatterns.h @@ -102,6 +102,8 @@ struct MachineValueTypeSet { Words[T.SimpleTy / WordWidth] &= ~(WordType(1) << (T.SimpleTy % WordWidth)); } + void writeToStream(raw_ostream &OS) const; + struct const_iterator { // Some implementations of the C++ library require these traits to be // defined. @@ -185,6 +187,8 @@ private: std::array<WordType,NumWords> Words; }; +raw_ostream &operator<<(raw_ostream &OS, const MachineValueTypeSet &T); + struct TypeSetByHwMode : public InfoByHwMode<MachineValueTypeSet> { using SetType = MachineValueTypeSet; SmallVector<unsigned, 16> AddrSpaces; @@ -239,7 +243,6 @@ struct TypeSetByHwMode : public InfoByHwMode<MachineValueTypeSet> { bool assign_if(const TypeSetByHwMode &VTS, Predicate P); void writeToStream(raw_ostream &OS) const; - static void writeToStream(const SetType &S, raw_ostream &OS); bool operator==(const TypeSetByHwMode &VTS) const; bool operator!=(const TypeSetByHwMode &VTS) const { return !(*this == VTS); } @@ -538,6 +541,9 @@ public: // Predicate code uses the PatFrag's captured operands. bool usesOperands() const; + // Check if the HasNoUse predicate is set. + bool hasNoUse() const; + // Is the desired predefined predicate for a load? bool isLoad() const; // Is the desired predefined predicate for a store? diff --git a/llvm/utils/TableGen/CodeGenRegisters.cpp b/llvm/utils/TableGen/CodeGenRegisters.cpp index 2c61be713afc..93ed86cfb7e5 100644 --- a/llvm/utils/TableGen/CodeGenRegisters.cpp +++ b/llvm/utils/TableGen/CodeGenRegisters.cpp @@ -861,6 +861,26 @@ void CodeGenRegisterClass::inheritProperties(CodeGenRegBank &RegBank) { Orders[i].push_back(Super.Orders[i][j]); } +bool CodeGenRegisterClass::hasType(const ValueTypeByHwMode &VT) const { + if (llvm::is_contained(VTs, VT)) + return true; + + // If VT is not identical to any of this class's types, but is a simple + // type, check if any of the types for this class contain it under some + // mode. + // The motivating example came from RISCV, where (likely because of being + // guarded by "64-bit" predicate), the type of X5 was {*:[i64]}, but the + // type in GRC was {*:[i32], m1:[i64]}. 
+ if (VT.isSimple()) { + MVT T = VT.getSimple(); + for (const ValueTypeByHwMode &OurVT : VTs) { + if (llvm::count_if(OurVT, [T](auto &&P) { return P.second == T; })) + return true; + } + } + return false; +} + bool CodeGenRegisterClass::contains(const CodeGenRegister *Reg) const { return std::binary_search(Members.begin(), Members.end(), Reg, deref<std::less<>>()); diff --git a/llvm/utils/TableGen/CodeGenRegisters.h b/llvm/utils/TableGen/CodeGenRegisters.h index 0fc8b3ef80dd..e5e92fc81f50 100644 --- a/llvm/utils/TableGen/CodeGenRegisters.h +++ b/llvm/utils/TableGen/CodeGenRegisters.h @@ -351,10 +351,7 @@ namespace llvm { std::string getQualifiedName() const; ArrayRef<ValueTypeByHwMode> getValueTypes() const { return VTs; } unsigned getNumValueTypes() const { return VTs.size(); } - - bool hasType(const ValueTypeByHwMode &VT) const { - return llvm::is_contained(VTs, VT); - } + bool hasType(const ValueTypeByHwMode &VT) const; const ValueTypeByHwMode &getValueTypeNum(unsigned VTNum) const { if (VTNum < VTs.size()) diff --git a/llvm/utils/TableGen/GlobalISelEmitter.cpp b/llvm/utils/TableGen/GlobalISelEmitter.cpp index c8eac56d03e6..4b47cda41567 100644 --- a/llvm/utils/TableGen/GlobalISelEmitter.cpp +++ b/llvm/utils/TableGen/GlobalISelEmitter.cpp @@ -331,6 +331,9 @@ static Error isTrivialOperatorNode(const TreePatternNode *N) { if (Predicate.isImmediatePattern()) continue; + if (Predicate.hasNoUse()) + continue; + if (Predicate.isNonExtLoad() || Predicate.isAnyExtLoad() || Predicate.isSignExtLoad() || Predicate.isZeroExtLoad()) continue; @@ -1119,6 +1122,7 @@ public: IPM_MemoryAddressSpace, IPM_MemoryAlignment, IPM_VectorSplatImm, + IPM_NoUse, IPM_GenericPredicate, OPM_SameOperand, OPM_ComplexPattern, @@ -2238,6 +2242,29 @@ public: } }; +/// Generates code to check for the absence of use of the result. +// TODO? Generalize this to support checking for one use. +class NoUsePredicateMatcher : public InstructionPredicateMatcher { +public: + NoUsePredicateMatcher(unsigned InsnVarID) + : InstructionPredicateMatcher(IPM_NoUse, InsnVarID) {} + + static bool classof(const PredicateMatcher *P) { + return P->getKind() == IPM_NoUse; + } + + bool isIdentical(const PredicateMatcher &B) const override { + return InstructionPredicateMatcher::isIdentical(B); + } + + void emitPredicateOpcodes(MatchTable &Table, + RuleMatcher &Rule) const override { + Table << MatchTable::Opcode("GIM_CheckHasNoUse") + << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID) + << MatchTable::LineBreak; + } +}; + /// Generates code to check that a set of predicates and operands match for a /// particular instruction. 
/// @@ -2943,7 +2970,7 @@ public: << MatchTable::IntValue(RendererID); if (SubOperand) Table << MatchTable::Comment("SubOperand") - << MatchTable::IntValue(SubOperand.getValue()); + << MatchTable::IntValue(SubOperand.value()); Table << MatchTable::Comment(SymbolicName) << MatchTable::LineBreak; } }; @@ -3758,10 +3785,12 @@ GlobalISelEmitter::getEquivNode(Record &Equiv, const TreePatternNode *N) const { for (const TreePredicateCall &Call : N->getPredicateCalls()) { const TreePredicateFn &Predicate = Call.Fn; - if (!Equiv.isValueUnset("IfSignExtend") && Predicate.isLoad() && + if (!Equiv.isValueUnset("IfSignExtend") && + (Predicate.isLoad() || Predicate.isAtomic()) && Predicate.isSignExtLoad()) return &Target.getInstruction(Equiv.getValueAsDef("IfSignExtend")); - if (!Equiv.isValueUnset("IfZeroExtend") && Predicate.isLoad() && + if (!Equiv.isValueUnset("IfZeroExtend") && + (Predicate.isLoad() || Predicate.isAtomic()) && Predicate.isZeroExtLoad()) return &Target.getInstruction(Equiv.getValueAsDef("IfZeroExtend")); } @@ -4000,6 +4029,17 @@ Expected<InstructionMatcher &> GlobalISelEmitter::createAndImportSelDAGMatcher( if (auto Error = InsnMatcherOrError.takeError()) return std::move(Error); + // FIXME: This should be part of addBuiltinPredicates(). If we add this at + // the start of addBuiltinPredicates() without returning, then there might + // be cases where we hit the last return before which the + // HasAddedBuiltinMatcher will be set to false. The predicate could be + // missed if we add it in the middle or at the end due to return statements + // after the addPredicate<>() calls. + if (Predicate.hasNoUse()) { + InsnMatcher.addPredicate<NoUsePredicateMatcher>(); + HasAddedBuiltinMatcher = true; + } + if (Predicate.hasGISelPredicateCode()) { if (Predicate.usesOperands()) { assert(WaitingForNamedOperands == 0 && @@ -4946,8 +4986,8 @@ Error GlobalISelEmitter::importDefaultOperandRenderers( auto Def = DefaultDefOp->getDef(); if (Def->getName() == "undef_tied_input") { unsigned TempRegID = M.allocateTempRegID(); - M.insertAction<MakeTempRegisterAction>( - InsertPt, OpTyOrNone.getValue(), TempRegID); + M.insertAction<MakeTempRegisterAction>(InsertPt, OpTyOrNone.value(), + TempRegID); InsertPt = M.insertAction<BuildMIAction>( InsertPt, M.allocateOutputInsnID(), &Target.getInstruction(RK.getDef("IMPLICIT_DEF"))); @@ -5206,16 +5246,31 @@ Expected<RuleMatcher> GlobalISelEmitter::runOnPattern(const PatternToMatch &P) { auto &DstI = Target.getInstruction(DstOp); StringRef DstIName = DstI.TheDef->getName(); - if (DstI.Operands.NumDefs < Src->getExtTypes().size()) - return failedImport("Src pattern result has more defs than dst MI (" + - to_string(Src->getExtTypes().size()) + " def(s) vs " + - to_string(DstI.Operands.NumDefs) + " def(s))"); + unsigned DstNumDefs = DstI.Operands.NumDefs, + SrcNumDefs = Src->getExtTypes().size(); + if (DstNumDefs < SrcNumDefs) { + if (DstNumDefs != 0) + return failedImport("Src pattern result has more defs than dst MI (" + + to_string(SrcNumDefs) + " def(s) vs " + + to_string(DstNumDefs) + " def(s))"); + + bool FoundNoUsePred = false; + for (const auto &Pred : InsnMatcher.predicates()) { + if ((FoundNoUsePred = isa<NoUsePredicateMatcher>(Pred.get()))) + break; + } + if (!FoundNoUsePred) + return failedImport("Src pattern result has " + to_string(SrcNumDefs) + + " def(s) without the HasNoUse predicate set to true " + "but Dst MI has no def"); + } // The root of the match also has constraints on the register bank so that it // matches the result instruction. 
unsigned OpIdx = 0; - for (const TypeSetByHwMode &VTy : Src->getExtTypes()) { - (void)VTy; + unsigned N = std::min(DstNumDefs, SrcNumDefs); + for (unsigned I = 0; I < N; ++I) { + const TypeSetByHwMode &VTy = Src->getExtType(I); const auto &DstIOperand = DstI.Operands[OpIdx]; Record *DstIOpRec = DstIOperand.Rec; diff --git a/llvm/utils/TableGen/InstrInfoEmitter.cpp b/llvm/utils/TableGen/InstrInfoEmitter.cpp index a7a4f4f5f1a7..da8d0a0096fd 100644 --- a/llvm/utils/TableGen/InstrInfoEmitter.cpp +++ b/llvm/utils/TableGen/InstrInfoEmitter.cpp @@ -17,7 +17,9 @@ #include "CodeGenTarget.h" #include "PredicateExpander.h" #include "SequenceToOffsetTable.h" +#include "SubtargetFeatureInfo.h" #include "TableGenBackends.h" +#include "Types.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" @@ -79,6 +81,9 @@ private: /// Expand TIIPredicate definitions to functions that accept a const MCInst /// reference. void emitMCIIHelperMethods(raw_ostream &OS, StringRef TargetName); + + /// Write verifyInstructionPredicates methods. + void emitFeatureVerifier(raw_ostream &OS, const CodeGenTarget &Target); void emitRecord(const CodeGenInstruction &Inst, unsigned Num, Record *InstrInfo, std::map<std::vector<Record*>, unsigned> &EL, @@ -666,14 +671,13 @@ void InstrInfoEmitter::emitLogicalOperandTypeMappings( void InstrInfoEmitter::emitMCIIHelperMethods(raw_ostream &OS, StringRef TargetName) { RecVec TIIPredicates = Records.getAllDerivedDefinitions("TIIPredicate"); - if (TIIPredicates.empty()) - return; OS << "#ifdef GET_INSTRINFO_MC_HELPER_DECLS\n"; OS << "#undef GET_INSTRINFO_MC_HELPER_DECLS\n\n"; OS << "namespace llvm {\n"; - OS << "class MCInst;\n\n"; + OS << "class MCInst;\n"; + OS << "class FeatureBitset;\n\n"; OS << "namespace " << TargetName << "_MC {\n\n"; @@ -682,6 +686,9 @@ void InstrInfoEmitter::emitMCIIHelperMethods(raw_ostream &OS, << "(const MCInst &MI);\n"; } + OS << "void verifyInstructionPredicates(unsigned Opcode, const FeatureBitset " + "&Features);\n"; + OS << "\n} // end namespace " << TargetName << "_MC\n"; OS << "} // end namespace llvm\n\n"; @@ -708,7 +715,143 @@ void InstrInfoEmitter::emitMCIIHelperMethods(raw_ostream &OS, OS << "} // end namespace " << TargetName << "_MC\n"; OS << "} // end namespace llvm\n\n"; - OS << "#endif // GET_GENISTRINFO_MC_HELPERS\n"; + OS << "#endif // GET_GENISTRINFO_MC_HELPERS\n\n"; +} + +static std::string +getNameForFeatureBitset(const std::vector<Record *> &FeatureBitset) { + std::string Name = "CEFBS"; + for (const auto &Feature : FeatureBitset) + Name += ("_" + Feature->getName()).str(); + return Name; +} + +void InstrInfoEmitter::emitFeatureVerifier(raw_ostream &OS, + const CodeGenTarget &Target) { + const auto &All = SubtargetFeatureInfo::getAll(Records); + std::map<Record *, SubtargetFeatureInfo, LessRecordByID> SubtargetFeatures; + SubtargetFeatures.insert(All.begin(), All.end()); + + OS << "#ifdef ENABLE_INSTR_PREDICATE_VERIFIER\n" + << "#undef ENABLE_INSTR_PREDICATE_VERIFIER\n" + << "#include <sstream>\n\n"; + + OS << "namespace llvm {\n"; + OS << "namespace " << Target.getName() << "_MC {\n\n"; + + // Emit the subtarget feature enumeration. + SubtargetFeatureInfo::emitSubtargetFeatureBitEnumeration(SubtargetFeatures, + OS); + + // Emit the name table for error messages. + OS << "#ifndef NDEBUG\n"; + SubtargetFeatureInfo::emitNameTable(SubtargetFeatures, OS); + OS << "#endif // NDEBUG\n\n"; + + // Emit the available features compute function. 
+ SubtargetFeatureInfo::emitComputeAssemblerAvailableFeatures( + Target.getName(), "", "computeAvailableFeatures", SubtargetFeatures, OS); + + std::vector<std::vector<Record *>> FeatureBitsets; + for (const CodeGenInstruction *Inst : Target.getInstructionsByEnumValue()) { + FeatureBitsets.emplace_back(); + for (Record *Predicate : Inst->TheDef->getValueAsListOfDefs("Predicates")) { + const auto &I = SubtargetFeatures.find(Predicate); + if (I != SubtargetFeatures.end()) + FeatureBitsets.back().push_back(I->second.TheDef); + } + } + + llvm::sort(FeatureBitsets, [&](const std::vector<Record *> &A, + const std::vector<Record *> &B) { + if (A.size() < B.size()) + return true; + if (A.size() > B.size()) + return false; + for (auto Pair : zip(A, B)) { + if (std::get<0>(Pair)->getName() < std::get<1>(Pair)->getName()) + return true; + if (std::get<0>(Pair)->getName() > std::get<1>(Pair)->getName()) + return false; + } + return false; + }); + FeatureBitsets.erase( + std::unique(FeatureBitsets.begin(), FeatureBitsets.end()), + FeatureBitsets.end()); + OS << "#ifndef NDEBUG\n" + << "// Feature bitsets.\n" + << "enum : " << getMinimalTypeForRange(FeatureBitsets.size()) << " {\n" + << " CEFBS_None,\n"; + for (const auto &FeatureBitset : FeatureBitsets) { + if (FeatureBitset.empty()) + continue; + OS << " " << getNameForFeatureBitset(FeatureBitset) << ",\n"; + } + OS << "};\n\n" + << "static constexpr FeatureBitset FeatureBitsets[] = {\n" + << " {}, // CEFBS_None\n"; + for (const auto &FeatureBitset : FeatureBitsets) { + if (FeatureBitset.empty()) + continue; + OS << " {"; + for (const auto &Feature : FeatureBitset) { + const auto &I = SubtargetFeatures.find(Feature); + assert(I != SubtargetFeatures.end() && "Didn't import predicate?"); + OS << I->second.getEnumBitName() << ", "; + } + OS << "},\n"; + } + OS << "};\n" + << "#endif // NDEBUG\n\n"; + + // Emit the predicate verifier. 
+ OS << "void verifyInstructionPredicates(\n" + << " unsigned Opcode, const FeatureBitset &Features) {\n" + << "#ifndef NDEBUG\n" + << " static " << getMinimalTypeForRange(FeatureBitsets.size()) + << " RequiredFeaturesRefs[] = {\n"; + unsigned InstIdx = 0; + for (const CodeGenInstruction *Inst : Target.getInstructionsByEnumValue()) { + OS << " CEFBS"; + unsigned NumPredicates = 0; + for (Record *Predicate : Inst->TheDef->getValueAsListOfDefs("Predicates")) { + const auto &I = SubtargetFeatures.find(Predicate); + if (I != SubtargetFeatures.end()) { + OS << '_' << I->second.TheDef->getName(); + NumPredicates++; + } + } + if (!NumPredicates) + OS << "_None"; + OS << ", // " << Inst->TheDef->getName() << " = " << InstIdx << "\n"; + InstIdx++; + } + OS << " };\n\n"; + OS << " assert(Opcode < " << InstIdx << ");\n"; + OS << " FeatureBitset AvailableFeatures = " + "computeAvailableFeatures(Features);\n"; + OS << " const FeatureBitset &RequiredFeatures = " + "FeatureBitsets[RequiredFeaturesRefs[Opcode]];\n"; + OS << " FeatureBitset MissingFeatures =\n" + << " (AvailableFeatures & RequiredFeatures) ^\n" + << " RequiredFeatures;\n" + << " if (MissingFeatures.any()) {\n" + << " std::ostringstream Msg;\n" + << " Msg << \"Attempting to emit \" << &" << Target.getName() + << "InstrNameData[" << Target.getName() << "InstrNameIndices[Opcode]]\n" + << " << \" instruction but the \";\n" + << " for (unsigned i = 0, e = MissingFeatures.size(); i != e; ++i)\n" + << " if (MissingFeatures.test(i))\n" + << " Msg << SubtargetFeatureNames[i] << \" \";\n" + << " Msg << \"predicate(s) are not met\";\n" + << " report_fatal_error(Msg.str().c_str());\n" + << " }\n" + << "#endif // NDEBUG\n"; + OS << "}\n"; + OS << "} // end namespace " << Target.getName() << "_MC\n"; + OS << "} // end namespace llvm\n"; + OS << "#endif // ENABLE_INSTR_PREDICATE_VERIFIER\n\n"; } void InstrInfoEmitter::emitTIIHelperMethods(raw_ostream &OS, @@ -955,6 +1098,9 @@ void InstrInfoEmitter::run(raw_ostream &OS) { Records.startTimer("Emit helper methods"); emitMCIIHelperMethods(OS, TargetName); + + Records.startTimer("Emit verifier methods"); + emitFeatureVerifier(OS, Target); } void InstrInfoEmitter::emitRecord(const CodeGenInstruction &Inst, unsigned Num, diff --git a/llvm/utils/TableGen/SubtargetFeatureInfo.cpp b/llvm/utils/TableGen/SubtargetFeatureInfo.cpp index f4f360fb5be2..2a63fc490380 100644 --- a/llvm/utils/TableGen/SubtargetFeatureInfo.cpp +++ b/llvm/utils/TableGen/SubtargetFeatureInfo.cpp @@ -144,8 +144,13 @@ static bool emitFeaturesAux(StringRef TargetName, const Init &Val, void SubtargetFeatureInfo::emitComputeAssemblerAvailableFeatures( StringRef TargetName, StringRef ClassName, StringRef FuncName, SubtargetFeatureInfoMap &SubtargetFeatures, raw_ostream &OS) { - OS << "FeatureBitset " << TargetName << ClassName << "::\n" - << FuncName << "(const FeatureBitset &FB) const {\n"; + OS << "FeatureBitset "; + if (!ClassName.empty()) + OS << TargetName << ClassName << "::\n"; + OS << FuncName << "(const FeatureBitset &FB) "; + if (!ClassName.empty()) + OS << "const "; + OS << "{\n"; OS << " FeatureBitset Features;\n"; for (const auto &SF : SubtargetFeatures) { const SubtargetFeatureInfo &SFI = SF.second; diff --git a/openmp/runtime/src/dllexports b/openmp/runtime/src/dllexports index 87989fe63092..92f9d2417159 100644 --- a/openmp/runtime/src/dllexports +++ b/openmp/runtime/src/dllexports @@ -397,6 +397,13 @@ kmpc_set_disp_num_buffers 267 __kmpc_end_scope 287 %endif +%ifndef stub + __kmpc_copyprivate_light 288 + __kmpc_sections_init 289 + 
__kmpc_next_section 290
+ __kmpc_end_sections 291
+%endif
+
 # User API entry points that have both lower- and upper- case versions for Fortran.
 # Number for lowercase version is indicated. Number for uppercase is obtained by adding 1000.
 # User API entry points are entry points that start with 'kmp_' or 'omp_'.
diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index ac680e32d390..61ec737f9394 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -3890,6 +3890,11 @@ KMP_EXPORT kmp_int32 __kmpc_barrier_master_nowait(ident_t *,
 KMP_EXPORT kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
 KMP_EXPORT void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
 
+KMP_EXPORT kmp_int32 __kmpc_sections_init(ident_t *loc, kmp_int32 global_tid);
+KMP_EXPORT kmp_int32 __kmpc_next_section(ident_t *loc, kmp_int32 global_tid,
+                                         kmp_int32 numberOfSections);
+KMP_EXPORT void __kmpc_end_sections(ident_t *loc, kmp_int32 global_tid);
+
 KMP_EXPORT void KMPC_FOR_STATIC_INIT(ident_t *loc, kmp_int32 global_tid,
                                      kmp_int32 schedtype, kmp_int32 *plastiter,
                                      kmp_int *plower, kmp_int *pupper,
@@ -3903,6 +3908,9 @@ KMP_EXPORT void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
                                    void (*cpy_func)(void *, void *),
                                    kmp_int32 didit);
 
+KMP_EXPORT void *__kmpc_copyprivate_light(ident_t *loc, kmp_int32 gtid,
+                                          void *cpy_data);
+
 extern void KMPC_SET_NUM_THREADS(int arg);
 extern void KMPC_SET_DYNAMIC(int flag);
 extern void KMPC_SET_NESTED(int flag);
diff --git a/openmp/runtime/src/kmp_csupport.cpp b/openmp/runtime/src/kmp_csupport.cpp
index 863153bd5526..b7bcc4c94148 100644
--- a/openmp/runtime/src/kmp_csupport.cpp
+++ b/openmp/runtime/src/kmp_csupport.cpp
@@ -2224,6 +2224,61 @@ void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
   }
 }
 
+/* --------------------------------------------------------------------------*/
+/*!
+@ingroup THREADPRIVATE
+@param loc source location information
+@param gtid global thread number
+@param cpy_data pointer to the data to be saved/copied, or 0
+@return the saved pointer to the data
+
+__kmpc_copyprivate_light is a lighter version of __kmpc_copyprivate: it only
+saves the pointer it is given (if that pointer is non-zero, i.e. it comes from
+the thread that executed the single region) and returns that pointer from
+every call (the single thread itself does not need it). This version does not
+do any actual data copying; the copying has to be done somewhere else, e.g.
+inline in the generated code. Because of this, and unlike __kmpc_copyprivate,
+this function has no barrier after the copy at its end, so the generated code
+needs its own barrier once all the data has been copied.
+*/
+void *__kmpc_copyprivate_light(ident_t *loc, kmp_int32 gtid, void *cpy_data) {
+  void **data_ptr;
+
+  KC_TRACE(10, ("__kmpc_copyprivate_light: called T#%d\n", gtid));
+
+  KMP_MB();
+
+  data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
+
+  if (__kmp_env_consistency_check) {
+    if (loc == 0) {
+      KMP_WARNING(ConstructIdentInvalid);
+    }
+  }
+
+  // ToDo: Optimize the following barrier
+
+  if (cpy_data)
+    *data_ptr = cpy_data;
+
+#if OMPT_SUPPORT
+  ompt_frame_t *ompt_frame;
+  if (ompt_enabled.enabled) {
+    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
+    if (ompt_frame->enter_frame.ptr == NULL)
+      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
+    OMPT_STORE_RETURN_ADDRESS(gtid);
+  }
+#endif
+/* This barrier is not a barrier region boundary */
+#if USE_ITT_NOTIFY
+  __kmp_threads[gtid]->th.th_ident = loc;
+#endif
+  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
+
+  return *data_ptr;
+}
+
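To make the contract concrete, here is a sketch of the calling sequence a compiler might generate around this entry point for '#pragma omp single copyprivate(x)'. The wrapper lowered_single_copyprivate and the inline copy are hypothetical; only the __kmpc_* calls come from the declarations in kmp.h above, and the trailing __kmpc_barrier is the barrier the comment says the generated code must supply:

// Hypothetical lowering sketch (assumes the kmp.h declarations are in scope).
static void lowered_single_copyprivate(ident_t *loc, kmp_int32 gtid, int *x) {
  void *src = NULL;
  if (__kmpc_single(loc, gtid)) {
    *x = 42; // body of the single region runs here (invented for the example)
    src = x; // only the executing thread hands its pointer in
    __kmpc_end_single(loc, gtid);
  }
  // Every thread gets the saved pointer back; the call synchronizes enough
  // for the pointer to be valid, but the data copy itself happens inline.
  int *shared = (int *)__kmpc_copyprivate_light(loc, gtid, src);
  if (shared != x)
    *x = *shared; // inline copy, performed by every non-single thread
  __kmpc_barrier(loc, gtid); // required: no barrier follows the copy otherwise
}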
 /* -------------------------------------------------------------------------- */
 
 #define INIT_LOCK __kmp_init_user_lock_with_checks
@@ -4348,7 +4403,7 @@ void *omp_aligned_calloc(size_t align, size_t nmemb, size_t size,
 void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator,
                   omp_allocator_handle_t free_allocator) {
   return __kmp_realloc(__kmp_entry_gtid(), ptr, size, allocator,
-                      free_allocator);
+                       free_allocator);
 }
 
 void omp_free(void *ptr, omp_allocator_handle_t allocator) {
diff --git a/openmp/runtime/src/kmp_dispatch.cpp b/openmp/runtime/src/kmp_dispatch.cpp
index 624fbb04d7a8..e7d28c6587b1 100644
--- a/openmp/runtime/src/kmp_dispatch.cpp
+++ b/openmp/runtime/src/kmp_dispatch.cpp
@@ -2285,6 +2285,219 @@ static int __kmp_dispatch_next(ident_t *loc, int gtid, kmp_int32 *p_last,
   return status;
 }
 
+/*!
+@ingroup WORK_SHARING
+@param loc source location information
+@param global_tid global thread number
+@return Zero if the parallel region is not active and this thread should
+execute all sections, non-zero otherwise.
+
+Beginning of the sections construct.
+There are no implicit barriers in the "sections" calls; rather, the compiler
+should introduce an explicit barrier if one is required.
+
+This implementation is based on __kmp_dispatch_init, using the same constructs
+for shared data (sections cannot be nested directly inside an omp for loop, so
+there is always a parallel region in between).
+*/
+kmp_int32 __kmpc_sections_init(ident_t *loc, kmp_int32 gtid) {
+
+  int active;
+  kmp_info_t *th;
+  kmp_team_t *team;
+  kmp_uint32 my_buffer_index;
+  dispatch_shared_info_template<kmp_int32> volatile *sh;
+
+  KMP_DEBUG_ASSERT(__kmp_init_serial);
+
+  if (!TCR_4(__kmp_init_parallel))
+    __kmp_parallel_initialize();
+  __kmp_resume_if_soft_paused();
+
+  /* setup data */
+  th = __kmp_threads[gtid];
+  team = th->th.th_team;
+  active = !team->t.t_serialized;
+  th->th.th_ident = loc;
+
+  KMP_COUNT_BLOCK(OMP_SECTIONS);
+  KD_TRACE(10, ("__kmpc_sections: called by T#%d\n", gtid));
+
+  if (active) {
+    // Set up sections in the same way as dynamically scheduled loops.
+    // We need one piece of shared data: which section is to execute next.
+    // (If the parallel region is not active, all sections are executed on
+    // the same thread.)
+    KMP_DEBUG_ASSERT(th->th.th_dispatch ==
+                     &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);
+
+    my_buffer_index = th->th.th_dispatch->th_disp_index++;
+
+    // reuse shared data structures from dynamically scheduled loops:
+    sh = reinterpret_cast<dispatch_shared_info_template<kmp_int32> volatile *>(
+        &team->t.t_disp_buffer[my_buffer_index % __kmp_dispatch_num_buffers]);
+    KD_TRACE(10, ("__kmpc_sections_init: T#%d my_buffer_index:%d\n", gtid,
+                  my_buffer_index));
+
+    th->th.th_dispatch->th_deo_fcn = __kmp_dispatch_deo_error;
+    th->th.th_dispatch->th_dxo_fcn = __kmp_dispatch_dxo_error;
+
+    KD_TRACE(100, ("__kmpc_sections_init: T#%d before wait: my_buffer_index:%d "
+                   "sh->buffer_index:%d\n",
+                   gtid, my_buffer_index, sh->buffer_index));
+    __kmp_wait<kmp_uint32>(&sh->buffer_index, my_buffer_index,
+                           __kmp_eq<kmp_uint32> USE_ITT_BUILD_ARG(NULL));
+    // Note: KMP_WAIT() cannot be used here: buffer index and
+    // my_buffer_index are *always* 32-bit integers.
+    KMP_MB();
+    KD_TRACE(100, ("__kmpc_sections_init: T#%d after wait: my_buffer_index:%d "
+                   "sh->buffer_index:%d\n",
+                   gtid, my_buffer_index, sh->buffer_index));
+
+    th->th.th_dispatch->th_dispatch_pr_current =
+        nullptr; // sections construct doesn't need private data
+    th->th.th_dispatch->th_dispatch_sh_current =
+        CCAST(dispatch_shared_info_t *, (volatile dispatch_shared_info_t *)sh);
+  }
+
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  if (ompt_enabled.ompt_callback_work) {
+    ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
+    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
+    ompt_callbacks.ompt_callback(ompt_callback_work)(
+        ompt_work_sections, ompt_scope_begin, &(team_info->parallel_data),
+        &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
+  }
+#endif
+  KMP_PUSH_PARTITIONED_TIMER(OMP_sections);
+
+  return active;
+}
+
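The buffer handoff above is the subtle part: each worksharing instance claims ring slot my_buffer_index % __kmp_dispatch_num_buffers and must wait until its own generation of that slot comes around. Below is a self-contained model of that protocol, with invented names (SharedBuffer, acquire, release) standing in for dispatch_shared_info and the __kmp_wait/buffer_index handshake; it is a sketch of the idea, not the runtime's code:

#include <atomic>

constexpr unsigned kNumBuffers = 7; // plays the role of __kmp_dispatch_num_buffers

struct SharedBuffer {
  std::atomic<unsigned> buffer_index{0}; // generation allowed to use this slot
  std::atomic<int> iteration{0};         // next section id to hand out
  std::atomic<int> num_done{0};          // threads finished with this generation
};

// A thread entering its my_index-th construct spins until the slot's
// generation matches (the runtime uses __kmp_wait with backoff instead).
void acquire(SharedBuffer *bufs, unsigned my_index) {
  SharedBuffer &sh = bufs[my_index % kNumBuffers];
  while (sh.buffer_index.load(std::memory_order_acquire) != my_index) {
    // spin
  }
}

// The last thread out resets the slot and publishes the next generation,
// exactly like the num_done/buffer_index sequence in __kmpc_next_section.
void release(SharedBuffer *bufs, unsigned my_index, int nproc) {
  SharedBuffer &sh = bufs[my_index % kNumBuffers];
  if (sh.num_done.fetch_add(1) == nproc - 1) {
    sh.num_done.store(0);
    sh.iteration.store(0);
    sh.buffer_index.store(my_index + kNumBuffers, std::memory_order_release);
  }
}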
+/*!
+@ingroup WORK_SHARING
+@param loc source location information
+@param global_tid global thread number
+@param numberOfSections number of sections in the 'sections' construct
+@return an unsigned value in [0, n): the id of the section to execute next on
+this thread; n (or any other value outside that range) when there is nothing
+left for this thread to execute
+*/
+
+kmp_int32 __kmpc_next_section(ident_t *loc, kmp_int32 gtid,
+                              kmp_int32 numberOfSections) {
+
+  KMP_TIME_PARTITIONED_BLOCK(OMP_sections);
+
+  kmp_info_t *th = __kmp_threads[gtid];
+#ifdef KMP_DEBUG
+  kmp_team_t *team = th->th.th_team;
+#endif
+
+  KD_TRACE(1000, ("__kmp_dispatch_next: T#%d; number of sections:%d\n", gtid,
+                  numberOfSections));
+
+  // This function should not be called for the serialized case:
+  KMP_DEBUG_ASSERT(!team->t.t_serialized);
+
+  dispatch_shared_info_template<kmp_int32> volatile *sh;
+
+  KMP_DEBUG_ASSERT(th->th.th_dispatch ==
+                   &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);
+
+  KMP_DEBUG_ASSERT(!(th->th.th_dispatch->th_dispatch_pr_current));
+  sh = reinterpret_cast<dispatch_shared_info_template<kmp_int32> volatile *>(
+      th->th.th_dispatch->th_dispatch_sh_current);
+  KMP_DEBUG_ASSERT(sh);
+
+  kmp_int32 sectionIndex = 0;
+  bool moreSectionsToExecute = true;
+
+  // Find the section to execute:
+  sectionIndex = test_then_inc<kmp_int32>((kmp_int32 *)&sh->u.s.iteration);
+  if (sectionIndex >= numberOfSections) {
+    moreSectionsToExecute = false;
+  }
+
+  // status == 0: no more sections to execute;
+  // OMPTODO: __kmpc_end_sections could be bypassed?
+  if (!moreSectionsToExecute) {
+    kmp_int32 num_done;
+
+    num_done = test_then_inc<kmp_int32>((kmp_int32 *)(&sh->u.s.num_done));
+
+    if (num_done == th->th.th_team_nproc - 1) {
+      /* NOTE: release this buffer to be reused */
+
+      KMP_MB(); /* Flush all pending memory write invalidates. */
+
+      sh->u.s.num_done = 0;
+      sh->u.s.iteration = 0;
+
+      KMP_MB(); /* Flush all pending memory write invalidates. */
+
+      sh->buffer_index += __kmp_dispatch_num_buffers;
+      KD_TRACE(100, ("__kmpc_next_section: T#%d change buffer_index:%d\n", gtid,
+                     sh->buffer_index));
+
+      KMP_MB(); /* Flush all pending memory write invalidates. */
+
+    } // if
+
+    th->th.th_dispatch->th_deo_fcn = NULL;
+    th->th.th_dispatch->th_dxo_fcn = NULL;
+    th->th.th_dispatch->th_dispatch_sh_current = NULL;
+    th->th.th_dispatch->th_dispatch_pr_current = NULL;
+
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+    if (ompt_enabled.ompt_callback_dispatch) {
+      ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
+      ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
+      ompt_data_t instance = ompt_data_none;
+      instance.ptr = OMPT_GET_RETURN_ADDRESS(0);
+      ompt_callbacks.ompt_callback(ompt_callback_dispatch)(
+          &(team_info->parallel_data), &(task_info->task_data),
+          ompt_dispatch_section, instance);
+    }
+#endif
+    KMP_POP_PARTITIONED_TIMER();
+  }
+
+  return sectionIndex;
+}
+
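Taken together, the three entry points give the compiler a simple recipe for lowering '#pragma omp sections'. A hypothetical two-section sketch follows (lowered_sections and the section bodies are invented; the runtime calls match the declarations added to kmp.h earlier):

static void lowered_sections(ident_t *loc, kmp_int32 gtid) {
  const kmp_int32 n = 2; // number of sections in the construct
  if (__kmpc_sections_init(loc, gtid)) {
    // Parallel is active: keep pulling section ids until none remain. The
    // final out-of-range call also performs the buffer release above.
    for (kmp_int32 id = __kmpc_next_section(loc, gtid, n); id < n;
         id = __kmpc_next_section(loc, gtid, n)) {
      if (id == 0) { /* section 0 body */ }
      else         { /* section 1 body */ }
    }
  } else {
    // Serialized: this thread executes every section itself, and
    // __kmpc_next_section must not be called.
    /* section 0 body */
    /* section 1 body */
  }
  __kmpc_end_sections(loc, gtid);
  __kmpc_barrier(loc, gtid); // added by the compiler unless 'nowait' is given
}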
+/*!
+@ingroup WORK_SHARING
+@param loc source location information
+@param global_tid global thread number
+
+End of the "sections" construct.
+There is no need to wait here: a barrier is added separately when needed.
+*/
+void __kmpc_end_sections(ident_t *loc, kmp_int32 gtid) {
+
+  kmp_info_t *th = __kmp_threads[gtid];
+  int active = !th->th.th_team->t.t_serialized;
+
+  KD_TRACE(100, ("__kmpc_end_sections: T#%d called\n", gtid));
+
+  if (!active) {
+    // In the active case, finalization is done in __kmpc_next_section.
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+    if (ompt_enabled.ompt_callback_work) {
+      ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
+      ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
+      ompt_callbacks.ompt_callback(ompt_callback_work)(
+          ompt_work_sections, ompt_scope_end, &(team_info->parallel_data),
+          &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
+    }
+#endif
+    KMP_POP_PARTITIONED_TIMER();
+  }
+
+  KD_TRACE(100, ("__kmpc_end_sections: T#%d returned\n", gtid));
+}
+
 template <typename T>
 static void __kmp_dist_get_bounds(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 *plastiter, T *plower, T *pupper,