author: Dimitry Andric <dim@FreeBSD.org> 2024-08-06 10:54:55 +0000
committer: Dimitry Andric <dim@FreeBSD.org> 2024-08-06 10:54:55 +0000
commit: 9b9503334fa856ed4ed6823d35b6f52546296f77
tree: 3214bef030da1bd5ee3405a631aeb837e61c1e99
parent: c76260f306a7e51ef52dc75c6031e51e1e0862d7

88 files changed, 1129 insertions(+), 723 deletions(-)
diff --git a/clang/include/clang/AST/DeclCXX.h b/clang/include/clang/AST/DeclCXX.h index fb52ac804849..0923736a95f9 100644 --- a/clang/include/clang/AST/DeclCXX.h +++ b/clang/include/clang/AST/DeclCXX.h @@ -1210,6 +1210,13 @@ public: return D.HasPublicFields || D.HasProtectedFields || D.HasPrivateFields; } + /// If this is a standard-layout class or union, any and all data members will + /// be declared in the same type. + /// + /// This retrieves the type where any fields are declared, + /// or the current class if there is no class with fields. + const CXXRecordDecl *getStandardLayoutBaseWithFields() const; + /// Whether this class is polymorphic (C++ [class.virtual]), /// which means that the class contains or inherits a virtual function. bool isPolymorphic() const { return data().Polymorphic; } diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 4825979a974d..46d0a66d59c3 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -477,6 +477,9 @@ def TargetELF : TargetSpec { def TargetELFOrMachO : TargetSpec { let ObjectFormats = ["ELF", "MachO"]; } +def TargetWindowsArm64EC : TargetSpec { + let CustomCode = [{ Target.getTriple().isWindowsArm64EC() }]; +} def TargetSupportsInitPriority : TargetSpec { let CustomCode = [{ !Target.getTriple().isOSzOS() }]; @@ -4027,6 +4030,12 @@ def SelectAny : InheritableAttr { let SimpleHandler = 1; } +def HybridPatchable : InheritableAttr, TargetSpecificAttr<TargetWindowsArm64EC> { + let Spellings = [Declspec<"hybrid_patchable">, Clang<"hybrid_patchable">]; + let Subjects = SubjectList<[Function]>; + let Documentation = [HybridPatchableDocs]; +} + def Thread : Attr { let Spellings = [Declspec<"thread">]; let LangOpts = [MicrosoftExt]; diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 99738812c815..b5d468eb5ec9 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -5985,6 +5985,16 @@ For more information see or `msvc documentation <https://docs.microsoft.com/pl-pl/cpp/cpp/selectany>`_. }]; } +def HybridPatchableDocs : Documentation { + let Category = DocCatFunction; + let Content = [{ +The ``hybrid_patchable`` attribute declares an ARM64EC function with an additional +x86-64 thunk, which may be patched at runtime. + +For more information see +`ARM64EC ABI documentation <https://learn.microsoft.com/en-us/windows/arm/arm64ec-abi>`_. 
+}]; } + def WebAssemblyExportNameDocs : Documentation { let Category = DocCatFunction; let Content = [{ diff --git a/clang/include/clang/Basic/DiagnosticFrontendKinds.td b/clang/include/clang/Basic/DiagnosticFrontendKinds.td index 12a4617c64d8..8a1462c670d6 100644 --- a/clang/include/clang/Basic/DiagnosticFrontendKinds.td +++ b/clang/include/clang/Basic/DiagnosticFrontendKinds.td @@ -288,6 +288,9 @@ def err_function_needs_feature : Error< let CategoryName = "Codegen ABI Check" in { def err_function_always_inline_attribute_mismatch : Error< "always_inline function %1 and its caller %0 have mismatching %2 attributes">; +def warn_function_always_inline_attribute_mismatch : Warning< + "always_inline function %1 and its caller %0 have mismatching %2 attributes, " + "inlining may change runtime behaviour">, InGroup<AArch64SMEAttributes>; def err_function_always_inline_new_za : Error< "always_inline function %0 has new za state">; diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index eb0506e71fe3..8a00fe21a08c 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -3677,6 +3677,9 @@ def err_attribute_weak_static : Error< "weak declaration cannot have internal linkage">; def err_attribute_selectany_non_extern_data : Error< "'selectany' can only be applied to data items with external linkage">; +def warn_attribute_hybrid_patchable_non_extern : Warning< + "'hybrid_patchable' is ignored on functions without external linkage">, + InGroup<IgnoredAttributes>; def err_declspec_thread_on_thread_variable : Error< "'__declspec(thread)' applied to variable that already has a " "thread-local storage specifier">; @@ -3808,8 +3811,6 @@ def warn_sme_locally_streaming_has_vl_args_returns : Warning< InGroup<AArch64SMEAttributes>, DefaultIgnore; def err_conflicting_attributes_arm_state : Error< "conflicting attributes for state '%0'">; -def err_sme_streaming_cannot_be_multiversioned : Error< - "streaming function cannot be multi-versioned">; def err_unknown_arm_state : Error< "unknown state '%0'">; def err_missing_arm_state : Error< diff --git a/clang/lib/AST/DeclCXX.cpp b/clang/lib/AST/DeclCXX.cpp index b573c2713a3a..9a3ede426e91 100644 --- a/clang/lib/AST/DeclCXX.cpp +++ b/clang/lib/AST/DeclCXX.cpp @@ -561,6 +561,42 @@ void CXXRecordDecl::addedClassSubobject(CXXRecordDecl *Subobj) { data().StructuralIfLiteral = false; } +const CXXRecordDecl *CXXRecordDecl::getStandardLayoutBaseWithFields() const { + assert( + isStandardLayout() && + "getStandardLayoutBaseWithFields called on a non-standard-layout type"); +#ifdef EXPENSIVE_CHECKS + { + unsigned NumberOfBasesWithFields = 0; + if (!field_empty()) + ++NumberOfBasesWithFields; + llvm::SmallPtrSet<const CXXRecordDecl *, 8> UniqueBases; + forallBases([&](const CXXRecordDecl *Base) -> bool { + if (!Base->field_empty()) + ++NumberOfBasesWithFields; + assert( + UniqueBases.insert(Base->getCanonicalDecl()).second && + "Standard layout struct has multiple base classes of the same type"); + return true; + }); + assert(NumberOfBasesWithFields <= 1 && + "Standard layout struct has fields declared in more than one class"); + } +#endif + if (!field_empty()) + return this; + const CXXRecordDecl *Result = this; + forallBases([&](const CXXRecordDecl *Base) -> bool { + if (!Base->field_empty()) { + // This is the base where the fields are declared; return early + Result = Base; + return false; + } + return true; + }); + return Result; +} + bool 
CXXRecordDecl::hasConstexprDestructor() const { auto *Dtor = getDestructor(); return Dtor ? Dtor->isConstexpr() : defaultedDestructorIsConstexpr(); diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index d6078696a7d9..af201554898f 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -991,6 +991,9 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, if (D && D->hasAttr<NoProfileFunctionAttr>()) Fn->addFnAttr(llvm::Attribute::NoProfile); + if (D && D->hasAttr<HybridPatchableAttr>()) + Fn->addFnAttr(llvm::Attribute::HybridPatchable); + if (D) { // Function attributes take precedence over command line flags. if (auto *A = D->getAttr<FunctionReturnThunksAttr>()) { diff --git a/clang/lib/CodeGen/Targets/AArch64.cpp b/clang/lib/CodeGen/Targets/AArch64.cpp index b9df54b0c67c..1dec3cd40ebd 100644 --- a/clang/lib/CodeGen/Targets/AArch64.cpp +++ b/clang/lib/CodeGen/Targets/AArch64.cpp @@ -883,8 +883,10 @@ void AArch64TargetCodeGenInfo::checkFunctionCallABIStreaming( if (!CalleeIsStreamingCompatible && (CallerIsStreaming != CalleeIsStreaming || CallerIsStreamingCompatible)) - CGM.getDiags().Report(CallLoc, - diag::err_function_always_inline_attribute_mismatch) + CGM.getDiags().Report( + CallLoc, CalleeIsStreaming + ? diag::err_function_always_inline_attribute_mismatch + : diag::warn_function_always_inline_attribute_mismatch) << Caller->getDeclName() << Callee->getDeclName() << "streaming"; if (auto *NewAttr = Callee->getAttr<ArmNewAttr>()) if (NewAttr->isNewZA()) diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 5de29f1eca61..366b147a052b 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -1847,6 +1847,9 @@ void Clang::AddAArch64TargetArgs(const ArgList &Args, Args.addOptInFlag( CmdArgs, options::OPT_fptrauth_vtable_pointer_type_discrimination, options::OPT_fno_ptrauth_vtable_pointer_type_discrimination); + Args.addOptInFlag( + CmdArgs, options::OPT_fptrauth_type_info_vtable_pointer_discrimination, + options::OPT_fno_ptrauth_type_info_vtable_pointer_discrimination); Args.addOptInFlag(CmdArgs, options::OPT_fptrauth_init_fini, options::OPT_fno_ptrauth_init_fini); Args.addOptInFlag( diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 21924a8fe17d..63c8699fd62d 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -154,8 +154,8 @@ private: if (NonTemplateLess.count(CurrentToken->Previous) > 0) return false; - const FormatToken &Previous = *CurrentToken->Previous; // The '<'. - if (Previous.Previous) { + if (const auto &Previous = *CurrentToken->Previous; // The '<'. + Previous.Previous) { if (Previous.Previous->Tok.isLiteral()) return false; if (Previous.Previous->is(tok::r_brace)) @@ -175,11 +175,13 @@ private: FormatToken *Left = CurrentToken->Previous; Left->ParentBracket = Contexts.back().ContextKind; ScopedContextCreator ContextCreator(*this, tok::less, 12); - Contexts.back().IsExpression = false; + + const auto *BeforeLess = Left->Previous; + // If there's a template keyword before the opening angle bracket, this is a // template parameter, not an argument. 
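The hunks above add `hybrid_patchable` end to end: Attr.td defines the attribute and restricts it to ARM64EC targets, AttrDocs.td documents it, Sema handles it, and CodeGenFunction.cpp forwards it to the IR-level `HybridPatchable` function attribute. A minimal usage sketch (assumes an ARM64EC target such as `arm64ec-pc-windows-msvc`; the quoted diagnostic is the one added in this commit):

```cpp
// clang-cl --target=arm64ec-pc-windows-msvc /c patchable.cpp
// The function is emitted with an extra x86-64 thunk that can be redirected
// ("patched") at runtime; on any other target the attribute is an error.

__declspec(hybrid_patchable) int compute(int x) { return x + 1; }

// GNU-style spelling accepted by the same Attr.td entry:
[[clang::hybrid_patchable]] int compute2(int x) { return x * 2; }

// Internal linkage defeats patching, so Sema now warns and drops it:
static __declspec(hybrid_patchable) int helper(void) { return 0; }
// warning: 'hybrid_patchable' is ignored on functions without external linkage
```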
- if (Left->Previous && Left->Previous->isNot(tok::kw_template)) + if (BeforeLess && BeforeLess->isNot(tok::kw_template)) Contexts.back().ContextType = Context::TemplateArgument; if (Style.Language == FormatStyle::LK_Java && @@ -187,19 +189,24 @@ private: next(); } - while (CurrentToken) { + for (bool SeenTernaryOperator = false; CurrentToken;) { + const bool InExpr = Contexts[Contexts.size() - 2].IsExpression; if (CurrentToken->is(tok::greater)) { + const auto *Next = CurrentToken->Next; // Try to do a better job at looking for ">>" within the condition of // a statement. Conservatively insert spaces between consecutive ">" // tokens to prevent splitting right bitshift operators and potentially // altering program semantics. This check is overly conservative and // will prevent spaces from being inserted in select nested template // parameter cases, but should not alter program semantics. - if (CurrentToken->Next && CurrentToken->Next->is(tok::greater) && + if (Next && Next->is(tok::greater) && Left->ParentBracket != tok::less && CurrentToken->getStartOfNonWhitespace() == - CurrentToken->Next->getStartOfNonWhitespace().getLocWithOffset( - -1)) { + Next->getStartOfNonWhitespace().getLocWithOffset(-1)) { + return false; + } + if (InExpr && SeenTernaryOperator && + (!Next || !Next->isOneOf(tok::l_paren, tok::l_brace))) { return false; } Left->MatchingParen = CurrentToken; @@ -210,14 +217,14 @@ private: // msg: < item: data > // In TT_TextProto, map<key, value> does not occur. if (Style.Language == FormatStyle::LK_TextProto || - (Style.Language == FormatStyle::LK_Proto && Left->Previous && - Left->Previous->isOneOf(TT_SelectorName, TT_DictLiteral))) { + (Style.Language == FormatStyle::LK_Proto && BeforeLess && + BeforeLess->isOneOf(TT_SelectorName, TT_DictLiteral))) { CurrentToken->setType(TT_DictLiteral); } else { CurrentToken->setType(TT_TemplateCloser); CurrentToken->Tok.setLength(1); } - if (CurrentToken->Next && CurrentToken->Next->Tok.isLiteral()) + if (Next && Next->Tok.isLiteral()) return false; next(); return true; @@ -229,18 +236,21 @@ private: } if (CurrentToken->isOneOf(tok::r_paren, tok::r_square, tok::r_brace)) return false; + const auto &Prev = *CurrentToken->Previous; // If a && or || is found and interpreted as a binary operator, this set // of angles is likely part of something like "a < b && c > d". If the // angles are inside an expression, the ||/&& might also be a binary // operator that was misinterpreted because we are parsing template // parameters. // FIXME: This is getting out of hand, write a decent parser. 
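The annotator rewrite above threads a `SeenTernaryOperator` flag through the angle-bracket scan. A sketch of inputs it is meant to distinguish, as I read the new checks:

```cpp
// Input that exercises the new SeenTernaryOperator tracking. Inside an
// expression, a ternary before the matching '>' means the '<' was a
// comparison -- unless the '>' is immediately followed by '(' or '{'.
template <int N> int make(int x) { return x + N; }

int f(int a, int b, int c, int d, int e) {
  constexpr bool flag = true;
  int r = a < b ? c : d > e;      // comparisons: (a < b) ? c : (d > e)
  int v = make<flag ? 1 : 2>(r);  // template-id: '>' is followed by '('
  return r + v;
}
```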
- if (CurrentToken->Previous->isOneOf(tok::pipepipe, tok::ampamp) && - CurrentToken->Previous->is(TT_BinaryOperator) && - Contexts[Contexts.size() - 2].IsExpression && - !Line.startsWith(tok::kw_template)) { - return false; + if (InExpr && !Line.startsWith(tok::kw_template) && + Prev.is(TT_BinaryOperator)) { + const auto Precedence = Prev.getPrecedence(); + if (Precedence > prec::Conditional && Precedence < prec::Relational) + return false; } + if (Prev.is(TT_ConditionalExpr)) + SeenTernaryOperator = true; updateParameterCount(Left, CurrentToken); if (Style.Language == FormatStyle::LK_Proto) { if (FormatToken *Previous = CurrentToken->getPreviousNonComment()) { @@ -372,6 +382,10 @@ private: OpeningParen.Previous->is(tok::kw__Generic)) { Contexts.back().ContextType = Context::C11GenericSelection; Contexts.back().IsExpression = true; + } else if (Line.InPPDirective && + (!OpeningParen.Previous || + OpeningParen.Previous->isNot(tok::identifier))) { + Contexts.back().IsExpression = true; } else if (Contexts[Contexts.size() - 2].CaretFound) { // This is the parameter list of an ObjC block. Contexts.back().IsExpression = false; @@ -384,20 +398,7 @@ private: OpeningParen.Previous->MatchingParen->isOneOf( TT_ObjCBlockLParen, TT_FunctionTypeLParen)) { Contexts.back().IsExpression = false; - } else if (Line.InPPDirective) { - auto IsExpr = [&OpeningParen] { - const auto *Tok = OpeningParen.Previous; - if (!Tok || Tok->isNot(tok::identifier)) - return true; - Tok = Tok->Previous; - while (Tok && Tok->endsSequence(tok::coloncolon, tok::identifier)) { - assert(Tok->Previous); - Tok = Tok->Previous->Previous; - } - return !Tok || !Tok->Tok.getIdentifierInfo(); - }; - Contexts.back().IsExpression = IsExpr(); - } else if (!Line.MustBeDeclaration) { + } else if (!Line.MustBeDeclaration && !Line.InPPDirective) { bool IsForOrCatch = OpeningParen.Previous && OpeningParen.Previous->isOneOf(tok::kw_for, tok::kw_catch); diff --git a/clang/lib/Sema/CheckExprLifetime.cpp b/clang/lib/Sema/CheckExprLifetime.cpp index 5c8ef564f30a..112cf3d08182 100644 --- a/clang/lib/Sema/CheckExprLifetime.cpp +++ b/clang/lib/Sema/CheckExprLifetime.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "CheckExprLifetime.h" +#include "clang/AST/Decl.h" #include "clang/AST/Expr.h" #include "clang/Basic/DiagnosticSema.h" #include "clang/Sema/Initialization.h" @@ -548,6 +549,14 @@ static void visitLocalsRetainedByReferenceBinding(IndirectLocalPath &Path, EnableLifetimeWarnings); } + if (auto *M = dyn_cast<MemberExpr>(Init)) { + // Lifetime of a non-reference type field is same as base object. + if (auto *F = dyn_cast<FieldDecl>(M->getMemberDecl()); + F && !F->getType()->isReferenceType()) + visitLocalsRetainedByInitializer(Path, M->getBase(), Visit, true, + EnableLifetimeWarnings); + } + if (isa<CallExpr>(Init)) { if (EnableLifetimeWarnings) handleGslAnnotatedTypes(Path, Init, Visit); diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index cf1196ad23c2..9088b5e285bf 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -13664,10 +13664,11 @@ void Sema::DiagnoseSelfMove(const Expr *LHSExpr, const Expr *RHSExpr, //===--- Layout compatibility ----------------------------------------------// -static bool isLayoutCompatible(ASTContext &C, QualType T1, QualType T2); +static bool isLayoutCompatible(const ASTContext &C, QualType T1, QualType T2); /// Check if two enumeration types are layout-compatible. 
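The CheckExprLifetime.cpp hunk above makes the lifetime walk step from a non-reference member access back to its base object. A hedged sketch of the pattern that walk covers (whether a given case warns still depends on the enclosing checks):

```cpp
struct Pair {
  int first;
  int second;
};

// The new MemberExpr handling walks from the member access to its base, so
// escaping a reference to a member of a local is tracked like escaping the
// local itself:
const int &pick() {
  Pair p{1, 2};
  return p.first;  // dangling: 'p.first' lives exactly as long as 'p'
}
```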
-static bool isLayoutCompatible(ASTContext &C, EnumDecl *ED1, EnumDecl *ED2) { +static bool isLayoutCompatible(const ASTContext &C, const EnumDecl *ED1, + const EnumDecl *ED2) { // C++11 [dcl.enum] p8: // Two enumeration types are layout-compatible if they have the same // underlying type. @@ -13678,8 +13679,8 @@ static bool isLayoutCompatible(ASTContext &C, EnumDecl *ED1, EnumDecl *ED2) { /// Check if two fields are layout-compatible. /// Can be used on union members, which are exempt from alignment requirement /// of common initial sequence. -static bool isLayoutCompatible(ASTContext &C, FieldDecl *Field1, - FieldDecl *Field2, +static bool isLayoutCompatible(const ASTContext &C, const FieldDecl *Field1, + const FieldDecl *Field2, bool AreUnionMembers = false) { [[maybe_unused]] const Type *Field1Parent = Field1->getParent()->getTypeForDecl(); @@ -13722,60 +13723,33 @@ static bool isLayoutCompatible(ASTContext &C, FieldDecl *Field1, /// Check if two standard-layout structs are layout-compatible. /// (C++11 [class.mem] p17) -static bool isLayoutCompatibleStruct(ASTContext &C, RecordDecl *RD1, - RecordDecl *RD2) { - // If both records are C++ classes, check that base classes match. - if (const CXXRecordDecl *D1CXX = dyn_cast<CXXRecordDecl>(RD1)) { - // If one of records is a CXXRecordDecl we are in C++ mode, - // thus the other one is a CXXRecordDecl, too. - const CXXRecordDecl *D2CXX = cast<CXXRecordDecl>(RD2); - // Check number of base classes. - if (D1CXX->getNumBases() != D2CXX->getNumBases()) - return false; +static bool isLayoutCompatibleStruct(const ASTContext &C, const RecordDecl *RD1, + const RecordDecl *RD2) { + // Get to the class where the fields are declared + if (const CXXRecordDecl *D1CXX = dyn_cast<CXXRecordDecl>(RD1)) + RD1 = D1CXX->getStandardLayoutBaseWithFields(); - // Check the base classes. - for (CXXRecordDecl::base_class_const_iterator - Base1 = D1CXX->bases_begin(), - BaseEnd1 = D1CXX->bases_end(), - Base2 = D2CXX->bases_begin(); - Base1 != BaseEnd1; - ++Base1, ++Base2) { - if (!isLayoutCompatible(C, Base1->getType(), Base2->getType())) - return false; - } - } else if (const CXXRecordDecl *D2CXX = dyn_cast<CXXRecordDecl>(RD2)) { - // If only RD2 is a C++ class, it should have zero base classes. - if (D2CXX->getNumBases() > 0) - return false; - } + if (const CXXRecordDecl *D2CXX = dyn_cast<CXXRecordDecl>(RD2)) + RD2 = D2CXX->getStandardLayoutBaseWithFields(); // Check the fields. - RecordDecl::field_iterator Field2 = RD2->field_begin(), - Field2End = RD2->field_end(), - Field1 = RD1->field_begin(), - Field1End = RD1->field_end(); - for ( ; Field1 != Field1End && Field2 != Field2End; ++Field1, ++Field2) { - if (!isLayoutCompatible(C, *Field1, *Field2)) - return false; - } - if (Field1 != Field1End || Field2 != Field2End) - return false; - - return true; + return llvm::equal(RD1->fields(), RD2->fields(), + [&C](const FieldDecl *F1, const FieldDecl *F2) -> bool { + return isLayoutCompatible(C, F1, F2); + }); } /// Check if two standard-layout unions are layout-compatible. 
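This rewrite leans on the new `getStandardLayoutBaseWithFields()`: in a standard-layout hierarchy at most one class declares fields, so both sides can be normalized to their field-owning class and the field sequences compared with `llvm::equal`. An illustration, assuming Clang's `__is_layout_compatible` trait:

```cpp
// All fields of a standard-layout type live in a single class of its
// hierarchy; Derived declares none, so its field-owning class is Base.
struct Base { int a; int b; };
struct Derived : Base {};  // standard layout, adds no fields
struct Plain { int a; int b; };

// After this change the comparison starts at the field-owning classes:
static_assert(__is_layout_compatible(Derived, Plain),
              "fields are compared where they are declared");
```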
/// (C++11 [class.mem] p18) -static bool isLayoutCompatibleUnion(ASTContext &C, RecordDecl *RD1, - RecordDecl *RD2) { - llvm::SmallPtrSet<FieldDecl *, 8> UnmatchedFields; +static bool isLayoutCompatibleUnion(const ASTContext &C, const RecordDecl *RD1, + const RecordDecl *RD2) { + llvm::SmallPtrSet<const FieldDecl *, 8> UnmatchedFields; for (auto *Field2 : RD2->fields()) UnmatchedFields.insert(Field2); for (auto *Field1 : RD1->fields()) { - llvm::SmallPtrSet<FieldDecl *, 8>::iterator - I = UnmatchedFields.begin(), - E = UnmatchedFields.end(); + auto I = UnmatchedFields.begin(); + auto E = UnmatchedFields.end(); for ( ; I != E; ++I) { if (isLayoutCompatible(C, Field1, *I, /*IsUnionMember=*/true)) { @@ -13792,8 +13766,8 @@ static bool isLayoutCompatibleUnion(ASTContext &C, RecordDecl *RD1, return UnmatchedFields.empty(); } -static bool isLayoutCompatible(ASTContext &C, RecordDecl *RD1, - RecordDecl *RD2) { +static bool isLayoutCompatible(const ASTContext &C, const RecordDecl *RD1, + const RecordDecl *RD2) { if (RD1->isUnion() != RD2->isUnion()) return false; @@ -13804,7 +13778,7 @@ static bool isLayoutCompatible(ASTContext &C, RecordDecl *RD1, } /// Check if two types are layout-compatible in C++11 sense. -static bool isLayoutCompatible(ASTContext &C, QualType T1, QualType T2) { +static bool isLayoutCompatible(const ASTContext &C, QualType T1, QualType T2) { if (T1.isNull() || T2.isNull()) return false; diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index bb25a0b3a45a..01231f8e385e 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -6890,6 +6890,11 @@ static void checkAttributesAfterMerging(Sema &S, NamedDecl &ND) { } } + if (HybridPatchableAttr *Attr = ND.getAttr<HybridPatchableAttr>()) { + if (!ND.isExternallyVisible()) + S.Diag(Attr->getLocation(), + diag::warn_attribute_hybrid_patchable_non_extern); + } if (const InheritableAttr *Attr = getDLLAttr(&ND)) { auto *VD = dyn_cast<VarDecl>(&ND); bool IsAnonymousNS = false; @@ -11009,6 +11014,9 @@ static bool AttrCompatibleWithMultiVersion(attr::Kind Kind, switch (Kind) { default: return false; + case attr::ArmLocallyStreaming: + return MVKind == MultiVersionKind::TargetVersion || + MVKind == MultiVersionKind::TargetClones; case attr::Used: return MVKind == MultiVersionKind::Target; case attr::NonNull: @@ -11145,7 +11153,21 @@ bool Sema::areMultiversionVariantFunctionsCompatible( FunctionType::ExtInfo OldTypeInfo = OldType->getExtInfo(); FunctionType::ExtInfo NewTypeInfo = NewType->getExtInfo(); - if (OldTypeInfo.getCC() != NewTypeInfo.getCC()) + const auto *OldFPT = OldFD->getType()->getAs<FunctionProtoType>(); + const auto *NewFPT = NewFD->getType()->getAs<FunctionProtoType>(); + + bool ArmStreamingCCMismatched = false; + if (OldFPT && NewFPT) { + unsigned Diff = + OldFPT->getAArch64SMEAttributes() ^ NewFPT->getAArch64SMEAttributes(); + // Arm-streaming, arm-streaming-compatible and non-streaming versions + // cannot be mixed. 
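Together with the deleted `err_sme_streaming_cannot_be_multiversioned`, the hunks here move SME streaming checks from a blanket ban on multi-versioning into the calling-convention compatibility test. A sketch targeting AArch64 with SME (feature strings illustrative):

```cpp
// Previously: error: streaming function cannot be multi-versioned.
// Now accepted: __arm_locally_streaming changes only the body, not the ABI.
__arm_locally_streaming __attribute__((target_version("sme2")))
void kernel(void) {}
__attribute__((target_version("default"))) void kernel(void) {}

// Still rejected: streaming vs. non-streaming *interfaces* differ in calling
// convention, which the new ArmStreamingCCMismatched check reports:
__attribute__((target_version("sme2"))) void f(void) __arm_streaming;
// __attribute__((target_version("default"))) void f(void);  // error: CC mismatch
```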
+ if (Diff & (FunctionType::SME_PStateSMEnabledMask | + FunctionType::SME_PStateSMCompatibleMask)) + ArmStreamingCCMismatched = true; + } + + if (OldTypeInfo.getCC() != NewTypeInfo.getCC() || ArmStreamingCCMismatched) return Diag(DiffDiagIDAt.first, DiffDiagIDAt.second) << CallingConv; QualType OldReturnType = OldType->getReturnType(); @@ -11165,9 +11187,8 @@ bool Sema::areMultiversionVariantFunctionsCompatible( if (!CLinkageMayDiffer && OldFD->isExternC() != NewFD->isExternC()) return Diag(DiffDiagIDAt.first, DiffDiagIDAt.second) << LanguageLinkage; - if (CheckEquivalentExceptionSpec( - OldFD->getType()->getAs<FunctionProtoType>(), OldFD->getLocation(), - NewFD->getType()->getAs<FunctionProtoType>(), NewFD->getLocation())) + if (CheckEquivalentExceptionSpec(OldFPT, OldFD->getLocation(), NewFPT, + NewFD->getLocation())) return true; } return false; diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index 5fd8622c90dd..e2eada24f9fc 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -3024,9 +3024,6 @@ bool Sema::checkTargetVersionAttr(SourceLocation LiteralLoc, Decl *D, return Diag(LiteralLoc, diag::warn_unsupported_target_attribute) << Unsupported << None << CurFeature << TargetVersion; } - if (IsArmStreamingFunction(cast<FunctionDecl>(D), - /*IncludeLocallyStreaming=*/false)) - return Diag(LiteralLoc, diag::err_sme_streaming_cannot_be_multiversioned); return false; } @@ -3123,10 +3120,6 @@ bool Sema::checkTargetClonesAttrString( HasNotDefault = true; } } - if (IsArmStreamingFunction(cast<FunctionDecl>(D), - /*IncludeLocallyStreaming=*/false)) - return Diag(LiteralLoc, - diag::err_sme_streaming_cannot_be_multiversioned); } else { // Other targets ( currently X86 ) if (Cur.starts_with("arch=")) { @@ -6868,6 +6861,9 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL, case ParsedAttr::AT_MSConstexpr: handleMSConstexprAttr(S, D, AL); break; + case ParsedAttr::AT_HybridPatchable: + handleSimpleAttribute<HybridPatchableAttr>(S, D, AL); + break; // HLSL attributes: case ParsedAttr::AT_HLSLNumThreads: diff --git a/clang/lib/StaticAnalyzer/Checkers/BlockInCriticalSectionChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/BlockInCriticalSectionChecker.cpp index 40f7e9cede1f..4cd2f2802f30 100644 --- a/clang/lib/StaticAnalyzer/Checkers/BlockInCriticalSectionChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/BlockInCriticalSectionChecker.cpp @@ -147,10 +147,18 @@ using MutexDescriptor = class BlockInCriticalSectionChecker : public Checker<check::PostCall> { private: const std::array<MutexDescriptor, 8> MutexDescriptors{ - MemberMutexDescriptor({/*MatchAs=*/CDM::CXXMethod, - /*QualifiedName=*/{"std", "mutex", "lock"}, - /*RequiredArgs=*/0}, - {CDM::CXXMethod, {"std", "mutex", "unlock"}, 0}), + // NOTE: There are standard library implementations where some methods + // of `std::mutex` are inherited from an implementation detail base + // class, and those aren't matched by the name specification {"std", + // "mutex", "lock"}. + // As a workaround here we omit the class name and only require the + // presence of the name parts "std" and "lock"/"unlock". + // TODO: Ensure that CallDescription understands inherited methods. 
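For context on the NOTE above, a compilable stand-in for the library layout that defeats exact qualified-name matching (names are illustrative, not libstdc++'s real internals):

```cpp
// Mirrors how some standard libraries structure std::mutex: lock()/unlock()
// are declared on an internal base and only inherited by std::mutex.
namespace fake_std {
struct mutex_base {
  void lock() {}
  void unlock() {}
};
struct mutex : mutex_base {};  // lock/unlock only inherited
}

int main() {
  fake_std::mutex m;
  m.lock();    // resolves to fake_std::mutex_base::lock
  m.unlock();  // a spec {"fake_std","mutex","lock"} would miss this call
}
```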
+ MemberMutexDescriptor( + {/*MatchAs=*/CDM::CXXMethod, + /*QualifiedName=*/{"std", /*"mutex",*/ "lock"}, + /*RequiredArgs=*/0}, + {CDM::CXXMethod, {"std", /*"mutex",*/ "unlock"}, 0}), FirstArgMutexDescriptor({CDM::CLibrary, {"pthread_mutex_lock"}, 1}, {CDM::CLibrary, {"pthread_mutex_unlock"}, 1}), FirstArgMutexDescriptor({CDM::CLibrary, {"mtx_lock"}, 1}, diff --git a/clang/lib/StaticAnalyzer/Core/Store.cpp b/clang/lib/StaticAnalyzer/Core/Store.cpp index 67ca61bb56ba..b436dd746d21 100644 --- a/clang/lib/StaticAnalyzer/Core/Store.cpp +++ b/clang/lib/StaticAnalyzer/Core/Store.cpp @@ -472,7 +472,17 @@ SVal StoreManager::getLValueElement(QualType elementType, NonLoc Offset, const auto *ElemR = dyn_cast<ElementRegion>(BaseRegion); // Convert the offset to the appropriate size and signedness. - Offset = svalBuilder.convertToArrayIndex(Offset).castAs<NonLoc>(); + auto Off = svalBuilder.convertToArrayIndex(Offset).getAs<NonLoc>(); + if (!Off) { + // Handle cases when LazyCompoundVal is used for an array index. + // Such case is possible if code does: + // char b[4]; + // a[__builtin_bitcast(int, b)]; + // Return UnknownVal, since we cannot model it. + return UnknownVal(); + } + + Offset = Off.value(); if (!ElemR) { // If the base region is not an ElementRegion, create one. diff --git a/compiler-rt/lib/builtins/riscv/feature_bits.c b/compiler-rt/lib/builtins/riscv/feature_bits.c deleted file mode 100644 index 77422935bd2d..000000000000 --- a/compiler-rt/lib/builtins/riscv/feature_bits.c +++ /dev/null @@ -1,298 +0,0 @@ -//=== feature_bits.c - Update RISC-V Feature Bits Structure -*- C -*-=========// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#define RISCV_FEATURE_BITS_LENGTH 1 -struct { - unsigned length; - unsigned long long features[RISCV_FEATURE_BITS_LENGTH]; -} __riscv_feature_bits __attribute__((visibility("hidden"), nocommon)); - -#define RISCV_VENDOR_FEATURE_BITS_LENGTH 1 -struct { - unsigned vendorID; - unsigned length; - unsigned long long features[RISCV_VENDOR_FEATURE_BITS_LENGTH]; -} __riscv_vendor_feature_bits __attribute__((visibility("hidden"), nocommon)); - -// NOTE: Should sync-up with RISCVFeatures.td -// TODO: Maybe generate a header from tablegen then include it. 
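Returning to the Store.cpp hunk above: its comment's `__builtin_bitcast` reads like shorthand for Clang's `__builtin_bit_cast`. A compilable version of that reproducer:

```cpp
// The array index evaluates to a whole-object snapshot (LazyCompoundVal),
// which convertToArrayIndex cannot turn into a NonLoc integer.
int deref(int *a) {
  char b[4] = {0, 1, 2, 3};
  // __builtin_bit_cast reinterprets the 4 bytes of 'b' as an int:
  return a[__builtin_bit_cast(int, b)];  // analyzer yields UnknownVal, no crash
}
```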
-#define A_GROUPID 0 -#define A_BITMASK (1ULL << 0) -#define C_GROUPID 0 -#define C_BITMASK (1ULL << 2) -#define D_GROUPID 0 -#define D_BITMASK (1ULL << 3) -#define F_GROUPID 0 -#define F_BITMASK (1ULL << 5) -#define I_GROUPID 0 -#define I_BITMASK (1ULL << 8) -#define M_GROUPID 0 -#define M_BITMASK (1ULL << 12) -#define V_GROUPID 0 -#define V_BITMASK (1ULL << 21) -#define ZACAS_GROUPID 0 -#define ZACAS_BITMASK (1ULL << 26) -#define ZBA_GROUPID 0 -#define ZBA_BITMASK (1ULL << 27) -#define ZBB_GROUPID 0 -#define ZBB_BITMASK (1ULL << 28) -#define ZBC_GROUPID 0 -#define ZBC_BITMASK (1ULL << 29) -#define ZBKB_GROUPID 0 -#define ZBKB_BITMASK (1ULL << 30) -#define ZBKC_GROUPID 0 -#define ZBKC_BITMASK (1ULL << 31) -#define ZBKX_GROUPID 0 -#define ZBKX_BITMASK (1ULL << 32) -#define ZBS_GROUPID 0 -#define ZBS_BITMASK (1ULL << 33) -#define ZFA_GROUPID 0 -#define ZFA_BITMASK (1ULL << 34) -#define ZFH_GROUPID 0 -#define ZFH_BITMASK (1ULL << 35) -#define ZFHMIN_GROUPID 0 -#define ZFHMIN_BITMASK (1ULL << 36) -#define ZICBOZ_GROUPID 0 -#define ZICBOZ_BITMASK (1ULL << 37) -#define ZICOND_GROUPID 0 -#define ZICOND_BITMASK (1ULL << 38) -#define ZIHINTNTL_GROUPID 0 -#define ZIHINTNTL_BITMASK (1ULL << 39) -#define ZIHINTPAUSE_GROUPID 0 -#define ZIHINTPAUSE_BITMASK (1ULL << 40) -#define ZKND_GROUPID 0 -#define ZKND_BITMASK (1ULL << 41) -#define ZKNE_GROUPID 0 -#define ZKNE_BITMASK (1ULL << 42) -#define ZKNH_GROUPID 0 -#define ZKNH_BITMASK (1ULL << 43) -#define ZKSED_GROUPID 0 -#define ZKSED_BITMASK (1ULL << 44) -#define ZKSH_GROUPID 0 -#define ZKSH_BITMASK (1ULL << 45) -#define ZKT_GROUPID 0 -#define ZKT_BITMASK (1ULL << 46) -#define ZTSO_GROUPID 0 -#define ZTSO_BITMASK (1ULL << 47) -#define ZVBB_GROUPID 0 -#define ZVBB_BITMASK (1ULL << 48) -#define ZVBC_GROUPID 0 -#define ZVBC_BITMASK (1ULL << 49) -#define ZVFH_GROUPID 0 -#define ZVFH_BITMASK (1ULL << 50) -#define ZVFHMIN_GROUPID 0 -#define ZVFHMIN_BITMASK (1ULL << 51) -#define ZVKB_GROUPID 0 -#define ZVKB_BITMASK (1ULL << 52) -#define ZVKG_GROUPID 0 -#define ZVKG_BITMASK (1ULL << 53) -#define ZVKNED_GROUPID 0 -#define ZVKNED_BITMASK (1ULL << 54) -#define ZVKNHA_GROUPID 0 -#define ZVKNHA_BITMASK (1ULL << 55) -#define ZVKNHB_GROUPID 0 -#define ZVKNHB_BITMASK (1ULL << 56) -#define ZVKSED_GROUPID 0 -#define ZVKSED_BITMASK (1ULL << 57) -#define ZVKSH_GROUPID 0 -#define ZVKSH_BITMASK (1ULL << 58) -#define ZVKT_GROUPID 0 -#define ZVKT_BITMASK (1ULL << 59) - -#if defined(__linux__) - -static long syscall_impl_5_args(long number, long arg1, long arg2, long arg3, - long arg4, long arg5) { - register long a7 __asm__("a7") = number; - register long a0 __asm__("a0") = arg1; - register long a1 __asm__("a1") = arg2; - register long a2 __asm__("a2") = arg3; - register long a3 __asm__("a3") = arg4; - register long a4 __asm__("a4") = arg5; - __asm__ __volatile__("ecall\n\t" - : "=r"(a0) - : "r"(a7), "r"(a0), "r"(a1), "r"(a2), "r"(a3), "r"(a4) - : "memory"); - return a0; -} - -#define RISCV_HWPROBE_KEY_MVENDORID 0 -#define RISCV_HWPROBE_KEY_MARCHID 1 -#define RISCV_HWPROBE_KEY_MIMPID 2 -#define RISCV_HWPROBE_KEY_BASE_BEHAVIOR 3 -#define RISCV_HWPROBE_BASE_BEHAVIOR_IMA (1ULL << 0) -#define RISCV_HWPROBE_KEY_IMA_EXT_0 4 -#define RISCV_HWPROBE_IMA_FD (1ULL << 0) -#define RISCV_HWPROBE_IMA_C (1ULL << 1) -#define RISCV_HWPROBE_IMA_V (1ULL << 2) -#define RISCV_HWPROBE_EXT_ZBA (1ULL << 3) -#define RISCV_HWPROBE_EXT_ZBB (1ULL << 4) -#define RISCV_HWPROBE_EXT_ZBS (1ULL << 5) -#define RISCV_HWPROBE_EXT_ZICBOZ (1ULL << 6) -#define RISCV_HWPROBE_EXT_ZBC (1ULL << 7) -#define 
RISCV_HWPROBE_EXT_ZBKB (1ULL << 8) -#define RISCV_HWPROBE_EXT_ZBKC (1ULL << 9) -#define RISCV_HWPROBE_EXT_ZBKX (1ULL << 10) -#define RISCV_HWPROBE_EXT_ZKND (1ULL << 11) -#define RISCV_HWPROBE_EXT_ZKNE (1ULL << 12) -#define RISCV_HWPROBE_EXT_ZKNH (1ULL << 13) -#define RISCV_HWPROBE_EXT_ZKSED (1ULL << 14) -#define RISCV_HWPROBE_EXT_ZKSH (1ULL << 15) -#define RISCV_HWPROBE_EXT_ZKT (1ULL << 16) -#define RISCV_HWPROBE_EXT_ZVBB (1ULL << 17) -#define RISCV_HWPROBE_EXT_ZVBC (1ULL << 18) -#define RISCV_HWPROBE_EXT_ZVKB (1ULL << 19) -#define RISCV_HWPROBE_EXT_ZVKG (1ULL << 20) -#define RISCV_HWPROBE_EXT_ZVKNED (1ULL << 21) -#define RISCV_HWPROBE_EXT_ZVKNHA (1ULL << 22) -#define RISCV_HWPROBE_EXT_ZVKNHB (1ULL << 23) -#define RISCV_HWPROBE_EXT_ZVKSED (1ULL << 24) -#define RISCV_HWPROBE_EXT_ZVKSH (1ULL << 25) -#define RISCV_HWPROBE_EXT_ZVKT (1ULL << 26) -#define RISCV_HWPROBE_EXT_ZFH (1ULL << 27) -#define RISCV_HWPROBE_EXT_ZFHMIN (1ULL << 28) -#define RISCV_HWPROBE_EXT_ZIHINTNTL (1ULL << 29) -#define RISCV_HWPROBE_EXT_ZVFH (1ULL << 30) -#define RISCV_HWPROBE_EXT_ZVFHMIN (1ULL << 31) -#define RISCV_HWPROBE_EXT_ZFA (1ULL << 32) -#define RISCV_HWPROBE_EXT_ZTSO (1ULL << 33) -#define RISCV_HWPROBE_EXT_ZACAS (1ULL << 34) -#define RISCV_HWPROBE_EXT_ZICOND (1ULL << 35) -#define RISCV_HWPROBE_EXT_ZIHINTPAUSE (1ULL << 36) -#define RISCV_HWPROBE_KEY_CPUPERF_0 5 -#define RISCV_HWPROBE_MISALIGNED_UNKNOWN (0 << 0) -#define RISCV_HWPROBE_MISALIGNED_EMULATED (1ULL << 0) -#define RISCV_HWPROBE_MISALIGNED_SLOW (2 << 0) -#define RISCV_HWPROBE_MISALIGNED_FAST (3 << 0) -#define RISCV_HWPROBE_MISALIGNED_UNSUPPORTED (4 << 0) -#define RISCV_HWPROBE_MISALIGNED_MASK (7 << 0) -#define RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE 6 -/* Increase RISCV_HWPROBE_MAX_KEY when adding items. */ - -struct riscv_hwprobe { - long long key; - unsigned long long value; -}; - -#define __NR_riscv_hwprobe 258 -static long initHwProbe(struct riscv_hwprobe *Hwprobes, int len) { - return syscall_impl_5_args(__NR_riscv_hwprobe, (long)Hwprobes, len, 0, 0, 0); -} - -#define SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(EXTNAME) \ - SET_SINGLE_IMAEXT_RISCV_FEATURE(RISCV_HWPROBE_EXT_##EXTNAME, EXTNAME) - -#define SET_SINGLE_IMAEXT_RISCV_FEATURE(HWPROBE_BITMASK, EXT) \ - SET_SINGLE_RISCV_FEATURE(IMAEXT0Value &HWPROBE_BITMASK, EXT) - -#define SET_SINGLE_RISCV_FEATURE(COND, EXT) \ - if (COND) { \ - SET_RISCV_FEATURE(EXT); \ - } - -#define SET_RISCV_FEATURE(EXT) features[EXT##_GROUPID] |= EXT##_BITMASK - -static void initRISCVFeature(struct riscv_hwprobe Hwprobes[]) { - - // Note: If a hwprobe key is unknown to the kernel, its key field - // will be cleared to -1, and its value set to 0. - // This unsets all extension bitmask bits. - - // Init vendor extension - __riscv_vendor_feature_bits.length = 0; - __riscv_vendor_feature_bits.vendorID = Hwprobes[2].value; - - // Init standard extension - // TODO: Maybe Extension implied generate from tablegen? 
- __riscv_feature_bits.length = RISCV_FEATURE_BITS_LENGTH; - - unsigned long long features[RISCV_FEATURE_BITS_LENGTH]; - int i; - - for (i = 0; i < RISCV_FEATURE_BITS_LENGTH; i++) - features[i] = 0; - - // Check RISCV_HWPROBE_KEY_BASE_BEHAVIOR - unsigned long long BaseValue = Hwprobes[0].value; - if (BaseValue & RISCV_HWPROBE_BASE_BEHAVIOR_IMA) { - SET_RISCV_FEATURE(I); - SET_RISCV_FEATURE(M); - SET_RISCV_FEATURE(A); - } - - // Check RISCV_HWPROBE_KEY_IMA_EXT_0 - unsigned long long IMAEXT0Value = Hwprobes[1].value; - if (IMAEXT0Value & RISCV_HWPROBE_IMA_FD) { - SET_RISCV_FEATURE(F); - SET_RISCV_FEATURE(D); - } - - SET_SINGLE_IMAEXT_RISCV_FEATURE(RISCV_HWPROBE_IMA_C, C); - SET_SINGLE_IMAEXT_RISCV_FEATURE(RISCV_HWPROBE_IMA_V, V); - SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZBA); - SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZBB); - SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZBS); - SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZICBOZ); - SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZBC); - SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZBKB); - SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZBKC); - SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZBKX); - SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZKND); - SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZKNE); - SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZKNH); - SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZKSED); - SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZKSH); - SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZKT); - SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZVBB); - SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZVBC); - SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZVKB); - SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZVKG); - SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZVKNED); - SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZVKNHA); - SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZVKNHB); - SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZVKSED); - SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZVKSH); - SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZVKT); - SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZFH); - SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZFHMIN); - SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZIHINTNTL); - SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZIHINTPAUSE); - SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZVFH); - SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZVFHMIN); - SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZFA); - SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZTSO); - SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZACAS); - SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZICOND); - - for (i = 0; i < RISCV_FEATURE_BITS_LENGTH; i++) - __riscv_feature_bits.features[i] = features[i]; -} - -#endif // defined(__linux__) - -static int FeaturesBitCached = 0; - -void __init_riscv_feature_bits() { - - if (FeaturesBitCached) - return; - -#if defined(__linux__) - struct riscv_hwprobe Hwprobes[] = { - {RISCV_HWPROBE_KEY_BASE_BEHAVIOR, 0}, - {RISCV_HWPROBE_KEY_IMA_EXT_0, 0}, - {RISCV_HWPROBE_KEY_MVENDORID, 0}, - }; - if (initHwProbe(Hwprobes, sizeof(Hwprobes) / sizeof(Hwprobes[0]))) - return; - - initRISCVFeature(Hwprobes); -#endif // defined(__linux__) - - FeaturesBitCached = 1; -} diff --git a/compiler-rt/lib/interception/interception_linux.h b/compiler-rt/lib/interception/interception_linux.h index 433a3d9bd7fa..2e01ff44578c 100644 --- a/compiler-rt/lib/interception/interception_linux.h +++ b/compiler-rt/lib/interception/interception_linux.h @@ -28,12 +28,14 @@ bool InterceptFunction(const char *name, const char *ver, uptr *ptr_to_real, uptr func, uptr trampoline); } // namespace __interception 
-#define INTERCEPT_FUNCTION_LINUX_OR_FREEBSD(func) \ - ::__interception::InterceptFunction( \ - #func, \ - (::__interception::uptr *)&REAL(func), \ - (::__interception::uptr)&(func), \ - (::__interception::uptr)&TRAMPOLINE(func)) +// Cast func to type of REAL(func) before casting to uptr in case it is an +// overloaded function, which is the case for some glibc functions when +// _FORTIFY_SOURCE is used. This disambiguates which overload to use. +#define INTERCEPT_FUNCTION_LINUX_OR_FREEBSD(func) \ + ::__interception::InterceptFunction( \ + #func, (::__interception::uptr *)&REAL(func), \ + (::__interception::uptr)(decltype(REAL(func)))&(func), \ + (::__interception::uptr) &TRAMPOLINE(func)) // dlvsym is a GNU extension supported by some other platforms. #if SANITIZER_GLIBC || SANITIZER_FREEBSD || SANITIZER_NETBSD @@ -41,7 +43,7 @@ bool InterceptFunction(const char *name, const char *ver, uptr *ptr_to_real, ::__interception::InterceptFunction( \ #func, symver, \ (::__interception::uptr *)&REAL(func), \ - (::__interception::uptr)&(func), \ + (::__interception::uptr)(decltype(REAL(func)))&(func), \ (::__interception::uptr)&TRAMPOLINE(func)) #else #define INTERCEPT_FUNCTION_VER_LINUX_OR_FREEBSD(func, symver) \ diff --git a/compiler-rt/lib/nsan/nsan_interceptors.cpp b/compiler-rt/lib/nsan/nsan_interceptors.cpp index 544b44f53cc4..852524bd3733 100644 --- a/compiler-rt/lib/nsan/nsan_interceptors.cpp +++ b/compiler-rt/lib/nsan/nsan_interceptors.cpp @@ -21,10 +21,6 @@ #include <wchar.h> -#if SANITIZER_LINUX -extern "C" int mallopt(int param, int value); -#endif - using namespace __sanitizer; using __nsan::nsan_init_is_running; using __nsan::nsan_initialized; @@ -209,12 +205,6 @@ void __nsan::InitializeInterceptors() { static bool initialized = false; CHECK(!initialized); - // Instruct libc malloc to consume less memory. -#if SANITIZER_LINUX - mallopt(1, 0); // M_MXFAST - mallopt(-3, 32 * 1024); // M_MMAP_THRESHOLD -#endif - InitializeMallocInterceptors(); INTERCEPT_FUNCTION(memset); diff --git a/compiler-rt/lib/profile/InstrProfiling.h b/compiler-rt/lib/profile/InstrProfiling.h index d424a22c212c..6906d52eacaf 100644 --- a/compiler-rt/lib/profile/InstrProfiling.h +++ b/compiler-rt/lib/profile/InstrProfiling.h @@ -49,7 +49,6 @@ typedef struct ValueProfNode { #include "profile/InstrProfData.inc" } ValueProfNode; -typedef void *IntPtrT; typedef struct COMPILER_RT_ALIGNAS(INSTR_PROF_DATA_ALIGNMENT) VTableProfData { #define INSTR_PROF_VTABLE_DATA(Type, LLVMType, Name, Initializer) Type Name; #include "profile/InstrProfData.inc" diff --git a/compiler-rt/lib/rtsan/rtsan_interceptors.cpp b/compiler-rt/lib/rtsan/rtsan_interceptors.cpp index 4d5423ec629d..b63040446e53 100644 --- a/compiler-rt/lib/rtsan/rtsan_interceptors.cpp +++ b/compiler-rt/lib/rtsan/rtsan_interceptors.cpp @@ -21,6 +21,18 @@ #include "rtsan/rtsan_context.h" #if SANITIZER_APPLE + +#if TARGET_OS_MAC +// On MacOS OSSpinLockLock is deprecated and no longer present in the headers, +// but the symbol still exists on the system. Forward declare here so we +// don't get compilation errors. 
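The macro change above exists because, with `_FORTIFY_SOURCE`, some glibc functions become overloaded in C++ translation units, and a bare `&func` then has no unique address. A self-contained sketch of the disambiguation (names hypothetical):

```cpp
#include <cstdint>

// Two overloads of one name, as fortified headers can introduce:
int myread(int fd, void *buf, unsigned long n) { return 0; }
int myread(int fd, char *buf, unsigned long n) { return 1; }

using real_t = int (*)(int, void *, unsigned long);

// error: cannot resolve the overloaded function from '&myread' alone:
// std::uintptr_t p = (std::uintptr_t)&myread;

// OK: casting to the REAL(func) pointer type first selects one overload,
// exactly what the added (decltype(REAL(func))) cast does in the macro:
std::uintptr_t p = (std::uintptr_t)(real_t)&myread;
```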
+#include <stdint.h> +extern "C" { +typedef int32_t OSSpinLock; +void OSSpinLockLock(volatile OSSpinLock *__lock); +} +#endif + #include <libkern/OSAtomic.h> #include <os/lock.h> #endif diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp index 483a1042a623..1d6a55bdb7f3 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp @@ -220,7 +220,7 @@ uptr internal_mmap(void *addr, uptr length, int prot, int flags, int fd, // mmap2 specifies file offset in 4096-byte units. CHECK(IsAligned(offset, 4096)); return internal_syscall(SYSCALL(mmap2), addr, length, prot, flags, fd, - offset / 4096); + (OFF_T)(offset / 4096)); # endif } # endif // !SANITIZER_S390 @@ -2242,25 +2242,25 @@ void SignalContext::DumpAllRegisters(void *context) { # elif SANITIZER_FREEBSD # if defined(__x86_64__) Report("Register values:\n"); - Printf("rax = 0x%016llx ", ucontext->uc_mcontext.mc_rax); - Printf("rbx = 0x%016llx ", ucontext->uc_mcontext.mc_rbx); - Printf("rcx = 0x%016llx ", ucontext->uc_mcontext.mc_rcx); - Printf("rdx = 0x%016llx ", ucontext->uc_mcontext.mc_rdx); + Printf("rax = 0x%016lx ", ucontext->uc_mcontext.mc_rax); + Printf("rbx = 0x%016lx ", ucontext->uc_mcontext.mc_rbx); + Printf("rcx = 0x%016lx ", ucontext->uc_mcontext.mc_rcx); + Printf("rdx = 0x%016lx ", ucontext->uc_mcontext.mc_rdx); Printf("\n"); - Printf("rdi = 0x%016llx ", ucontext->uc_mcontext.mc_rdi); - Printf("rsi = 0x%016llx ", ucontext->uc_mcontext.mc_rsi); - Printf("rbp = 0x%016llx ", ucontext->uc_mcontext.mc_rbp); - Printf("rsp = 0x%016llx ", ucontext->uc_mcontext.mc_rsp); + Printf("rdi = 0x%016lx ", ucontext->uc_mcontext.mc_rdi); + Printf("rsi = 0x%016lx ", ucontext->uc_mcontext.mc_rsi); + Printf("rbp = 0x%016lx ", ucontext->uc_mcontext.mc_rbp); + Printf("rsp = 0x%016lx ", ucontext->uc_mcontext.mc_rsp); Printf("\n"); - Printf(" r8 = 0x%016llx ", ucontext->uc_mcontext.mc_r8); - Printf(" r9 = 0x%016llx ", ucontext->uc_mcontext.mc_r9); - Printf("r10 = 0x%016llx ", ucontext->uc_mcontext.mc_r10); - Printf("r11 = 0x%016llx ", ucontext->uc_mcontext.mc_r11); + Printf(" r8 = 0x%016lx ", ucontext->uc_mcontext.mc_r8); + Printf(" r9 = 0x%016lx ", ucontext->uc_mcontext.mc_r9); + Printf("r10 = 0x%016lx ", ucontext->uc_mcontext.mc_r10); + Printf("r11 = 0x%016lx ", ucontext->uc_mcontext.mc_r11); Printf("\n"); - Printf("r12 = 0x%016llx ", ucontext->uc_mcontext.mc_r12); - Printf("r13 = 0x%016llx ", ucontext->uc_mcontext.mc_r13); - Printf("r14 = 0x%016llx ", ucontext->uc_mcontext.mc_r14); - Printf("r15 = 0x%016llx ", ucontext->uc_mcontext.mc_r15); + Printf("r12 = 0x%016lx ", ucontext->uc_mcontext.mc_r12); + Printf("r13 = 0x%016lx ", ucontext->uc_mcontext.mc_r13); + Printf("r14 = 0x%016lx ", ucontext->uc_mcontext.mc_r14); + Printf("r15 = 0x%016lx ", ucontext->uc_mcontext.mc_r15); Printf("\n"); # elif defined(__i386__) Report("Register values:\n"); diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_sparc.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_sparc.cpp index a2000798a390..74f435287af3 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_sparc.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_sparc.cpp @@ -58,17 +58,16 @@ void BufferedStackTrace::UnwindFast(uptr pc, uptr bp, uptr stack_top, // Avoid infinite loop when frame == frame[0] by using frame > prev_frame. 
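On the sanitizer_linux.cpp format fixes above: FreeBSD/amd64 declares the `mcontext_t` register fields as `register_t` (a `long`), so the old `%llx` specifiers mismatched the argument type. A tiny sketch:

```cpp
#include <cstdio>

int main() {
  long mc_rax = 0x1234;  // stand-in for ucontext->uc_mcontext.mc_rax
  // std::printf("rax = 0x%016llx\n", mc_rax);  // -Wformat: 'long' vs %llx
  std::printf("rax = 0x%016lx\n", mc_rax);      // matches the actual type
}
```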
while (IsValidFrame(bp, stack_top, bottom) && IsAligned(bp, sizeof(uhwptr)) && size < max_depth) { - uhwptr pc1 = ((uhwptr *)bp)[15]; + // %o7 contains the address of the call instruction and not the + // return address, so we need to compensate. + uhwptr pc1 = GetNextInstructionPc(((uhwptr *)bp)[15]); // Let's assume that any pointer in the 0th page is invalid and // stop unwinding here. If we're adding support for a platform // where this isn't true, we need to reconsider this check. if (pc1 < kPageSize) break; - if (pc1 != pc) { - // %o7 contains the address of the call instruction and not the - // return address, so we need to compensate. - trace_buffer[size++] = GetNextInstructionPc((uptr)pc1); - } + if (pc1 != pc) + trace_buffer[size++] = pc1; bottom = bp; bp = (uptr)((uhwptr *)bp)[14] + STACK_BIAS; } diff --git a/libcxx/include/__atomic/atomic_ref.h b/libcxx/include/__atomic/atomic_ref.h index 156f1961151c..b0180a37ab50 100644 --- a/libcxx/include/__atomic/atomic_ref.h +++ b/libcxx/include/__atomic/atomic_ref.h @@ -42,13 +42,21 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 20 -template <class _Tp> -struct __atomic_ref_base { -protected: - _Tp* __ptr_; +// These types are required to make __atomic_is_always_lock_free work across GCC and Clang. +// The purpose of this trick is to make sure that we provide an object with the correct alignment +// to __atomic_is_always_lock_free, since that answer depends on the alignment. +template <size_t _Alignment> +struct __alignment_checker_type { + alignas(_Alignment) char __data; +}; - _LIBCPP_HIDE_FROM_ABI __atomic_ref_base(_Tp& __obj) : __ptr_(std::addressof(__obj)) {} +template <size_t _Alignment> +struct __get_aligner_instance { + static constexpr __alignment_checker_type<_Alignment> __instance{}; +}; +template <class _Tp> +struct __atomic_ref_base { private: _LIBCPP_HIDE_FROM_ABI static _Tp* __clear_padding(_Tp& __val) noexcept { _Tp* __ptr = std::addressof(__val); @@ -95,17 +103,21 @@ private: friend struct __atomic_waitable_traits<__atomic_ref_base<_Tp>>; + // require types that are 1, 2, 4, 8, or 16 bytes in length to be aligned to at least their size to be potentially + // used lock-free + static constexpr size_t __min_alignment = (sizeof(_Tp) & (sizeof(_Tp) - 1)) || (sizeof(_Tp) > 16) ? 0 : sizeof(_Tp); + public: using value_type = _Tp; - static constexpr size_t required_alignment = alignof(_Tp); + static constexpr size_t required_alignment = alignof(_Tp) > __min_alignment ? alignof(_Tp) : __min_alignment; // The __atomic_always_lock_free builtin takes into account the alignment of the pointer if provided, // so we create a fake pointer with a suitable alignment when querying it. Note that we are guaranteed // that the pointer is going to be aligned properly at runtime because that is a (checked) precondition // of atomic_ref's constructor. 
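The atomic_ref hunks above raise `required_alignment` to at least the object size for 1/2/4/8/16-byte types and probe `__atomic_always_lock_free` with a real, suitably aligned dummy object rather than a fake pointer. With this libc++ implementation, the effect on an 8-byte struct that is only 4-aligned:

```cpp
#include <atomic>

struct TwoInts { int a; int b; };  // sizeof == 8, alignof == 4

int main() {
  // Per the new computation, required_alignment for an 8-byte type is its
  // size (8), not alignof(TwoInts) == 4, so objects must be 8-byte aligned
  // for the lock-free path to apply:
  static_assert(std::atomic_ref<TwoInts>::required_alignment == 8);

  alignas(std::atomic_ref<TwoInts>::required_alignment) TwoInts v{1, 2};
  std::atomic_ref<TwoInts> r(v);
  r.store({3, 4});
}
```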
static constexpr bool is_always_lock_free = - __atomic_always_lock_free(sizeof(_Tp), reinterpret_cast<void*>(-required_alignment)); + __atomic_always_lock_free(sizeof(_Tp), &__get_aligner_instance<required_alignment>::__instance); _LIBCPP_HIDE_FROM_ABI bool is_lock_free() const noexcept { return __atomic_is_lock_free(sizeof(_Tp), __ptr_); } @@ -205,6 +217,12 @@ public: } _LIBCPP_HIDE_FROM_ABI void notify_one() const noexcept { std::__atomic_notify_one(*this); } _LIBCPP_HIDE_FROM_ABI void notify_all() const noexcept { std::__atomic_notify_all(*this); } + +protected: + typedef _Tp _Aligned_Tp __attribute__((aligned(required_alignment))); + _Aligned_Tp* __ptr_; + + _LIBCPP_HIDE_FROM_ABI __atomic_ref_base(_Tp& __obj) : __ptr_(std::addressof(__obj)) {} }; template <class _Tp> diff --git a/libcxx/include/__type_traits/remove_cv.h b/libcxx/include/__type_traits/remove_cv.h index 50e9f3e8aa78..c4bf612794bd 100644 --- a/libcxx/include/__type_traits/remove_cv.h +++ b/libcxx/include/__type_traits/remove_cv.h @@ -10,6 +10,8 @@ #define _LIBCPP___TYPE_TRAITS_REMOVE_CV_H #include <__config> +#include <__type_traits/remove_const.h> +#include <__type_traits/remove_volatile.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -17,18 +19,23 @@ _LIBCPP_BEGIN_NAMESPACE_STD +#if __has_builtin(__remove_cv) && !defined(_LIBCPP_COMPILER_GCC) template <class _Tp> struct remove_cv { using type _LIBCPP_NODEBUG = __remove_cv(_Tp); }; -#if defined(_LIBCPP_COMPILER_GCC) template <class _Tp> -using __remove_cv_t = typename remove_cv<_Tp>::type; +using __remove_cv_t = __remove_cv(_Tp); #else template <class _Tp> -using __remove_cv_t = __remove_cv(_Tp); -#endif +struct _LIBCPP_TEMPLATE_VIS remove_cv { + typedef __remove_volatile_t<__remove_const_t<_Tp> > type; +}; + +template <class _Tp> +using __remove_cv_t = __remove_volatile_t<__remove_const_t<_Tp> >; +#endif // __has_builtin(__remove_cv) #if _LIBCPP_STD_VER >= 14 template <class _Tp> diff --git a/libcxx/include/__type_traits/remove_cvref.h b/libcxx/include/__type_traits/remove_cvref.h index 55f894dbd1d8..e8e8745ab096 100644 --- a/libcxx/include/__type_traits/remove_cvref.h +++ b/libcxx/include/__type_traits/remove_cvref.h @@ -20,26 +20,21 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if defined(_LIBCPP_COMPILER_GCC) +#if __has_builtin(__remove_cvref) && !defined(_LIBCPP_COMPILER_GCC) template <class _Tp> -struct __remove_cvref_gcc { - using type = __remove_cvref(_Tp); -}; - -template <class _Tp> -using __remove_cvref_t _LIBCPP_NODEBUG = typename __remove_cvref_gcc<_Tp>::type; +using __remove_cvref_t _LIBCPP_NODEBUG = __remove_cvref(_Tp); #else template <class _Tp> -using __remove_cvref_t _LIBCPP_NODEBUG = __remove_cvref(_Tp); +using __remove_cvref_t _LIBCPP_NODEBUG = __remove_cv_t<__libcpp_remove_reference_t<_Tp> >; #endif // __has_builtin(__remove_cvref) template <class _Tp, class _Up> -using __is_same_uncvref = _IsSame<__remove_cvref_t<_Tp>, __remove_cvref_t<_Up> >; +struct __is_same_uncvref : _IsSame<__remove_cvref_t<_Tp>, __remove_cvref_t<_Up> > {}; #if _LIBCPP_STD_VER >= 20 template <class _Tp> struct remove_cvref { - using type _LIBCPP_NODEBUG = __remove_cvref(_Tp); + using type _LIBCPP_NODEBUG = __remove_cvref_t<_Tp>; }; template <class _Tp> diff --git a/libcxx/include/typeinfo b/libcxx/include/typeinfo index d1c0de3c1bfd..2727cad02fa9 100644 --- a/libcxx/include/typeinfo +++ b/libcxx/include/typeinfo @@ -275,13 +275,14 @@ struct __type_info_implementations { __impl; }; -# if defined(__arm64__) && 
__has_cpp_attribute(clang::ptrauth_vtable_pointer) -# if __has_feature(ptrauth_type_info_discriminated_vtable_pointer) +# if __has_cpp_attribute(_Clang::__ptrauth_vtable_pointer__) +# if __has_feature(ptrauth_type_info_vtable_pointer_discrimination) # define _LIBCPP_TYPE_INFO_VTABLE_POINTER_AUTH \ - [[clang::ptrauth_vtable_pointer(process_independent, address_discrimination, type_discrimination)]] + [[_Clang::__ptrauth_vtable_pointer__(process_independent, address_discrimination, type_discrimination)]] # else # define _LIBCPP_TYPE_INFO_VTABLE_POINTER_AUTH \ - [[clang::ptrauth_vtable_pointer(process_independent, no_address_discrimination, no_extra_discrimination)]] + [[_Clang::__ptrauth_vtable_pointer__( \ + process_independent, no_address_discrimination, no_extra_discrimination)]] # endif # else # define _LIBCPP_TYPE_INFO_VTABLE_POINTER_AUTH diff --git a/libcxx/include/version b/libcxx/include/version index 40548098a92d..fe64343eafbc 100644 --- a/libcxx/include/version +++ b/libcxx/include/version @@ -238,7 +238,7 @@ __cpp_lib_string_view 202403L <string> <string __cpp_lib_submdspan 202306L <mdspan> __cpp_lib_syncbuf 201803L <syncstream> __cpp_lib_text_encoding 202306L <text_encoding> -__cpp_lib_three_way_comparison 201711L <compare> +__cpp_lib_three_way_comparison 201907L <compare> __cpp_lib_to_address 201711L <memory> __cpp_lib_to_array 201907L <array> __cpp_lib_to_chars 202306L <charconv> @@ -446,7 +446,7 @@ __cpp_lib_void_t 201411L <type_traits> # if !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_SYNCSTREAM) # define __cpp_lib_syncbuf 201803L # endif -# define __cpp_lib_three_way_comparison 201711L +# define __cpp_lib_three_way_comparison 201907L # define __cpp_lib_to_address 201711L # define __cpp_lib_to_array 201907L # define __cpp_lib_type_identity 201806L diff --git a/libcxx/src/include/overridable_function.h b/libcxx/src/include/overridable_function.h index e71e4f104b29..c7639f56eee2 100644 --- a/libcxx/src/include/overridable_function.h +++ b/libcxx/src/include/overridable_function.h @@ -13,7 +13,7 @@ #include <__config> #include <cstdint> -#if defined(__arm64e__) && __has_feature(ptrauth_calls) +#if __has_feature(ptrauth_calls) # include <ptrauth.h> #endif @@ -83,13 +83,13 @@ _LIBCPP_HIDE_FROM_ABI bool __is_function_overridden(_Ret (*__fptr)(_Args...)) no uintptr_t __end = reinterpret_cast<uintptr_t>(&__lcxx_override_end); uintptr_t __ptr = reinterpret_cast<uintptr_t>(__fptr); -#if defined(__arm64e__) && __has_feature(ptrauth_calls) +# if __has_feature(ptrauth_calls) // We must pass a void* to ptrauth_strip since it only accepts a pointer type. Also, in particular, // we must NOT pass a function pointer, otherwise we will strip the function pointer, and then attempt // to authenticate and re-sign it when casting it to a uintptr_t again, which will fail because we just // stripped the function pointer. See rdar://122927845. __ptr = reinterpret_cast<uintptr_t>(ptrauth_strip(reinterpret_cast<void*>(__ptr), ptrauth_key_function_pointer)); -#endif +# endif // Finally, the function was overridden if it falls outside of the section's bounds. 
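On the `<version>` hunk above: 201711L corresponds to the original `<compare>` support (P0768R1), while 201907L signals library-wide `operator<=>` (P1614R2), which libc++ now claims. A user-side check:

```cpp
#include <version>

// Detect library-wide three-way comparison, not just core-language support:
static_assert(__cpp_lib_three_way_comparison >= 201907L,
              "library-wide three-way comparison available");
```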
return __ptr < __start || __ptr > __end; diff --git a/libunwind/src/UnwindCursor.hpp b/libunwind/src/UnwindCursor.hpp index 2ec60e4c123d..758557337899 100644 --- a/libunwind/src/UnwindCursor.hpp +++ b/libunwind/src/UnwindCursor.hpp @@ -2589,7 +2589,8 @@ void UnwindCursor<A, R>::setInfoBasedOnIPRegister(bool isReturnAddress) { --pc; #endif -#if !(defined(_LIBUNWIND_SUPPORT_SEH_UNWIND) && defined(_WIN32)) +#if !(defined(_LIBUNWIND_SUPPORT_SEH_UNWIND) && defined(_WIN32)) && \ + !defined(_LIBUNWIND_SUPPORT_TBTAB_UNWIND) // In case of this is frame of signal handler, the IP saved in the signal // handler points to first non-executed instruction, while FDE/CIE expects IP // to be after the first non-executed instruction. diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp index 9466e8b1ce54..db0bc6c76009 100644 --- a/lld/ELF/Arch/LoongArch.cpp +++ b/lld/ELF/Arch/LoongArch.cpp @@ -511,6 +511,12 @@ RelExpr LoongArch::getRelExpr(const RelType type, const Symbol &s, return R_TLSDESC; case R_LARCH_TLS_DESC_CALL: return R_TLSDESC_CALL; + case R_LARCH_TLS_LD_PCREL20_S2: + return R_TLSLD_PC; + case R_LARCH_TLS_GD_PCREL20_S2: + return R_TLSGD_PC; + case R_LARCH_TLS_DESC_PCREL20_S2: + return R_TLSDESC_PC; // Other known relocs that are explicitly unimplemented: // @@ -557,7 +563,11 @@ void LoongArch::relocate(uint8_t *loc, const Relocation &rel, write64le(loc, val); return; + // Relocs intended for `pcaddi`. case R_LARCH_PCREL20_S2: + case R_LARCH_TLS_LD_PCREL20_S2: + case R_LARCH_TLS_GD_PCREL20_S2: + case R_LARCH_TLS_DESC_PCREL20_S2: checkInt(loc, val, 22, rel); checkAlignment(loc, val, 4, rel); write32le(loc, setJ20(read32le(loc), val >> 2)); diff --git a/lld/ELF/DWARF.cpp b/lld/ELF/DWARF.cpp index 5d58e0c60a95..517d26810a37 100644 --- a/lld/ELF/DWARF.cpp +++ b/lld/ELF/DWARF.cpp @@ -136,7 +136,8 @@ template <class ELFT> std::optional<RelocAddrEntry> LLDDwarfObj<ELFT>::find(const llvm::DWARFSection &s, uint64_t pos) const { auto &sec = static_cast<const LLDDWARFSection &>(s); - const RelsOrRelas<ELFT> rels = sec.sec->template relsOrRelas<ELFT>(); + const RelsOrRelas<ELFT> rels = + sec.sec->template relsOrRelas<ELFT>(/*supportsCrel=*/false); if (rels.areRelocsRel()) return findAux(*sec.sec, pos, rels.rels); return findAux(*sec.sec, pos, rels.relas); diff --git a/lld/ELF/ICF.cpp b/lld/ELF/ICF.cpp index bfc605c793a9..44e8a71cc628 100644 --- a/lld/ELF/ICF.cpp +++ b/lld/ELF/ICF.cpp @@ -103,12 +103,12 @@ private: void segregate(size_t begin, size_t end, uint32_t eqClassBase, bool constant); template <class RelTy> - bool constantEq(const InputSection *a, ArrayRef<RelTy> relsA, - const InputSection *b, ArrayRef<RelTy> relsB); + bool constantEq(const InputSection *a, Relocs<RelTy> relsA, + const InputSection *b, Relocs<RelTy> relsB); template <class RelTy> - bool variableEq(const InputSection *a, ArrayRef<RelTy> relsA, - const InputSection *b, ArrayRef<RelTy> relsB); + bool variableEq(const InputSection *a, Relocs<RelTy> relsA, + const InputSection *b, Relocs<RelTy> relsB); bool equalsConstant(const InputSection *a, const InputSection *b); bool equalsVariable(const InputSection *a, const InputSection *b); @@ -235,8 +235,8 @@ void ICF<ELFT>::segregate(size_t begin, size_t end, uint32_t eqClassBase, // Compare two lists of relocations. 
template <class ELFT> template <class RelTy> -bool ICF<ELFT>::constantEq(const InputSection *secA, ArrayRef<RelTy> ra, - const InputSection *secB, ArrayRef<RelTy> rb) { +bool ICF<ELFT>::constantEq(const InputSection *secA, Relocs<RelTy> ra, + const InputSection *secB, Relocs<RelTy> rb) { if (ra.size() != rb.size()) return false; auto rai = ra.begin(), rae = ra.end(), rbi = rb.begin(); @@ -324,6 +324,8 @@ bool ICF<ELFT>::equalsConstant(const InputSection *a, const InputSection *b) { const RelsOrRelas<ELFT> ra = a->template relsOrRelas<ELFT>(); const RelsOrRelas<ELFT> rb = b->template relsOrRelas<ELFT>(); + if (ra.areRelocsCrel()) + return constantEq(a, ra.crels, b, rb.crels); return ra.areRelocsRel() || rb.areRelocsRel() ? constantEq(a, ra.rels, b, rb.rels) : constantEq(a, ra.relas, b, rb.relas); @@ -333,8 +335,8 @@ bool ICF<ELFT>::equalsConstant(const InputSection *a, const InputSection *b) { // relocations point to the same section in terms of ICF. template <class ELFT> template <class RelTy> -bool ICF<ELFT>::variableEq(const InputSection *secA, ArrayRef<RelTy> ra, - const InputSection *secB, ArrayRef<RelTy> rb) { +bool ICF<ELFT>::variableEq(const InputSection *secA, Relocs<RelTy> ra, + const InputSection *secB, Relocs<RelTy> rb) { assert(ra.size() == rb.size()); auto rai = ra.begin(), rae = ra.end(), rbi = rb.begin(); @@ -374,6 +376,8 @@ template <class ELFT> bool ICF<ELFT>::equalsVariable(const InputSection *a, const InputSection *b) { const RelsOrRelas<ELFT> ra = a->template relsOrRelas<ELFT>(); const RelsOrRelas<ELFT> rb = b->template relsOrRelas<ELFT>(); + if (ra.areRelocsCrel()) + return variableEq(a, ra.crels, b, rb.crels); return ra.areRelocsRel() || rb.areRelocsRel() ? variableEq(a, ra.rels, b, rb.rels) : variableEq(a, ra.relas, b, rb.relas); @@ -441,7 +445,7 @@ void ICF<ELFT>::forEachClass(llvm::function_ref<void(size_t, size_t)> fn) { // hash. template <class RelTy> static void combineRelocHashes(unsigned cnt, InputSection *isec, - ArrayRef<RelTy> rels) { + Relocs<RelTy> rels) { uint32_t hash = isec->eqClass[cnt % 2]; for (RelTy rel : rels) { Symbol &s = isec->file->getRelocTargetSym(rel); @@ -505,7 +509,9 @@ template <class ELFT> void ICF<ELFT>::run() { for (unsigned cnt = 0; cnt != 2; ++cnt) { parallelForEach(sections, [&](InputSection *s) { const RelsOrRelas<ELFT> rels = s->template relsOrRelas<ELFT>(); - if (rels.areRelocsRel()) + if (rels.areRelocsCrel()) + combineRelocHashes(cnt, s, rels.crels); + else if (rels.areRelocsRel()) combineRelocHashes(cnt, s, rels.rels); else combineRelocHashes(cnt, s, rels.relas); diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index 03ff4eadfe67..f1c0eb292361 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -834,6 +834,7 @@ void ObjFile<ELFT>::initializeSections(bool ignoreComdats, case SHT_STRTAB: case SHT_REL: case SHT_RELA: + case SHT_CREL: case SHT_NULL: break; case SHT_PROGBITS: diff --git a/lld/ELF/InputFiles.h b/lld/ELF/InputFiles.h index 0617f41e1e13..8566baf61e1a 100644 --- a/lld/ELF/InputFiles.h +++ b/lld/ELF/InputFiles.h @@ -84,6 +84,7 @@ public: assert(fileKind == ObjKind || fileKind == BinaryKind); return sections; } + void cacheDecodedCrel(size_t i, InputSectionBase *s) { sections[i] = s; } // Returns object file symbols. It is a runtime error to call this // function on files of other types. 
diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp index 12ab1f1eac80..570e485455ba 100644 --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -133,21 +133,56 @@ void InputSectionBase::decompress() const { compressed = false; } -template <class ELFT> RelsOrRelas<ELFT> InputSectionBase::relsOrRelas() const { +template <class ELFT> +RelsOrRelas<ELFT> InputSectionBase::relsOrRelas(bool supportsCrel) const { if (relSecIdx == 0) return {}; RelsOrRelas<ELFT> ret; - typename ELFT::Shdr shdr = - cast<ELFFileBase>(file)->getELFShdrs<ELFT>()[relSecIdx]; + auto *f = cast<ObjFile<ELFT>>(file); + typename ELFT::Shdr shdr = f->template getELFShdrs<ELFT>()[relSecIdx]; + if (shdr.sh_type == SHT_CREL) { + // Return an iterator if supported by caller. + if (supportsCrel) { + ret.crels = Relocs<typename ELFT::Crel>( + (const uint8_t *)f->mb.getBufferStart() + shdr.sh_offset); + return ret; + } + InputSectionBase *const &relSec = f->getSections()[relSecIdx]; + // Otherwise, allocate a buffer to hold the decoded RELA relocations. When + // called for the first time, relSec is null (without --emit-relocs) or an + // InputSection with zero eqClass[0]. + if (!relSec || !cast<InputSection>(relSec)->eqClass[0]) { + auto *sec = makeThreadLocal<InputSection>(*f, shdr, name); + f->cacheDecodedCrel(relSecIdx, sec); + sec->type = SHT_RELA; + sec->eqClass[0] = SHT_RELA; + + RelocsCrel<ELFT::Is64Bits> entries(sec->content_); + sec->size = entries.size() * sizeof(typename ELFT::Rela); + auto *relas = makeThreadLocalN<typename ELFT::Rela>(entries.size()); + sec->content_ = reinterpret_cast<uint8_t *>(relas); + for (auto [i, r] : llvm::enumerate(entries)) { + relas[i].r_offset = r.r_offset; + relas[i].setSymbolAndType(r.r_symidx, r.r_type, false); + relas[i].r_addend = r.r_addend; + } + } + ret.relas = {ArrayRef( + reinterpret_cast<const typename ELFT::Rela *>(relSec->content_), + relSec->size / sizeof(typename ELFT::Rela))}; + return ret; + } + + const void *content = f->mb.getBufferStart() + shdr.sh_offset; + size_t size = shdr.sh_size; if (shdr.sh_type == SHT_REL) { - ret.rels = ArrayRef(reinterpret_cast<const typename ELFT::Rel *>( - file->mb.getBufferStart() + shdr.sh_offset), - shdr.sh_size / sizeof(typename ELFT::Rel)); + ret.rels = {ArrayRef(reinterpret_cast<const typename ELFT::Rel *>(content), + size / sizeof(typename ELFT::Rel))}; } else { assert(shdr.sh_type == SHT_RELA); - ret.relas = ArrayRef(reinterpret_cast<const typename ELFT::Rela *>( - file->mb.getBufferStart() + shdr.sh_offset), - shdr.sh_size / sizeof(typename ELFT::Rela)); + ret.relas = { + ArrayRef(reinterpret_cast<const typename ELFT::Rela *>(content), + size / sizeof(typename ELFT::Rela))}; } return ret; } @@ -911,7 +946,7 @@ uint64_t InputSectionBase::getRelocTargetVA(const InputFile *file, RelType type, // So, we handle relocations for non-alloc sections directly in this // function as a performance optimization. template <class ELFT, class RelTy> -void InputSection::relocateNonAlloc(uint8_t *buf, ArrayRef<RelTy> rels) { +void InputSection::relocateNonAlloc(uint8_t *buf, Relocs<RelTy> rels) { const unsigned bits = sizeof(typename ELFT::uint) * 8; const TargetInfo &target = *elf::target; const auto emachine = config->emachine; @@ -1073,11 +1108,7 @@ void InputSectionBase::relocate(uint8_t *buf, uint8_t *bufEnd) { auto *sec = cast<InputSection>(this); // For a relocatable link, also call relocateNonAlloc() to rewrite applicable // locations with tombstone values. 
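// Sketch of the caller contract assumed by the code above: passing
// supportsCrel=false makes relsOrRelas() eagerly decode CREL into a RELA
// buffer that is cached back into the file's section table, with
// eqClass[0] == SHT_RELA marking "already decoded" so the work runs once.
const RelsOrRelas<ELFT> rels =
    sec->template relsOrRelas<ELFT>(/*supportsCrel=*/false);
// rels.crels is always empty here; rels.relas views the cached buffer.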
- const RelsOrRelas<ELFT> rels = sec->template relsOrRelas<ELFT>(); - if (rels.areRelocsRel()) - sec->relocateNonAlloc<ELFT>(buf, rels.rels); - else - sec->relocateNonAlloc<ELFT>(buf, rels.relas); + invokeOnRelocs(*sec, sec->relocateNonAlloc<ELFT>, buf); } // For each function-defining prologue, find any calls to __morestack, @@ -1252,7 +1283,7 @@ SyntheticSection *EhInputSection::getParent() const { // .eh_frame is a sequence of CIE or FDE records. // This function splits an input section into records and returns them. template <class ELFT> void EhInputSection::split() { - const RelsOrRelas<ELFT> rels = relsOrRelas<ELFT>(); + const RelsOrRelas<ELFT> rels = relsOrRelas<ELFT>(/*supportsCrel=*/false); // getReloc expects the relocations to be sorted by r_offset. See the comment // in scanRelocs. if (rels.areRelocsRel()) { @@ -1418,10 +1449,14 @@ template void InputSection::writeTo<ELF32BE>(uint8_t *); template void InputSection::writeTo<ELF64LE>(uint8_t *); template void InputSection::writeTo<ELF64BE>(uint8_t *); -template RelsOrRelas<ELF32LE> InputSectionBase::relsOrRelas<ELF32LE>() const; -template RelsOrRelas<ELF32BE> InputSectionBase::relsOrRelas<ELF32BE>() const; -template RelsOrRelas<ELF64LE> InputSectionBase::relsOrRelas<ELF64LE>() const; -template RelsOrRelas<ELF64BE> InputSectionBase::relsOrRelas<ELF64BE>() const; +template RelsOrRelas<ELF32LE> +InputSectionBase::relsOrRelas<ELF32LE>(bool) const; +template RelsOrRelas<ELF32BE> +InputSectionBase::relsOrRelas<ELF32BE>(bool) const; +template RelsOrRelas<ELF64LE> +InputSectionBase::relsOrRelas<ELF64LE>(bool) const; +template RelsOrRelas<ELF64BE> +InputSectionBase::relsOrRelas<ELF64BE>(bool) const; template MergeInputSection::MergeInputSection(ObjFile<ELF32LE> &, const ELF32LE::Shdr &, StringRef); diff --git a/lld/ELF/InputSection.h b/lld/ELF/InputSection.h index ec12235f842a..6659530a9c9c 100644 --- a/lld/ELF/InputSection.h +++ b/lld/ELF/InputSection.h @@ -35,13 +35,26 @@ class OutputSection; LLVM_LIBRARY_VISIBILITY extern std::vector<Partition> partitions; -// Returned by InputSectionBase::relsOrRelas. At least one member is empty. +// Returned by InputSectionBase::relsOrRelas. At most one member is empty. template <class ELFT> struct RelsOrRelas { - ArrayRef<typename ELFT::Rel> rels; - ArrayRef<typename ELFT::Rela> relas; + Relocs<typename ELFT::Rel> rels; + Relocs<typename ELFT::Rela> relas; + Relocs<typename ELFT::Crel> crels; bool areRelocsRel() const { return rels.size(); } + bool areRelocsCrel() const { return crels.size(); } }; +#define invokeOnRelocs(sec, f, ...) \ + { \ + const RelsOrRelas<ELFT> rs = (sec).template relsOrRelas<ELFT>(); \ + if (rs.areRelocsCrel()) \ + f(__VA_ARGS__, rs.crels); \ + else if (rs.areRelocsRel()) \ + f(__VA_ARGS__, rs.rels); \ + else \ + f(__VA_ARGS__, rs.relas); \ + } + // This is the base class of all sections that lld handles. Some are sections in // input files, some are sections in the produced output file and some exist // just as a convenience for implementing special ways of combining some @@ -200,7 +213,8 @@ public: // used by --gc-sections. 
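// Sketch: what invokeOnRelocs(*sec, sec->relocateNonAlloc<ELFT>, buf)
// above expands to. The relocation list is appended after the caller's
// own arguments, which is why the macro places __VA_ARGS__ first.
{
  const RelsOrRelas<ELFT> rs = (*sec).template relsOrRelas<ELFT>();
  if (rs.areRelocsCrel())
    sec->relocateNonAlloc<ELFT>(buf, rs.crels);
  else if (rs.areRelocsRel())
    sec->relocateNonAlloc<ELFT>(buf, rs.rels);
  else
    sec->relocateNonAlloc<ELFT>(buf, rs.relas);
}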
InputSectionBase *nextInSectionGroup = nullptr; - template <class ELFT> RelsOrRelas<ELFT> relsOrRelas() const; + template <class ELFT> + RelsOrRelas<ELFT> relsOrRelas(bool supportsCrel = true) const; // InputSections that are dependent on us (reverse dependency for GC) llvm::TinyPtrVector<InputSection *> dependentSections; @@ -407,7 +421,7 @@ public: InputSectionBase *getRelocatedSection() const; template <class ELFT, class RelTy> - void relocateNonAlloc(uint8_t *buf, llvm::ArrayRef<RelTy> rels); + void relocateNonAlloc(uint8_t *buf, Relocs<RelTy> rels); // Points to the canonical section. If ICF folds two sections, repl pointer of // one section points to the other. @@ -474,7 +488,8 @@ public: }; inline bool isStaticRelSecType(uint32_t type) { - return type == llvm::ELF::SHT_RELA || type == llvm::ELF::SHT_REL; + return type == llvm::ELF::SHT_RELA || type == llvm::ELF::SHT_CREL || + type == llvm::ELF::SHT_REL; } inline bool isDebugSection(const InputSectionBase &sec) { diff --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp index e2208da18dce..055fa21d44ca 100644 --- a/lld/ELF/LinkerScript.cpp +++ b/lld/ELF/LinkerScript.cpp @@ -61,6 +61,8 @@ static StringRef getOutputSectionName(const InputSectionBase *s) { assert(config->relocatable && (rel->flags & SHF_LINK_ORDER)); return s->name; } + if (s->type == SHT_CREL) + return saver().save(".crel" + out->name); if (s->type == SHT_RELA) return saver().save(".rela" + out->name); return saver().save(".rel" + out->name); diff --git a/lld/ELF/MarkLive.cpp b/lld/ELF/MarkLive.cpp index 45431e44a6c8..16e5883c2002 100644 --- a/lld/ELF/MarkLive.cpp +++ b/lld/ELF/MarkLive.cpp @@ -85,6 +85,13 @@ static uint64_t getAddend(InputSectionBase &sec, return rel.r_addend; } +// Currently, we assume all input CREL relocations have an explicit addend. +template <class ELFT> +static uint64_t getAddend(InputSectionBase &sec, + const typename ELFT::Crel &rel) { + return rel.r_addend; +} + template <class ELFT> template <class RelTy> void MarkLive<ELFT>::resolveReloc(InputSectionBase &sec, RelTy &rel, @@ -239,7 +246,8 @@ template <class ELFT> void MarkLive<ELFT>::run() { // all of them. We also want to preserve personality routines and LSDA // referenced by .eh_frame sections, so we scan them for that here. for (EhInputSection *eh : ctx.ehInputSections) { - const RelsOrRelas<ELFT> rels = eh->template relsOrRelas<ELFT>(); + const RelsOrRelas<ELFT> rels = + eh->template relsOrRelas<ELFT>(/*supportsCrel=*/false); if (rels.areRelocsRel()) scanEhFrameSection(*eh, rels.rels); else if (rels.relas.size()) @@ -310,6 +318,8 @@ template <class ELFT> void MarkLive<ELFT>::mark() { resolveReloc(sec, rel, false); for (const typename ELFT::Rela &rel : rels.relas) resolveReloc(sec, rel, false); + for (const typename ELFT::Crel &rel : rels.crels) + resolveReloc(sec, rel, false); for (InputSectionBase *isec : sec.dependentSections) enqueue(isec, 0); diff --git a/lld/ELF/OutputSections.cpp b/lld/ELF/OutputSections.cpp index 60de10061c53..29f18f89274f 100644 --- a/lld/ELF/OutputSections.cpp +++ b/lld/ELF/OutputSections.cpp @@ -18,6 +18,7 @@ #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/Config/llvm-config.h" // LLVM_ENABLE_ZLIB #include "llvm/Support/Compression.h" +#include "llvm/Support/LEB128.h" #include "llvm/Support/Parallel.h" #include "llvm/Support/Path.h" #include "llvm/Support/TimeProfiler.h" @@ -115,7 +116,19 @@ void OutputSection::recordSection(InputSectionBase *isec) { // other InputSections. 
void OutputSection::commitSection(InputSection *isec) { if (LLVM_UNLIKELY(type != isec->type)) { - if (hasInputSections || typeIsSet) { + if (!hasInputSections && !typeIsSet) { + type = isec->type; + } else if (isStaticRelSecType(type) && isStaticRelSecType(isec->type) && + (type == SHT_CREL) != (isec->type == SHT_CREL)) { + // Combine mixed SHT_REL[A] and SHT_CREL to SHT_CREL. + type = SHT_CREL; + if (type == SHT_REL) { + if (name.consume_front(".rel")) + name = saver().save(".crel" + name); + } else if (name.consume_front(".rela")) { + name = saver().save(".crel" + name); + } + } else { if (typeIsSet || !canMergeToProgbits(type) || !canMergeToProgbits(isec->type)) { // The (NOLOAD) changes the section type to SHT_NOBITS, the intention is @@ -133,8 +146,6 @@ void OutputSection::commitSection(InputSection *isec) { } if (!typeIsSet) type = SHT_PROGBITS; - } else { - type = isec->type; } } if (!hasInputSections) { @@ -470,6 +481,11 @@ void OutputSection::writeTo(uint8_t *buf, parallel::TaskGroup &tg) { llvm::TimeTraceScope timeScope("Write sections", name); if (type == SHT_NOBITS) return; + if (type == SHT_CREL && !(flags & SHF_ALLOC)) { + buf += encodeULEB128(crelHeader, buf); + memcpy(buf, crelBody.data(), crelBody.size()); + return; + } // If the section is compressed due to // --compress-debug-section/--compress-sections, the content is already known. @@ -505,6 +521,12 @@ void OutputSection::writeTo(uint8_t *buf, parallel::TaskGroup &tg) { if (nonZeroFiller) fill(buf, sections.empty() ? size : sections[0]->outSecOff, filler); + if (type == SHT_CREL && !(flags & SHF_ALLOC)) { + buf += encodeULEB128(crelHeader, buf); + memcpy(buf, crelBody.data(), crelBody.size()); + return; + } + auto fn = [=](size_t begin, size_t end) { size_t numSections = sections.size(); for (size_t i = begin; i != end; ++i) { @@ -592,6 +614,103 @@ static void finalizeShtGroup(OutputSection *os, InputSection *section) { os->size = (1 + seen.size()) * sizeof(uint32_t); } +template <class uint> +LLVM_ATTRIBUTE_ALWAYS_INLINE static void +encodeOneCrel(raw_svector_ostream &os, Elf_Crel<sizeof(uint) == 8> &out, + uint offset, const Symbol &sym, uint32_t type, uint addend) { + const auto deltaOffset = static_cast<uint64_t>(offset - out.r_offset); + out.r_offset = offset; + int64_t symidx = in.symTab->getSymbolIndex(sym); + if (sym.type == STT_SECTION) { + auto *d = dyn_cast<Defined>(&sym); + if (d) { + SectionBase *section = d->section; + assert(section->isLive()); + addend = sym.getVA(addend) - section->getOutputSection()->addr; + } else { + // Encode R_*_NONE(symidx=0). + symidx = type = addend = 0; + } + } + + // Similar to llvm::ELF::encodeCrel. + uint8_t b = deltaOffset * 8 + (out.r_symidx != symidx) + + (out.r_type != type ? 2 : 0) + + (uint(out.r_addend) != addend ? 4 : 0); + if (deltaOffset < 0x10) { + os << char(b); + } else { + os << char(b | 0x80); + encodeULEB128(deltaOffset >> 4, os); + } + if (b & 1) { + encodeSLEB128(static_cast<int32_t>(symidx - out.r_symidx), os); + out.r_symidx = symidx; + } + if (b & 2) { + encodeSLEB128(static_cast<int32_t>(type - out.r_type), os); + out.r_type = type; + } + if (b & 4) { + encodeSLEB128(std::make_signed_t<uint>(addend - out.r_addend), os); + out.r_addend = addend; + } +} + +template <class ELFT> +static size_t relToCrel(raw_svector_ostream &os, Elf_Crel<ELFT::Is64Bits> &out, + InputSection *relSec, InputSectionBase *sec) { + const auto &file = *cast<ELFFileBase>(relSec->file); + if (relSec->type == SHT_REL) { + // REL conversion is complex and unsupported yet. 
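// Illustrative sketch (not part of the patch) of the per-record byte
// format encodeOneCrel emits, assuming explicit addends: flag bits 0..2
// mark symbol/type/addend deltas, the offset delta starts at bit 3, and
// 0x80 flags a ULEB128 continuation for large offset deltas.
static void encodeDelta(raw_svector_ostream &os, uint64_t deltaOffset,
                        int64_t deltaSym, int64_t deltaType,
                        int64_t deltaAddend) {
  uint8_t b = deltaOffset * 8 + (deltaSym != 0) + (deltaType != 0 ? 2 : 0) +
              (deltaAddend != 0 ? 4 : 0);
  if (deltaOffset < 0x10) {
    os << char(b);
  } else {
    os << char(b | 0x80);                // low 4 offset bits plus flags
    encodeULEB128(deltaOffset >> 4, os); // remaining offset bits
  }
  if (b & 1)
    encodeSLEB128(deltaSym, os);
  if (b & 2)
    encodeSLEB128(deltaType, os);
  if (b & 4)
    encodeSLEB128(deltaAddend, os);
}
// Two relocations 8 bytes apart with identical symbol, type and addend
// thus encode the second record as the single byte 0x40 (8 * 8).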
+ errorOrWarn(toString(relSec) + ": REL cannot be converted to CREL"); + return 0; + } + auto rels = relSec->getDataAs<typename ELFT::Rela>(); + for (auto rel : rels) { + encodeOneCrel<typename ELFT::uint>( + os, out, sec->getVA(rel.r_offset), file.getRelocTargetSym(rel), + rel.getType(config->isMips64EL), getAddend<ELFT>(rel)); + } + return rels.size(); +} + +// Compute the content of a non-alloc CREL section due to -r or --emit-relocs. +// Input CREL sections are decoded while REL[A] need to be converted. +template <bool is64> void OutputSection::finalizeNonAllocCrel() { + using uint = typename Elf_Crel_Impl<is64>::uint; + raw_svector_ostream os(crelBody); + uint64_t totalCount = 0; + Elf_Crel<is64> out{}; + assert(commands.size() == 1); + auto *isd = cast<InputSectionDescription>(commands[0]); + for (InputSection *relSec : isd->sections) { + const auto &file = *cast<ELFFileBase>(relSec->file); + InputSectionBase *sec = relSec->getRelocatedSection(); + if (relSec->type == SHT_CREL) { + RelocsCrel<is64> entries(relSec->content_); + totalCount += entries.size(); + for (Elf_Crel_Impl<is64> r : entries) { + encodeOneCrel<uint>(os, out, uint(sec->getVA(r.r_offset)), + file.getSymbol(r.r_symidx), r.r_type, r.r_addend); + } + continue; + } + + // Convert REL[A] to CREL. + if constexpr (is64) { + totalCount += config->isLE ? relToCrel<ELF64LE>(os, out, relSec, sec) + : relToCrel<ELF64BE>(os, out, relSec, sec); + } else { + totalCount += config->isLE ? relToCrel<ELF32LE>(os, out, relSec, sec) + : relToCrel<ELF32BE>(os, out, relSec, sec); + } + } + + crelHeader = totalCount * 8 + 4; + size = getULEB128Size(crelHeader) + crelBody.size(); +} + void OutputSection::finalize() { InputSection *first = getFirstInputSection(this); @@ -628,6 +747,13 @@ void OutputSection::finalize() { InputSectionBase *s = first->getRelocatedSection(); info = s->getOutputSection()->sectionIndex; flags |= SHF_INFO_LINK; + // Finalize the content of non-alloc CREL. + if (type == SHT_CREL) { + if (config->is64) + finalizeNonAllocCrel<true>(); + else + finalizeNonAllocCrel<false>(); + } } // Returns true if S is in one of the many forms the compiler driver may pass diff --git a/lld/ELF/OutputSections.h b/lld/ELF/OutputSections.h index 78fede48a23f..8c0c52f34ac9 100644 --- a/lld/ELF/OutputSections.h +++ b/lld/ELF/OutputSections.h @@ -84,6 +84,11 @@ public: Expr alignExpr; Expr lmaExpr; Expr subalignExpr; + + // Used by non-alloc SHT_CREL to hold the header and content byte stream. + uint64_t crelHeader = 0; + SmallVector<char, 0> crelBody; + SmallVector<SectionCommand *, 0> commands; SmallVector<StringRef, 0> phdrs; std::optional<std::array<uint8_t, 4>> filler; @@ -106,6 +111,7 @@ public: // DATA_RELRO_END. 
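// Sketch of the framing used by finalizeNonAllocCrel and writeTo above: a
// non-alloc CREL section is one ULEB128 header word followed by the
// encoded records, and the header packs both count and flags:
//   crelHeader = totalCount * 8 + 4;  // bit 2: explicit addends, shift 0
//   count      = crelHeader / 8;      // recovered by the decoder
//   hasAddends = crelHeader & 4;      // decoder picks flagBits = 3
// hence size = getULEB128Size(crelHeader) + crelBody.size().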
bool relro = false; + template <bool is64> void finalizeNonAllocCrel(); void finalize(); template <class ELFT> void writeTo(uint8_t *buf, llvm::parallel::TaskGroup &tg); diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index 36857d72c647..e19b1e6c8efb 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -475,8 +475,9 @@ private: uint64_t relOff) const; void processAux(RelExpr expr, RelType type, uint64_t offset, Symbol &sym, int64_t addend) const; - template <class ELFT, class RelTy> void scanOne(RelTy *&i); - template <class ELFT, class RelTy> void scan(ArrayRef<RelTy> rels); + template <class ELFT, class RelTy> + void scanOne(typename Relocs<RelTy>::const_iterator &i); + template <class ELFT, class RelTy> void scan(Relocs<RelTy> rels); }; } // namespace @@ -1308,7 +1309,8 @@ static unsigned handleTlsRelocation(RelType type, Symbol &sym, // LoongArch does not yet implement transition from TLSDESC to LE/IE, so // generate TLSDESC dynamic relocation for the dynamic linker to handle. if (config->emachine == EM_LOONGARCH && - oneof<R_LOONGARCH_TLSDESC_PAGE_PC, R_TLSDESC, R_TLSDESC_CALL>(expr)) { + oneof<R_LOONGARCH_TLSDESC_PAGE_PC, R_TLSDESC, R_TLSDESC_PC, + R_TLSDESC_CALL>(expr)) { if (expr != R_TLSDESC_CALL) { sym.setFlags(NEEDS_TLSDESC); c.addReloc({expr, type, offset, addend, &sym}); @@ -1433,15 +1435,17 @@ static unsigned handleTlsRelocation(RelType type, Symbol &sym, return 0; } -template <class ELFT, class RelTy> void RelocationScanner::scanOne(RelTy *&i) { +template <class ELFT, class RelTy> +void RelocationScanner::scanOne(typename Relocs<RelTy>::const_iterator &i) { const RelTy &rel = *i; uint32_t symIndex = rel.getSymbol(config->isMips64EL); Symbol &sym = sec->getFile<ELFT>()->getSymbol(symIndex); RelType type; - if constexpr (ELFT::Is64Bits) { + if constexpr (ELFT::Is64Bits || RelTy::IsCrel) { type = rel.getType(config->isMips64EL); ++i; } else { + // CREL is unsupported for MIPS N32. if (config->mipsN32Abi) { type = getMipsN32RelType(i); } else { @@ -1494,15 +1498,18 @@ template <class ELFT, class RelTy> void RelocationScanner::scanOne(RelTy *&i) { if ((type == R_PPC64_TLSGD && expr == R_TLSDESC_CALL) || (type == R_PPC64_TLSLD && expr == R_TLSLD_HINT)) { - if (i == end) { - errorOrWarn("R_PPC64_TLSGD/R_PPC64_TLSLD may not be the last " - "relocation" + - getLocation(*sec, sym, offset)); - return; + // Skip the error check for CREL, which does not set `end`. + if constexpr (!RelTy::IsCrel) { + if (i == end) { + errorOrWarn("R_PPC64_TLSGD/R_PPC64_TLSLD may not be the last " + "relocation" + + getLocation(*sec, sym, offset)); + return; + } } - // Offset the 4-byte aligned R_PPC64_TLSGD by one byte in the NOTOC case, - // so we can discern it later from the toc-case. + // Offset the 4-byte aligned R_PPC64_TLSGD by one byte in the NOTOC + // case, so we can discern it later from the toc-case. if (i->getType(/*isMips64EL=*/false) == R_PPC64_REL24_NOTOC) ++offset; } @@ -1542,7 +1549,7 @@ template <class ELFT, class RelTy> void RelocationScanner::scanOne(RelTy *&i) { // instructions are generated by very old IBM XL compilers. Work around the // issue by disabling GD/LD to IE/LE relaxation. template <class RelTy> -static void checkPPC64TLSRelax(InputSectionBase &sec, ArrayRef<RelTy> rels) { +static void checkPPC64TLSRelax(InputSectionBase &sec, Relocs<RelTy> rels) { // Skip if sec is synthetic (sec.file is null) or if sec has been marked. 
if (!sec.file || sec.file->ppc64DisableTLSRelax) return; @@ -1574,7 +1581,7 @@ static void checkPPC64TLSRelax(InputSectionBase &sec, ArrayRef<RelTy> rels) { } template <class ELFT, class RelTy> -void RelocationScanner::scan(ArrayRef<RelTy> rels) { +void RelocationScanner::scan(Relocs<RelTy> rels) { // Not all relocations end up in Sec->Relocations, but a lot do. sec->relocations.reserve(rels.size()); @@ -1590,9 +1597,15 @@ void RelocationScanner::scan(ArrayRef<RelTy> rels) { if (isa<EhInputSection>(sec) || config->emachine == EM_S390) rels = sortRels(rels, storage); - end = static_cast<const void *>(rels.end()); - for (auto i = rels.begin(); i != end;) - scanOne<ELFT>(i); + if constexpr (RelTy::IsCrel) { + for (auto i = rels.begin(); i != rels.end();) + scanOne<ELFT, RelTy>(i); + } else { + // The non-CREL code path has additional check for PPC64 TLS. + end = static_cast<const void *>(rels.end()); + for (auto i = rels.begin(); i != end;) + scanOne<ELFT, RelTy>(i); + } // Sort relocations by offset for more efficient searching for // R_RISCV_PCREL_HI20 and R_PPC64_ADDR64. @@ -1608,7 +1621,9 @@ template <class ELFT> void RelocationScanner::scanSection(InputSectionBase &s) { sec = &s; getter = OffsetGetter(s); const RelsOrRelas<ELFT> rels = s.template relsOrRelas<ELFT>(); - if (rels.areRelocsRel()) + if (rels.areRelocsCrel()) + scan<ELFT>(rels.crels); + else if (rels.areRelocsRel()) scan<ELFT>(rels.rels); else scan<ELFT>(rels.relas); @@ -2409,11 +2424,7 @@ template <class ELFT> void elf::checkNoCrossRefs() { if (!isd) continue; parallelForEach(isd->sections, [&](InputSection *sec) { - const RelsOrRelas<ELFT> rels = sec->template relsOrRelas<ELFT>(); - if (rels.areRelocsRel()) - scanCrossRefs<ELFT>(noxref, osec, sec, rels.rels); - else - scanCrossRefs<ELFT>(noxref, osec, sec, rels.relas); + invokeOnRelocs(*sec, scanCrossRefs<ELFT>, noxref, osec, sec); }); } } diff --git a/lld/ELF/Relocations.h b/lld/ELF/Relocations.h index 1bee0dedf858..aaa4581490a2 100644 --- a/lld/ELF/Relocations.h +++ b/lld/ELF/Relocations.h @@ -12,6 +12,7 @@ #include "lld/Common/LLVM.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/Object/ELFTypes.h" #include <vector> namespace lld::elf { @@ -205,6 +206,91 @@ private: uint32_t pass = 0; }; +// Decode LEB128 without error checking. Only used by performance critical code +// like RelocsCrel. +inline uint64_t readLEB128(const uint8_t *&p, uint64_t leb) { + uint64_t acc = 0, shift = 0, byte; + do { + byte = *p++; + acc |= (byte - 128 * (byte >= leb)) << shift; + shift += 7; + } while (byte >= 128); + return acc; +} +inline uint64_t readULEB128(const uint8_t *&p) { return readLEB128(p, 128); } +inline int64_t readSLEB128(const uint8_t *&p) { return readLEB128(p, 64); } + +// This class implements a CREL iterator that does not allocate extra memory. +template <bool is64> struct RelocsCrel { + using uint = std::conditional_t<is64, uint64_t, uint32_t>; + struct const_iterator { + using iterator_category = std::forward_iterator_tag; + using value_type = llvm::object::Elf_Crel_Impl<is64>; + using difference_type = ptrdiff_t; + using pointer = value_type *; + using reference = const value_type &; + uint32_t count; + uint8_t flagBits, shift; + const uint8_t *p; + llvm::object::Elf_Crel_Impl<is64> crel{}; + const_iterator(size_t hdr, const uint8_t *p) + : count(hdr / 8), flagBits(hdr & 4 ? 3 : 2), shift(hdr % 4), p(p) { + if (count) + step(); + } + void step() { + // See object::decodeCrel. 
+ const uint8_t b = *p++; + crel.r_offset += b >> flagBits << shift; + if (b >= 0x80) + crel.r_offset += + ((readULEB128(p) << (7 - flagBits)) - (0x80 >> flagBits)) << shift; + if (b & 1) + crel.r_symidx += readSLEB128(p); + if (b & 2) + crel.r_type += readSLEB128(p); + if (b & 4 && flagBits == 3) + crel.r_addend += static_cast<uint>(readSLEB128(p)); + } + llvm::object::Elf_Crel_Impl<is64> operator*() const { return crel; }; + const llvm::object::Elf_Crel_Impl<is64> *operator->() const { + return &crel; + } + // For llvm::enumerate. + bool operator==(const const_iterator &r) const { return count == r.count; } + bool operator!=(const const_iterator &r) const { return count != r.count; } + const_iterator &operator++() { + if (--count) + step(); + return *this; + } + // For RelocationScanner::scanOne. + void operator+=(size_t n) { + for (; n; --n) + operator++(); + } + }; + + size_t hdr = 0; + const uint8_t *p = nullptr; + + constexpr RelocsCrel() = default; + RelocsCrel(const uint8_t *p) : hdr(readULEB128(p)) { this->p = p; } + size_t size() const { return hdr / 8; } + const_iterator begin() const { return {hdr, p}; } + const_iterator end() const { return {0, nullptr}; } +}; + +template <class RelTy> struct Relocs : ArrayRef<RelTy> { + Relocs() = default; + Relocs(ArrayRef<RelTy> a) : ArrayRef<RelTy>(a) {} +}; + +template <bool is64> +struct Relocs<llvm::object::Elf_Crel_Impl<is64>> : RelocsCrel<is64> { + using RelocsCrel<is64>::RelocsCrel; +}; + // Return a int64_t to make sure we get the sign extension out of the way as // early as possible. template <class ELFT> @@ -215,20 +301,32 @@ template <class ELFT> static inline int64_t getAddend(const typename ELFT::Rela &rel) { return rel.r_addend; } +template <class ELFT> +static inline int64_t getAddend(const typename ELFT::Crel &rel) { + return rel.r_addend; +} template <typename RelTy> -ArrayRef<RelTy> sortRels(ArrayRef<RelTy> rels, SmallVector<RelTy, 0> &storage) { +inline Relocs<RelTy> sortRels(Relocs<RelTy> rels, + SmallVector<RelTy, 0> &storage) { auto cmp = [](const RelTy &a, const RelTy &b) { return a.r_offset < b.r_offset; }; if (!llvm::is_sorted(rels, cmp)) { storage.assign(rels.begin(), rels.end()); llvm::stable_sort(storage, cmp); - rels = storage; + rels = Relocs<RelTy>(storage); } return rels; } +template <bool is64> +inline Relocs<llvm::object::Elf_Crel_Impl<is64>> +sortRels(Relocs<llvm::object::Elf_Crel_Impl<is64>> rels, + SmallVector<llvm::object::Elf_Crel_Impl<is64>, 0> &storage) { + return {}; +} + // Returns true if Expr refers a GOT entry. Note that this function returns // false for TLS variables even though they need GOT, because TLS variables uses // GOT differently than the regular variables. diff --git a/lld/ELF/ScriptLexer.cpp b/lld/ELF/ScriptLexer.cpp index c8c02ab0f3e0..847cd8423b49 100644 --- a/lld/ELF/ScriptLexer.cpp +++ b/lld/ELF/ScriptLexer.cpp @@ -20,11 +20,6 @@ // in various corner cases. We do not care much about efficiency because // the time spent in parsing linker scripts is usually negligible. // -// Our grammar of the linker script is LL(2), meaning that it needs at -// most two-token lookahead to parse. The only place we need two-token -// lookahead is labels in version scripts, where we need to parse "local :" -// as if "local:". -// // Overall, this lexer works fine for most linker scripts. There might // be room for improving compatibility, but that's probably not at the // top of our todo list. 
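// A worked example of the branchless readLEB128 above, relying only on the
// functions shown: with leb == 128 the subtraction merely strips the
// continuation bit (ULEB128); with leb == 64 a final byte with its sign
// bit set also triggers it, and unsigned wrap-around sign-extends (SLEB128).
#include <cassert>
#include <cstdint>
int main() {
  // SLEB128 encoding of -100 is {0x9C, 0x7F}:
  //   0x9C: 0x9C - 128 = 0x1C            -> acc = 0x1C, shift = 7
  //   0x7F: 0x7F - 128 wraps to ~0ull    -> acc |= ~0ull << 7, loop ends
  const uint8_t buf[] = {0x9C, 0x7F};
  const uint8_t *p = buf;
  assert(readSLEB128(p) == -100);
  return 0;
}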
diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp index 5d3f3df216b8..41053c647275 100644 --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -455,7 +455,8 @@ template <class ELFT> void EhFrameSection::addSectionAux(EhInputSection *sec) { if (!sec->isLive()) return; - const RelsOrRelas<ELFT> rels = sec->template relsOrRelas<ELFT>(); + const RelsOrRelas<ELFT> rels = + sec->template relsOrRelas<ELFT>(/*supportsCrel=*/false); if (rels.areRelocsRel()) addRecords<ELFT>(sec, rels.rels); else @@ -489,7 +490,8 @@ void EhFrameSection::iterateFDEWithLSDA( DenseSet<size_t> ciesWithLSDA; for (EhInputSection *sec : sections) { ciesWithLSDA.clear(); - const RelsOrRelas<ELFT> rels = sec->template relsOrRelas<ELFT>(); + const RelsOrRelas<ELFT> rels = + sec->template relsOrRelas<ELFT>(/*supportsCrel=*/false); if (rels.areRelocsRel()) iterateFDEWithLSDAAux<ELFT>(*sec, rels.rels, ciesWithLSDA, fn); else @@ -3203,10 +3205,10 @@ template <class ELFT> DebugNamesSection<ELFT>::DebugNamesSection() { template <class ELFT> template <class RelTy> void DebugNamesSection<ELFT>::getNameRelocs( - InputSection *sec, ArrayRef<RelTy> rels, - DenseMap<uint32_t, uint32_t> &relocs) { + const InputFile &file, DenseMap<uint32_t, uint32_t> &relocs, + Relocs<RelTy> rels) { for (const RelTy &rel : rels) { - Symbol &sym = sec->file->getRelocTargetSym(rel); + Symbol &sym = file.getRelocTargetSym(rel); relocs[rel.r_offset] = sym.getVA(getAddend<ELFT>(rel)); } } @@ -3216,11 +3218,7 @@ template <class ELFT> void DebugNamesSection<ELFT>::finalizeContents() { auto relocs = std::make_unique<DenseMap<uint32_t, uint32_t>[]>(numChunks); parallelFor(0, numChunks, [&](size_t i) { InputSection *sec = inputSections[i]; - auto rels = sec->template relsOrRelas<ELFT>(); - if (rels.areRelocsRel()) - getNameRelocs(sec, rels.rels, relocs.get()[i]); - else - getNameRelocs(sec, rels.relas, relocs.get()[i]); + invokeOnRelocs(*sec, getNameRelocs, *sec->file, relocs.get()[i]); // Relocate CU offsets with .debug_info + X relocations. OutputChunk &chunk = chunks.get()[i]; diff --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h index eaa09ea7194f..d4169e1e1aca 100644 --- a/lld/ELF/SyntheticSections.h +++ b/lld/ELF/SyntheticSections.h @@ -916,8 +916,9 @@ public: void writeTo(uint8_t *buf) override; template <class RelTy> - void getNameRelocs(InputSection *sec, ArrayRef<RelTy> rels, - llvm::DenseMap<uint32_t, uint32_t> &relocs); + void getNameRelocs(const InputFile &file, + llvm::DenseMap<uint32_t, uint32_t> &relocs, + Relocs<RelTy> rels); private: static void readOffsets(InputChunk &inputChunk, OutputChunk &chunk, diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp index 5cffdb771a73..8e3a746a08eb 100644 --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -401,10 +401,19 @@ template <class ELFT> static void markUsedLocalSymbols() { InputSection *isec = dyn_cast_or_null<InputSection>(s); if (!isec) continue; - if (isec->type == SHT_REL) + if (isec->type == SHT_REL) { markUsedLocalSymbolsImpl(f, isec->getDataAs<typename ELFT::Rel>()); - else if (isec->type == SHT_RELA) + } else if (isec->type == SHT_RELA) { markUsedLocalSymbolsImpl(f, isec->getDataAs<typename ELFT::Rela>()); + } else if (isec->type == SHT_CREL) { + // The is64=true variant also works with ELF32 since only the r_symidx + // member is used. 
+ for (Elf_Crel_Impl<true> r : RelocsCrel<true>(isec->content_)) { + Symbol &sym = file->getSymbol(r.r_symidx); + if (sym.isLocal()) + sym.used = true; + } + } } } } diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst index 09081e421e90..98fddcd7bf7f 100644 --- a/lld/docs/ReleaseNotes.rst +++ b/lld/docs/ReleaseNotes.rst @@ -26,6 +26,12 @@ Non-comprehensive list of changes in this release ELF Improvements ---------------- +* Experimental CREL relocations with explicit addends are now supported using the + temporary section type code 0x40000020 (``clang -c -Wa,--crel,--allow-experimental-crel``). + LLVM will change the code and break compatibility (Clang and lld of different + versions are not guaranteed to cooperate, unlike other features). CREL with + implicit addends are not supported. + (`#98115 <https://github.com/llvm/llvm-project/pull/98115>`_) * ``EI_OSABI`` in the output is now inferred from input object files. (`#97144 <https://github.com/llvm/llvm-project/pull/97144>`_) * ``--compress-sections <section-glib>={none,zlib,zstd}[:level]`` is added to compress @@ -88,7 +94,7 @@ ELF Improvements (`#94099 <https://github.com/llvm/llvm-project/pull/94099>`_) Non-alloc orphan sections are now placed at the end. (`#94519 <https://github.com/llvm/llvm-project/pull/94519>`_) -* R_X86_64_REX_GOTPCRELX of the addq form is no longer incorrectly optimized when the address is larger than 0x80000000. +* ``R_X86_64_REX_GOTPCRELX`` of the addq form is no longer incorrectly optimized when the address is larger than 0x80000000. Breaking changes ---------------- diff --git a/lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_arm.h b/lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_arm.h index 89ffa617294a..b9537e6952f6 100644 --- a/lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_arm.h +++ b/lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_arm.h @@ -30,7 +30,7 @@ class NativeProcessFreeBSD; class NativeRegisterContextFreeBSD_arm : public NativeRegisterContextFreeBSD { public: NativeRegisterContextFreeBSD_arm(const ArchSpec &target_arch, - NativeThreadProtocol &native_thread); + NativeThreadFreeBSD &native_thread); uint32_t GetRegisterSetCount() const override; diff --git a/lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_mips64.h b/lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_mips64.h index 0b4a508a7d5d..286b4fd8d8b9 100644 --- a/lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_mips64.h +++ b/lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_mips64.h @@ -31,7 +31,7 @@ class NativeRegisterContextFreeBSD_mips64 : public NativeRegisterContextFreeBSD { public: NativeRegisterContextFreeBSD_mips64(const ArchSpec &target_arch, - NativeThreadProtocol &native_thread); + NativeThreadFreeBSD &native_thread); uint32_t GetRegisterSetCount() const override; diff --git a/lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_powerpc.h b/lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_powerpc.h index 3df371036f91..420db822acc0 100644 --- a/lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_powerpc.h +++ b/lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_powerpc.h @@ -31,7 +31,7 @@ class NativeRegisterContextFreeBSD_powerpc : public NativeRegisterContextFreeBSD { public: NativeRegisterContextFreeBSD_powerpc(const ArchSpec &target_arch, - NativeThreadProtocol &native_thread); + NativeThreadFreeBSD &native_thread); uint32_t 
GetRegisterSetCount() const override; diff --git a/llvm/include/llvm/ADT/STLExtras.h b/llvm/include/llvm/ADT/STLExtras.h index e34068592de8..8f988d01cb2a 100644 --- a/llvm/include/llvm/ADT/STLExtras.h +++ b/llvm/include/llvm/ADT/STLExtras.h @@ -2027,6 +2027,12 @@ template <typename L, typename R> bool equal(L &&LRange, R &&RRange) { adl_end(RRange)); } +template <typename L, typename R, typename BinaryPredicate> +bool equal(L &&LRange, R &&RRange, BinaryPredicate P) { + return std::equal(adl_begin(LRange), adl_end(LRange), adl_begin(RRange), + adl_end(RRange), P); +} + /// Returns true if all elements in Range are equal or when the Range is empty. template <typename R> bool all_equal(R &&Range) { auto Begin = adl_begin(Range); diff --git a/llvm/include/llvm/CodeGen/TargetFrameLowering.h b/llvm/include/llvm/CodeGen/TargetFrameLowering.h index 72978b2f746d..0656c0d739fd 100644 --- a/llvm/include/llvm/CodeGen/TargetFrameLowering.h +++ b/llvm/include/llvm/CodeGen/TargetFrameLowering.h @@ -343,6 +343,13 @@ public: return getFrameIndexReference(MF, FI, FrameReg); } + /// getFrameIndexReferenceFromSP - This method returns the offset from the + /// stack pointer to the slot of the specified index. This function serves to + /// provide a comparable offset from a single reference point (the value of + /// the stack-pointer at function entry) that can be used for analysis. + virtual StackOffset getFrameIndexReferenceFromSP(const MachineFunction &MF, + int FI) const; + /// Returns the callee-saved registers as computed by determineCalleeSaves /// in the BitVector \p SavedRegs. virtual void getCalleeSaves(const MachineFunction &MF, diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h index fe3f92da400f..94c8fa092f45 100644 --- a/llvm/include/llvm/IR/IntrinsicInst.h +++ b/llvm/include/llvm/IR/IntrinsicInst.h @@ -569,6 +569,10 @@ public: /// The llvm.vp.* intrinsics for this instruction Opcode static Intrinsic::ID getForOpcode(unsigned OC); + /// The llvm.vp.* intrinsics for this intrinsic ID \p Id. Return \p Id if it + /// is already a VP intrinsic. + static Intrinsic::ID getForIntrinsic(Intrinsic::ID Id); + // Whether \p ID is a VP intrinsic ID. static bool isVPIntrinsic(Intrinsic::ID); diff --git a/llvm/include/llvm/IR/VectorBuilder.h b/llvm/include/llvm/IR/VectorBuilder.h index 6af7f6075551..dbb9f4c7336d 100644 --- a/llvm/include/llvm/IR/VectorBuilder.h +++ b/llvm/include/llvm/IR/VectorBuilder.h @@ -15,7 +15,6 @@ #ifndef LLVM_IR_VECTORBUILDER_H #define LLVM_IR_VECTORBUILDER_H -#include <llvm/Analysis/IVDescriptors.h> #include <llvm/IR/IRBuilder.h> #include <llvm/IR/InstrTypes.h> #include <llvm/IR/Instruction.h> @@ -100,11 +99,11 @@ public: const Twine &Name = Twine()); /// Emit a VP reduction intrinsic call for recurrence kind. - /// \param Kind The kind of recurrence + /// \param RdxID The intrinsic ID of llvm.vector.reduce.* /// \param ValTy The type of operand which the reduction operation is /// performed. /// \param VecOpArray The operand list. 
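// Hypothetical use of the new llvm::equal overload above: it compares two
// ranges element-wise under a predicate and, unlike the three-argument
// std::equal form, also requires the lengths to match.
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include <cstdlib>
static bool sameMagnitudes(llvm::ArrayRef<int> a, llvm::ArrayRef<int> b) {
  return llvm::equal(a, b,
                     [](int x, int y) { return std::abs(x) == std::abs(y); });
}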
- Value *createSimpleTargetReduction(RecurKind Kind, Type *ValTy, + Value *createSimpleTargetReduction(Intrinsic::ID RdxID, Type *ValTy, ArrayRef<Value *> VecOpArray, const Twine &Name = Twine()); }; diff --git a/llvm/include/llvm/MC/MCAsmBackend.h b/llvm/include/llvm/MC/MCAsmBackend.h index d1d1814dd8b5..3f88ac02cd92 100644 --- a/llvm/include/llvm/MC/MCAsmBackend.h +++ b/llvm/include/llvm/MC/MCAsmBackend.h @@ -217,9 +217,8 @@ public: virtual bool writeNopData(raw_ostream &OS, uint64_t Count, const MCSubtargetInfo *STI) const = 0; - // Return true if fragment offsets have been adjusted and an extra layout - // iteration is needed. - virtual bool finishLayout(const MCAssembler &Asm) const { return false; } + /// Give backend an opportunity to finish layout after relaxation + virtual void finishLayout(MCAssembler const &Asm) const {} /// Handle any target-specific assembler flags. By default, do nothing. virtual void handleAssemblerFlag(MCAssemblerFlag Flag) {} diff --git a/llvm/include/llvm/MC/MCAssembler.h b/llvm/include/llvm/MC/MCAssembler.h index d9752912ee66..c6fa48128d18 100644 --- a/llvm/include/llvm/MC/MCAssembler.h +++ b/llvm/include/llvm/MC/MCAssembler.h @@ -111,7 +111,6 @@ private: /// Check whether the given fragment needs relaxation. bool fragmentNeedsRelaxation(const MCRelaxableFragment *IF) const; - void layoutSection(MCSection &Sec); /// Perform one layout iteration and return true if any offsets /// were adjusted. bool layoutOnce(); @@ -148,9 +147,10 @@ public: uint64_t computeFragmentSize(const MCFragment &F) const; void layoutBundle(MCFragment *Prev, MCFragment *F) const; + void ensureValid(MCSection &Sec) const; // Get the offset of the given fragment inside its containing section. - uint64_t getFragmentOffset(const MCFragment &F) const { return F.Offset; } + uint64_t getFragmentOffset(const MCFragment &F) const; uint64_t getSectionAddressSize(const MCSection &Sec) const; uint64_t getSectionFileSize(const MCSection &Sec) const; diff --git a/llvm/include/llvm/MC/MCSection.h b/llvm/include/llvm/MC/MCSection.h index 1289d6f6f9f6..dcdcd094fa17 100644 --- a/llvm/include/llvm/MC/MCSection.h +++ b/llvm/include/llvm/MC/MCSection.h @@ -99,6 +99,8 @@ private: /// Whether this section has had instructions emitted into it. bool HasInstructions : 1; + bool HasLayout : 1; + bool IsRegistered : 1; bool IsText : 1; @@ -167,6 +169,9 @@ public: bool hasInstructions() const { return HasInstructions; } void setHasInstructions(bool Value) { HasInstructions = Value; } + bool hasLayout() const { return HasLayout; } + void setHasLayout(bool Value) { HasLayout = Value; } + bool isRegistered() const { return IsRegistered; } void setIsRegistered(bool Value) { IsRegistered = Value; } diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h index b01a447f3c28..56880bd4822c 100644 --- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h +++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h @@ -359,6 +359,10 @@ bool canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, SinkAndHoistLICMFlags &LICMFlags, OptimizationRemarkEmitter *ORE = nullptr); +/// Returns the llvm.vector.reduce intrinsic that corresponds to the recurrence +/// kind. +constexpr Intrinsic::ID getReductionIntrinsicID(RecurKind RK); + /// Returns the arithmetic instruction opcode used when expanding a reduction. 
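// Sketch of how the refactored pieces compose (flow inferred from the
// patch): a recurrence kind now maps to its llvm.vector.reduce.* intrinsic
// first, and VPIntrinsic::getForIntrinsic translates that to the matching
// llvm.vp.reduce.*, replacing VectorBuilder's hand-written switch.
Intrinsic::ID RdxID = getReductionIntrinsicID(RecurKind::FAdd);
// RdxID == Intrinsic::vector_reduce_fadd
Intrinsic::ID VPID = VPIntrinsic::getForIntrinsic(RdxID);
// VPID  == Intrinsic::vp_reduce_fadd; IDs already in VP form pass through.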
unsigned getArithmeticReductionInstruction(Intrinsic::ID RdxID); diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp index 1c35a88b4dc4..043ea2019148 100644 --- a/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -3673,6 +3673,13 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { LHSVals.pruneSubRegValues(LHS, ShrinkMask); RHSVals.pruneSubRegValues(LHS, ShrinkMask); + } else if (TrackSubRegLiveness && !CP.getDstIdx() && CP.getSrcIdx()) { + LHS.createSubRangeFrom(LIS->getVNInfoAllocator(), + CP.getNewRC()->getLaneMask(), LHS); + mergeSubRangeInto(LHS, RHS, TRI->getSubRegIndexLaneMask(CP.getSrcIdx()), CP, + CP.getDstIdx()); + LHSVals.pruneMainSegments(LHS, ShrinkMainRange); + LHSVals.pruneSubRegValues(LHS, ShrinkMask); } // The merging algorithm in LiveInterval::join() can't handle conflicting diff --git a/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp b/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp index 940aecd1cb36..0a7a6bad4e86 100644 --- a/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp +++ b/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp @@ -51,6 +51,8 @@ struct StackFrameLayoutAnalysisPass : public MachineFunctionPass { enum SlotType { Spill, // a Spill slot + Fixed, // a Fixed slot (e.g. arguments passed on the stack) + VariableSized, // a variable sized object StackProtector, // Stack Protector slot Variable, // a slot used to store a local data (could be a tmp) Invalid // It's an error for a slot to have this type @@ -60,29 +62,42 @@ struct StackFrameLayoutAnalysisPass : public MachineFunctionPass { int Slot; int Size; int Align; - int Offset; + StackOffset Offset; SlotType SlotTy; bool Scalable; - SlotData(const MachineFrameInfo &MFI, const int ValOffset, const int Idx) + SlotData(const MachineFrameInfo &MFI, const StackOffset Offset, + const int Idx) : Slot(Idx), Size(MFI.getObjectSize(Idx)), - Align(MFI.getObjectAlign(Idx).value()), - Offset(MFI.getObjectOffset(Idx) - ValOffset), SlotTy(Invalid), - Scalable(false) { + Align(MFI.getObjectAlign(Idx).value()), Offset(Offset), + SlotTy(Invalid), Scalable(false) { Scalable = MFI.getStackID(Idx) == TargetStackID::ScalableVector; if (MFI.isSpillSlotObjectIndex(Idx)) SlotTy = SlotType::Spill; - else if (Idx == MFI.getStackProtectorIndex()) + else if (MFI.isFixedObjectIndex(Idx)) + SlotTy = SlotType::Fixed; + else if (MFI.isVariableSizedObjectIndex(Idx)) + SlotTy = SlotType::VariableSized; + else if (MFI.hasStackProtectorIndex() && + Idx == MFI.getStackProtectorIndex()) SlotTy = SlotType::StackProtector; else SlotTy = SlotType::Variable; } + bool isVarSize() const { return SlotTy == SlotType::VariableSized; } + // We use this to sort in reverse order, so that the layout is displayed - // correctly. Scalable slots are sorted to the end of the list. + // correctly. Variable sized slots are sorted to the end of the list, as + // offsets are currently incorrect for these but they reside at the end of + // the stack frame. The Slot index is used to ensure deterministic order + // when offsets are equal. 
bool operator<(const SlotData &Rhs) const { - return std::make_tuple(!Scalable, Offset) > - std::make_tuple(!Rhs.Scalable, Rhs.Offset); + return std::make_tuple(!isVarSize(), + Offset.getFixed() + Offset.getScalable(), Slot) > + std::make_tuple(!Rhs.isVarSize(), + Rhs.Offset.getFixed() + Rhs.Offset.getScalable(), + Rhs.Slot); } }; @@ -121,6 +136,10 @@ struct StackFrameLayoutAnalysisPass : public MachineFunctionPass { switch (Ty) { case SlotType::Spill: return "Spill"; + case SlotType::Fixed: + return "Fixed"; + case SlotType::VariableSized: + return "VariableSized"; case SlotType::StackProtector: return "Protector"; case SlotType::Variable: @@ -149,15 +168,27 @@ struct StackFrameLayoutAnalysisPass : public MachineFunctionPass { // For example we store the Offset in YAML as: // ... // - Offset: -8 + // - ScalableOffset: -16 + // Note: the ScalableOffset entries are added only for slots with non-zero + // scalable offsets. // - // But we print it to the CLI as + // But we print it to the CLI as: // Offset: [SP-8] + // + // Or with non-zero scalable offset: + // Offset: [SP-8-16 x vscale] // Negative offsets will print a leading `-`, so only add `+` std::string Prefix = - formatv("\nOffset: [SP{0}", (D.Offset < 0) ? "" : "+").str(); - Rem << Prefix << ore::NV("Offset", D.Offset) - << "], Type: " << ore::NV("Type", getTypeString(D.SlotTy)) + formatv("\nOffset: [SP{0}", (D.Offset.getFixed() < 0) ? "" : "+").str(); + Rem << Prefix << ore::NV("Offset", D.Offset.getFixed()); + + if (D.Offset.getScalable()) { + Rem << ((D.Offset.getScalable() < 0) ? "" : "+") + << ore::NV("ScalableOffset", D.Offset.getScalable()) << " x vscale"; + } + + Rem << "], Type: " << ore::NV("Type", getTypeString(D.SlotTy)) << ", Align: " << ore::NV("Align", D.Align) << ", Size: " << ore::NV("Size", ElementCount::get(D.Size, D.Scalable)); } @@ -170,17 +201,22 @@ struct StackFrameLayoutAnalysisPass : public MachineFunctionPass { Rem << "\n " << ore::NV("DataLoc", Loc); } + StackOffset getStackOffset(const MachineFunction &MF, + const MachineFrameInfo &MFI, + const TargetFrameLowering *FI, int FrameIdx) { + if (!FI) + return StackOffset::getFixed(MFI.getObjectOffset(FrameIdx)); + + return FI->getFrameIndexReferenceFromSP(MF, FrameIdx); + } + void emitStackFrameLayoutRemarks(MachineFunction &MF, MachineOptimizationRemarkAnalysis &Rem) { const MachineFrameInfo &MFI = MF.getFrameInfo(); if (!MFI.hasStackObjects()) return; - // ValOffset is the offset to the local area from the SP at function entry. - // To display the true offset from SP, we need to subtract ValOffset from - // MFI's ObjectOffset. const TargetFrameLowering *FI = MF.getSubtarget().getFrameLowering(); - const int ValOffset = (FI ? 
FI->getOffsetOfLocalArea() : 0); LLVM_DEBUG(dbgs() << "getStackProtectorIndex ==" << MFI.getStackProtectorIndex() << "\n"); @@ -194,7 +230,7 @@ struct StackFrameLayoutAnalysisPass : public MachineFunctionPass { Idx != EndIdx; ++Idx) { if (MFI.isDeadObjectIndex(Idx)) continue; - SlotInfo.emplace_back(MFI, ValOffset, Idx); + SlotInfo.emplace_back(MFI, getStackOffset(MF, MFI, FI, Idx), Idx); } // sort the ordering, to match the actual layout in memory diff --git a/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp b/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp index 48a2094f5d45..7d054cb7c7c7 100644 --- a/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp +++ b/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp @@ -61,6 +61,20 @@ TargetFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, MFI.getOffsetAdjustment()); } +/// Returns the offset from the stack pointer to the slot of the specified +/// index. This function serves to provide a comparable offset from a single +/// reference point (the value of the stack-pointer at function entry) that can +/// be used for analysis. This is the default implementation using +/// MachineFrameInfo offsets. +StackOffset +TargetFrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF, + int FI) const { + // To display the true offset from SP, we need to subtract the offset to the + // local area from MFI's ObjectOffset. + return StackOffset::getFixed(MF.getFrameInfo().getObjectOffset(FI) - + getOffsetOfLocalArea()); +} + bool TargetFrameLowering::needsFrameIndexResolution( const MachineFunction &MF) const { return MF.getFrameInfo().hasStackObjects(); diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp index 64a14da55b15..db3b0196f66f 100644 --- a/llvm/lib/IR/IntrinsicInst.cpp +++ b/llvm/lib/IR/IntrinsicInst.cpp @@ -599,6 +599,25 @@ Intrinsic::ID VPIntrinsic::getForOpcode(unsigned IROPC) { return Intrinsic::not_intrinsic; } +constexpr static Intrinsic::ID getForIntrinsic(Intrinsic::ID Id) { + if (::isVPIntrinsic(Id)) + return Id; + + switch (Id) { + default: + break; +#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) break; +#define VP_PROPERTY_FUNCTIONAL_INTRINSIC(INTRIN) case Intrinsic::INTRIN: +#define END_REGISTER_VP_INTRINSIC(VPID) return Intrinsic::VPID; +#include "llvm/IR/VPIntrinsics.def" + } + return Intrinsic::not_intrinsic; +} + +Intrinsic::ID VPIntrinsic::getForIntrinsic(Intrinsic::ID Id) { + return ::getForIntrinsic(Id); +} + bool VPIntrinsic::canIgnoreVectorLengthParam() const { using namespace PatternMatch; diff --git a/llvm/lib/IR/Metadata.cpp b/llvm/lib/IR/Metadata.cpp index ae5f5de14232..fd2f4d184162 100644 --- a/llvm/lib/IR/Metadata.cpp +++ b/llvm/lib/IR/Metadata.cpp @@ -1318,10 +1318,11 @@ MDNode *MDNode::getMostGenericRange(MDNode *A, MDNode *B) { ++BI; } - // If we have more than 2 ranges (4 endpoints) we have to try to merge + // We haven't handled wrap in the previous merge, + // if we have at least 2 ranges (4 endpoints) we have to try to merge // the last and first ones. 
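// Illustration of the case the relaxed threshold targets (example chosen
// by the editor, not from the patch): with exactly two ranges, i.e. four
// endpoints such as [0, 2) and [8, 1) over i8 where the second wraps past
// the top, the old `Size > 4` test never attempted the first/last merge,
// leaving a wrapped union such as [8, 2) unformed.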
unsigned Size = EndPoints.size(); - if (Size > 4) { + if (Size > 2) { ConstantInt *FB = EndPoints[0]; ConstantInt *FE = EndPoints[1]; if (tryMergeRange(EndPoints, FB, FE)) { diff --git a/llvm/lib/IR/VectorBuilder.cpp b/llvm/lib/IR/VectorBuilder.cpp index 5ff308287989..8dbf25277bf5 100644 --- a/llvm/lib/IR/VectorBuilder.cpp +++ b/llvm/lib/IR/VectorBuilder.cpp @@ -60,60 +60,13 @@ Value *VectorBuilder::createVectorInstruction(unsigned Opcode, Type *ReturnTy, return createVectorInstructionImpl(VPID, ReturnTy, InstOpArray, Name); } -Value *VectorBuilder::createSimpleTargetReduction(RecurKind Kind, Type *ValTy, +Value *VectorBuilder::createSimpleTargetReduction(Intrinsic::ID RdxID, + Type *ValTy, ArrayRef<Value *> InstOpArray, const Twine &Name) { - Intrinsic::ID VPID; - switch (Kind) { - case RecurKind::Add: - VPID = Intrinsic::vp_reduce_add; - break; - case RecurKind::Mul: - VPID = Intrinsic::vp_reduce_mul; - break; - case RecurKind::And: - VPID = Intrinsic::vp_reduce_and; - break; - case RecurKind::Or: - VPID = Intrinsic::vp_reduce_or; - break; - case RecurKind::Xor: - VPID = Intrinsic::vp_reduce_xor; - break; - case RecurKind::FMulAdd: - case RecurKind::FAdd: - VPID = Intrinsic::vp_reduce_fadd; - break; - case RecurKind::FMul: - VPID = Intrinsic::vp_reduce_fmul; - break; - case RecurKind::SMax: - VPID = Intrinsic::vp_reduce_smax; - break; - case RecurKind::SMin: - VPID = Intrinsic::vp_reduce_smin; - break; - case RecurKind::UMax: - VPID = Intrinsic::vp_reduce_umax; - break; - case RecurKind::UMin: - VPID = Intrinsic::vp_reduce_umin; - break; - case RecurKind::FMax: - VPID = Intrinsic::vp_reduce_fmax; - break; - case RecurKind::FMin: - VPID = Intrinsic::vp_reduce_fmin; - break; - case RecurKind::FMaximum: - VPID = Intrinsic::vp_reduce_fmaximum; - break; - case RecurKind::FMinimum: - VPID = Intrinsic::vp_reduce_fminimum; - break; - default: - llvm_unreachable("No VPIntrinsic for this reduction"); - } + auto VPID = VPIntrinsic::getForIntrinsic(RdxID); + assert(VPReductionIntrinsic::isVPReduction(VPID) && + "No VPIntrinsic for this reduction"); return createVectorInstructionImpl(VPID, ValTy, InstOpArray, Name); } diff --git a/llvm/lib/MC/MCAssembler.cpp b/llvm/lib/MC/MCAssembler.cpp index ceeb7af0fecc..c3da4bb5cc36 100644 --- a/llvm/lib/MC/MCAssembler.cpp +++ b/llvm/lib/MC/MCAssembler.cpp @@ -432,6 +432,28 @@ void MCAssembler::layoutBundle(MCFragment *Prev, MCFragment *F) const { DF->Offset = EF->Offset; } +void MCAssembler::ensureValid(MCSection &Sec) const { + if (Sec.hasLayout()) + return; + Sec.setHasLayout(true); + MCFragment *Prev = nullptr; + uint64_t Offset = 0; + for (MCFragment &F : Sec) { + F.Offset = Offset; + if (isBundlingEnabled() && F.hasInstructions()) { + layoutBundle(Prev, &F); + Offset = F.Offset; + } + Offset += computeFragmentSize(F); + Prev = &F; + } +} + +uint64_t MCAssembler::getFragmentOffset(const MCFragment &F) const { + ensureValid(*F.getParent()); + return F.Offset; +} + // Simple getSymbolOffset helper for the non-variable case. static bool getLabelOffset(const MCAssembler &Asm, const MCSymbol &S, bool ReportError, uint64_t &Val) { @@ -916,20 +938,22 @@ void MCAssembler::layout() { // Layout until everything fits. this->HasLayout = true; - for (MCSection &Sec : *this) - layoutSection(Sec); while (layoutOnce()) { + if (getContext().hadError()) + return; + // Size of fragments in one section can depend on the size of fragments in + // another. If any fragment has changed size, we have to re-layout (and + // as a result possibly further relax) all. 
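// Sketch of the lazy-layout contract introduced above, as inferred from
// ensureValid/getFragmentOffset: offsets are computed on first query and
// cached per section; clearing hasLayout() forces recomputation.
uint64_t off = Asm.getFragmentOffset(F); // lays out F's parent on demand
// ... a fragment is relaxed and changes size ...
F.getParent()->setHasLayout(false);      // invalidate cached offsets
off = Asm.getFragmentOffset(F);          // recomputed via ensureValid()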
+ for (MCSection &Sec : *this) + Sec.setHasLayout(false); } DEBUG_WITH_TYPE("mc-dump", { errs() << "assembler backend - post-relaxation\n--\n"; dump(); }); - // Some targets might want to adjust fragment offsets. If so, perform another - // layout loop. - if (getBackend().finishLayout(*this)) - for (MCSection &Sec : *this) - layoutSection(Sec); + // Finalize the layout, including fragment lowering. + getBackend().finishLayout(*this); DEBUG_WITH_TYPE("mc-dump", { errs() << "assembler backend - final-layout\n--\n"; @@ -1282,42 +1306,15 @@ bool MCAssembler::relaxFragment(MCFragment &F) { } } -void MCAssembler::layoutSection(MCSection &Sec) { - MCFragment *Prev = nullptr; - uint64_t Offset = 0; - for (MCFragment &F : Sec) { - F.Offset = Offset; - if (LLVM_UNLIKELY(isBundlingEnabled())) { - if (F.hasInstructions()) { - layoutBundle(Prev, &F); - Offset = F.Offset; - } - Prev = &F; - } - Offset += computeFragmentSize(F); - } -} - bool MCAssembler::layoutOnce() { ++stats::RelaxationSteps; - // Size of fragments in one section can depend on the size of fragments in - // another. If any fragment has changed size, we have to re-layout (and - // as a result possibly further relax) all. - bool ChangedAny = false; - for (MCSection &Sec : *this) { - for (;;) { - bool Changed = false; - for (MCFragment &F : Sec) - if (relaxFragment(F)) - Changed = true; - ChangedAny |= Changed; - if (!Changed) - break; - layoutSection(Sec); - } - } - return ChangedAny; + bool Changed = false; + for (MCSection &Sec : *this) + for (MCFragment &Frag : Sec) + if (relaxFragment(Frag)) + Changed = true; + return Changed; } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) diff --git a/llvm/lib/MC/MCSection.cpp b/llvm/lib/MC/MCSection.cpp index 97e87a41c8ce..8c2ee5635a49 100644 --- a/llvm/lib/MC/MCSection.cpp +++ b/llvm/lib/MC/MCSection.cpp @@ -23,8 +23,8 @@ using namespace llvm; MCSection::MCSection(SectionVariant V, StringRef Name, bool IsText, bool IsVirtual, MCSymbol *Begin) : Begin(Begin), BundleGroupBeforeFirstInst(false), HasInstructions(false), - IsRegistered(false), IsText(IsText), IsVirtual(IsVirtual), Name(Name), - Variant(V) { + HasLayout(false), IsRegistered(false), IsText(IsText), + IsVirtual(IsVirtual), Name(Name), Variant(V) { DummyFragment.setParent(this); // The initial subsection number is 0. Create a fragment list. 
CurFragList = &Subsections.emplace_back(0u, FragList{}).second; diff --git a/llvm/lib/Support/Windows/Process.inc b/llvm/lib/Support/Windows/Process.inc index 34d294b232c3..d525f5b16e86 100644 --- a/llvm/lib/Support/Windows/Process.inc +++ b/llvm/lib/Support/Windows/Process.inc @@ -482,7 +482,8 @@ static RTL_OSVERSIONINFOEXW GetWindowsVer() { HMODULE hMod = ::GetModuleHandleW(L"ntdll.dll"); assert(hMod); - auto getVer = (RtlGetVersionPtr)::GetProcAddress(hMod, "RtlGetVersion"); + auto getVer = + (RtlGetVersionPtr)(void *)::GetProcAddress(hMod, "RtlGetVersion"); assert(getVer); RTL_OSVERSIONINFOEXW info{}; diff --git a/llvm/lib/Support/Windows/Signals.inc b/llvm/lib/Support/Windows/Signals.inc index 29ebf7c696e0..f11ad09f3713 100644 --- a/llvm/lib/Support/Windows/Signals.inc +++ b/llvm/lib/Support/Windows/Signals.inc @@ -171,23 +171,27 @@ static bool load64BitDebugHelp(void) { HMODULE hLib = ::LoadLibraryExA("Dbghelp.dll", NULL, LOAD_LIBRARY_SEARCH_SYSTEM32); if (hLib) { - fMiniDumpWriteDump = - (fpMiniDumpWriteDump)::GetProcAddress(hLib, "MiniDumpWriteDump"); - fStackWalk64 = (fpStackWalk64)::GetProcAddress(hLib, "StackWalk64"); - fSymGetModuleBase64 = - (fpSymGetModuleBase64)::GetProcAddress(hLib, "SymGetModuleBase64"); - fSymGetSymFromAddr64 = - (fpSymGetSymFromAddr64)::GetProcAddress(hLib, "SymGetSymFromAddr64"); - fSymGetLineFromAddr64 = - (fpSymGetLineFromAddr64)::GetProcAddress(hLib, "SymGetLineFromAddr64"); - fSymGetModuleInfo64 = - (fpSymGetModuleInfo64)::GetProcAddress(hLib, "SymGetModuleInfo64"); - fSymFunctionTableAccess64 = (fpSymFunctionTableAccess64)::GetProcAddress( - hLib, "SymFunctionTableAccess64"); - fSymSetOptions = (fpSymSetOptions)::GetProcAddress(hLib, "SymSetOptions"); - fSymInitialize = (fpSymInitialize)::GetProcAddress(hLib, "SymInitialize"); - fEnumerateLoadedModules = (fpEnumerateLoadedModules)::GetProcAddress( - hLib, "EnumerateLoadedModules64"); + fMiniDumpWriteDump = (fpMiniDumpWriteDump)(void *)::GetProcAddress( + hLib, "MiniDumpWriteDump"); + fStackWalk64 = (fpStackWalk64)(void *)::GetProcAddress(hLib, "StackWalk64"); + fSymGetModuleBase64 = (fpSymGetModuleBase64)(void *)::GetProcAddress( + hLib, "SymGetModuleBase64"); + fSymGetSymFromAddr64 = (fpSymGetSymFromAddr64)(void *)::GetProcAddress( + hLib, "SymGetSymFromAddr64"); + fSymGetLineFromAddr64 = (fpSymGetLineFromAddr64)(void *)::GetProcAddress( + hLib, "SymGetLineFromAddr64"); + fSymGetModuleInfo64 = (fpSymGetModuleInfo64)(void *)::GetProcAddress( + hLib, "SymGetModuleInfo64"); + fSymFunctionTableAccess64 = + (fpSymFunctionTableAccess64)(void *)::GetProcAddress( + hLib, "SymFunctionTableAccess64"); + fSymSetOptions = + (fpSymSetOptions)(void *)::GetProcAddress(hLib, "SymSetOptions"); + fSymInitialize = + (fpSymInitialize)(void *)::GetProcAddress(hLib, "SymInitialize"); + fEnumerateLoadedModules = + (fpEnumerateLoadedModules)(void *)::GetProcAddress( + hLib, "EnumerateLoadedModules64"); } return isDebugHelpInitialized(); } diff --git a/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp b/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp index 310b152ef981..415edb189e60 100644 --- a/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp @@ -833,6 +833,11 @@ bool AArch64Arm64ECCallLowering::runOnModule(Module &Mod) { "EXP+" + MangledName.value()))); A->setAliasee(&F); + if (F.hasDLLExportStorageClass()) { + A->setDLLStorageClass(GlobalValue::DLLExportStorageClass); + F.setDLLStorageClass(GlobalValue::DefaultStorageClass); + } + FnsMap[A] 
= GlobalAlias::create(GlobalValue::LinkOnceODRLinkage, MangledName.value(), &F); PatchableFns.insert(A); diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index b1b83e27c559..bd530903bb66 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -2604,6 +2604,41 @@ AArch64FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, } StackOffset +AArch64FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF, + int FI) const { + // This function serves to provide a comparable offset from a single reference + // point (the value of SP at function entry) that can be used for analysis, + // e.g. the stack-frame-layout analysis pass. It is not guaranteed to be + // correct for all objects in the presence of VLA-area objects or dynamic + // stack re-alignment. + + const auto &MFI = MF.getFrameInfo(); + + int64_t ObjectOffset = MFI.getObjectOffset(FI); + + // This is correct in the absence of any SVE stack objects. + StackOffset SVEStackSize = getSVEStackSize(MF); + if (!SVEStackSize) + return StackOffset::getFixed(ObjectOffset - getOffsetOfLocalArea()); + + const auto *AFI = MF.getInfo<AArch64FunctionInfo>(); + if (MFI.getStackID(FI) == TargetStackID::ScalableVector) { + return StackOffset::get(-((int64_t)AFI->getCalleeSavedStackSize()), + ObjectOffset); + } + + bool IsFixed = MFI.isFixedObjectIndex(FI); + bool IsCSR = + !IsFixed && ObjectOffset >= -((int)AFI->getCalleeSavedStackSize(MFI)); + + StackOffset ScalableOffset = {}; + if (!IsFixed && !IsCSR) + ScalableOffset = -SVEStackSize; + + return StackOffset::getFixed(ObjectOffset) + ScalableOffset; +} + +StackOffset AArch64FrameLowering::getNonLocalFrameIndexReference(const MachineFunction &MF, int FI) const { return StackOffset::getFixed(getSEHFrameIndexOffset(MF, FI)); diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h index da315850d636..0ebab1700e9c 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h @@ -41,6 +41,8 @@ public: StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const override; + StackOffset getFrameIndexReferenceFromSP(const MachineFunction &MF, + int FI) const override; StackOffset resolveFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg, bool PreferFP, bool ForSimm) const; diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h index 3ef00f75735b..879dbe1b279b 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h @@ -15,6 +15,7 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUMCTARGETDESC_H #define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUMCTARGETDESC_H +#include <cstdint> #include <memory> namespace llvm { diff --git a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp index 642739a29d6b..96d7074e6ef3 100644 --- a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp @@ -153,9 +153,9 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) { OptimizationGoals = 0; if (Subtarget->isTargetCOFF()) { - bool Internal = F.hasInternalLinkage(); - COFF::SymbolStorageClass Scl = Internal ? 
COFF::IMAGE_SYM_CLASS_STATIC - : COFF::IMAGE_SYM_CLASS_EXTERNAL; + bool Local = F.hasLocalLinkage(); + COFF::SymbolStorageClass Scl = + Local ? COFF::IMAGE_SYM_CLASS_STATIC : COFF::IMAGE_SYM_CLASS_EXTERNAL; int Type = COFF::IMAGE_SYM_DTYPE_FUNCTION << COFF::SCT_COMPLEX_TYPE_SHIFT; OutStreamer->beginCOFFSymbolDef(CurrentFnSym); diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp index 1570493b765c..6acc37e599f2 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp @@ -702,7 +702,7 @@ public: return true; } - bool finishLayout(const MCAssembler &Asm) const override { + void finishLayout(MCAssembler const &Asm) const override { SmallVector<MCFragment *> Frags; for (MCSection &Sec : Asm) { Frags.clear(); @@ -747,6 +747,7 @@ public: //assert(!Error); (void)Error; ReplaceInstruction(Asm.getEmitter(), RF, Inst); + Sec.setHasLayout(false); Size = 0; // Only look back one instruction break; } @@ -756,7 +757,6 @@ public: } } } - return true; } }; // class HexagonAsmBackend diff --git a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp index 208bd3db8f14..f52e188f8779 100644 --- a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp +++ b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp @@ -450,6 +450,24 @@ public: IsValidKind; } + bool isSImm20pcaddi() const { + if (!isImm()) + return false; + + int64_t Imm; + LoongArchMCExpr::VariantKind VK = LoongArchMCExpr::VK_LoongArch_None; + bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK); + bool IsValidKind = VK == LoongArchMCExpr::VK_LoongArch_None || + VK == LoongArchMCExpr::VK_LoongArch_PCREL20_S2 || + VK == LoongArchMCExpr::VK_LoongArch_TLS_LD_PCREL20_S2 || + VK == LoongArchMCExpr::VK_LoongArch_TLS_GD_PCREL20_S2 || + VK == LoongArchMCExpr::VK_LoongArch_TLS_DESC_PCREL20_S2; + return IsConstantImm + ? isInt<20>(Imm) && IsValidKind + : LoongArchAsmParser::classifySymbolRef(getImm(), VK) && + IsValidKind; + } + bool isSImm21lsl2() const { if (!isImm()) return false; @@ -1676,6 +1694,12 @@ bool LoongArchAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, /*Upper=*/(1 << 19) - 1, "operand must be a symbol with modifier (e.g. %call36) or an integer " "in the range"); + case Match_InvalidSImm20pcaddi: + return generateImmOutOfRangeError( + Operands, ErrorInfo, /*Lower=*/-(1 << 19), + /*Upper=*/(1 << 19) - 1, + "operand must be a symbol with modifier (e.g. 
%pcrel_20) or an integer " + "in the range"); case Match_InvalidSImm21lsl2: return generateImmOutOfRangeError( Operands, ErrorInfo, /*Lower=*/-(1 << 22), /*Upper=*/(1 << 22) - 4, diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td index ec0d071453c3..ef647a427787 100644 --- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td @@ -397,6 +397,10 @@ def simm20_pcaddu18i : SImm20Operand { let ParserMatchClass = SImmAsmOperand<20, "pcaddu18i">; } +def simm20_pcaddi : SImm20Operand { + let ParserMatchClass = SImmAsmOperand<20, "pcaddi">; +} + def simm21_lsl2 : Operand<OtherVT> { let ParserMatchClass = SImmAsmOperand<21, "lsl2">; let EncoderMethod = "getImmOpValueAsr<2>"; @@ -754,7 +758,7 @@ def SLT : ALU_3R<0x00120000>; def SLTU : ALU_3R<0x00128000>; def SLTI : ALU_2RI12<0x02000000, simm12>; def SLTUI : ALU_2RI12<0x02400000, simm12>; -def PCADDI : ALU_1RI20<0x18000000, simm20>; +def PCADDI : ALU_1RI20<0x18000000, simm20_pcaddi>; def PCADDU12I : ALU_1RI20<0x1c000000, simm20>; def PCALAU12I : ALU_1RI20<0x1a000000, simm20_pcalau12i>; def AND : ALU_3R<0x00148000>; diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h index 29ed14f6e4d6..370f5b0189b5 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h @@ -111,6 +111,8 @@ enum Fixups { fixup_loongarch_relax = FirstLiteralRelocationKind + ELF::R_LARCH_RELAX, // Generate an R_LARCH_ALIGN which indicates the linker may fixup align here. fixup_loongarch_align = FirstLiteralRelocationKind + ELF::R_LARCH_ALIGN, + // 20-bit fixup corresponding to %pcrel_20(foo) for instruction pcaddi. + fixup_loongarch_pcrel20_s2, // 36-bit fixup corresponding to %call36(foo) for a pair of instructions: // pcaddu18i+jirl. fixup_loongarch_call36 = FirstLiteralRelocationKind + ELF::R_LARCH_CALL36, @@ -142,6 +144,12 @@ enum Fixups { fixup_loongarch_tls_le_add_r, // 12-bit fixup corresponding to %le_lo12_r(foo) for instruction addi.w/d. fixup_loongarch_tls_le_lo12_r, + // 20-bit fixup corresponding to %ld_pcrel_20(foo) for instruction pcaddi. + fixup_loongarch_tls_ld_pcrel20_s2, + // 20-bit fixup corresponding to %gd_pcrel_20(foo) for instruction pcaddi. + fixup_loongarch_tls_gd_pcrel20_s2, + // 20-bit fixup corresponding to %desc_pcrel_20(foo) for instruction pcaddi.
+ fixup_loongarch_tls_desc_pcrel20_s2, }; } // end namespace LoongArch } // end namespace llvm diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp index efbfce35dbfc..4f7f93fa4783 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp @@ -287,6 +287,18 @@ LoongArchMCCodeEmitter::getExprOpValue(const MCInst &MI, const MCOperand &MO, case LoongArchMCExpr::VK_LoongArch_TLS_LE_LO12_R: FixupKind = LoongArch::fixup_loongarch_tls_le_lo12_r; break; + case LoongArchMCExpr::VK_LoongArch_PCREL20_S2: + FixupKind = LoongArch::fixup_loongarch_pcrel20_s2; + break; + case LoongArchMCExpr::VK_LoongArch_TLS_LD_PCREL20_S2: + FixupKind = LoongArch::fixup_loongarch_tls_ld_pcrel20_s2; + break; + case LoongArchMCExpr::VK_LoongArch_TLS_GD_PCREL20_S2: + FixupKind = LoongArch::fixup_loongarch_tls_gd_pcrel20_s2; + break; + case LoongArchMCExpr::VK_LoongArch_TLS_DESC_PCREL20_S2: + FixupKind = LoongArch::fixup_loongarch_tls_desc_pcrel20_s2; + break; } } else if (Kind == MCExpr::SymbolRef && cast<MCSymbolRefExpr>(Expr)->getKind() == diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp index 98b9b1ab6d3f..53d46cca913e 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp @@ -166,6 +166,14 @@ StringRef LoongArchMCExpr::getVariantKindName(VariantKind Kind) { return "le_add_r"; case VK_LoongArch_TLS_LE_LO12_R: return "le_lo12_r"; + case VK_LoongArch_PCREL20_S2: + return "pcrel_20"; + case VK_LoongArch_TLS_LD_PCREL20_S2: + return "ld_pcrel_20"; + case VK_LoongArch_TLS_GD_PCREL20_S2: + return "gd_pcrel_20"; + case VK_LoongArch_TLS_DESC_PCREL20_S2: + return "desc_pcrel_20"; } } @@ -222,6 +230,10 @@ LoongArchMCExpr::getVariantKindForName(StringRef name) { .Case("le_hi20_r", VK_LoongArch_TLS_LE_HI20_R) .Case("le_add_r", VK_LoongArch_TLS_LE_ADD_R) .Case("le_lo12_r", VK_LoongArch_TLS_LE_LO12_R) + .Case("pcrel_20", VK_LoongArch_PCREL20_S2) + .Case("ld_pcrel_20", VK_LoongArch_TLS_LD_PCREL20_S2) + .Case("gd_pcrel_20", VK_LoongArch_TLS_GD_PCREL20_S2) + .Case("desc_pcrel_20", VK_LoongArch_TLS_DESC_PCREL20_S2) .Default(VK_LoongArch_Invalid); } @@ -264,6 +276,9 @@ void LoongArchMCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const { case VK_LoongArch_TLS_GD_HI20: case VK_LoongArch_TLS_DESC_PC_HI20: case VK_LoongArch_TLS_DESC_HI20: + case VK_LoongArch_TLS_LD_PCREL20_S2: + case VK_LoongArch_TLS_GD_PCREL20_S2: + case VK_LoongArch_TLS_DESC_PCREL20_S2: break; } fixELFSymbolsInTLSFixupsImpl(getSubExpr(), Asm); diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h index 1546d894d56a..91215b863ccc 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h @@ -75,6 +75,10 @@ public: VK_LoongArch_TLS_LE_HI20_R, VK_LoongArch_TLS_LE_ADD_R, VK_LoongArch_TLS_LE_LO12_R, + VK_LoongArch_PCREL20_S2, + VK_LoongArch_TLS_LD_PCREL20_S2, + VK_LoongArch_TLS_GD_PCREL20_S2, + VK_LoongArch_TLS_DESC_PCREL20_S2, VK_LoongArch_Invalid // Must be the last item. 
}; diff --git a/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp index 10ae81e0460e..9abe0e3186f2 100644 --- a/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp @@ -93,5 +93,8 @@ MachineBasicBlock::iterator NVPTXFrameLowering::eliminateCallFramePseudoInstr( TargetFrameLowering::DwarfFrameBase NVPTXFrameLowering::getDwarfFrameBase(const MachineFunction &MF) const { - return {DwarfFrameBase::CFA, {0}}; + DwarfFrameBase FrameBase; + FrameBase.Kind = DwarfFrameBase::CFA; + FrameBase.Location.Offset = 0; + return FrameBase; } diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td index fdbdc14736c8..3cb7cd9d8f22 100644 --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td @@ -17,6 +17,7 @@ def sub_un : SubRegIndex<1, 3>; def sub_32 : SubRegIndex<32>; def sub_32_hi_phony : SubRegIndex<32,32>; def sub_64 : SubRegIndex<64>; +def sub_64_hi_phony : SubRegIndex<64,64>; def sub_vsx0 : SubRegIndex<128>; def sub_vsx1 : SubRegIndex<128, 128>; def sub_gp8_x0 : SubRegIndex<64>; @@ -77,19 +78,19 @@ class VF<bits<5> num, string n> : PPCReg<n> { } // VR - One of the 32 128-bit vector registers -class VR<VF SubReg, string n> : PPCReg<n> { +class VR<VF SubReg, VF SubRegH, string n> : PPCReg<n> { let HWEncoding{4-0} = SubReg.HWEncoding{4-0}; let HWEncoding{5} = 0; - let SubRegs = [SubReg]; - let SubRegIndices = [sub_64]; + let SubRegs = [SubReg, SubRegH]; + let SubRegIndices = [sub_64, sub_64_hi_phony]; } // VSRL - One of the 32 128-bit VSX registers that overlap with the scalar // floating-point registers. -class VSRL<FPR SubReg, string n> : PPCReg<n> { +class VSRL<FPR SubReg, FPR SubRegH, string n> : PPCReg<n> { let HWEncoding = SubReg.HWEncoding; - let SubRegs = [SubReg]; - let SubRegIndices = [sub_64]; + let SubRegs = [SubReg, SubRegH]; + let SubRegIndices = [sub_64, sub_64_hi_phony]; } // VSXReg - One of the VSX registers in the range vs32-vs63 with numbering @@ -155,6 +156,22 @@ foreach Index = 0-31 in { DwarfRegNum<[!add(Index, 32), !add(Index, 32)]>; } +// The FH and VFH registers have been marked as Artificial because there are no +// instructions on PowerPC that use those register classes. They only exist +// in order to ensure that the super registers (V and VSL) are covered by their +// subregisters and have correct subregister lane masks.
+let isArtificial = 1 in { + foreach Index = 0-31 in { + def FH#Index : FPR<-1, "">; + def VFH#Index : VF<-1, "">; + } +} + +let isAllocatable = 0, CopyCost = -1 in { + def VFHRC : RegisterClass<"PPC", [f64], 64, (sequence "VFH%u", 0, 31)>; + def FHRC : RegisterClass<"PPC", [f64], 64, (sequence "FH%u", 0, 31)>; +} + // Floating-point pair registers foreach Index = { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 } in { def Fpair#Index : FPPair<"fp"#Index, Index>; @@ -168,17 +185,19 @@ foreach Index = 0-31 in { DwarfRegNum<[!add(Index, 77), !add(Index, 77)]>; } +let CoveredBySubRegs = 1 in { // Vector registers foreach Index = 0-31 in { - def V#Index : VR<!cast<VF>("VF"#Index), "v"#Index>, + def V#Index : VR<!cast<VF>("VF"#Index), !cast<VF>("VFH"#Index), "v"#Index>, DwarfRegNum<[!add(Index, 77), !add(Index, 77)]>; } // VSX registers foreach Index = 0-31 in { - def VSL#Index : VSRL<!cast<FPR>("F"#Index), "vs"#Index>, + def VSL#Index : VSRL<!cast<FPR>("F"#Index), !cast<FPR>("FH"#Index), "vs"#Index>, DwarfRegAlias<!cast<FPR>("F"#Index)>; } +} // Dummy VSX registers, this defines string: "vs32"-"vs63", and is only used for // asm printing. diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index eef6ae677ac8..db949f3476e2 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -3721,6 +3721,10 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) { assert(!Mask || cast<RegisterSDNode>(Mask)->getReg() == RISCV::V0); assert(!Glue || Glue.getValueType() == MVT::Glue); + // If the EEW of True is different from vmerge's SEW, then we can't fold. + if (True.getSimpleValueType() != N->getSimpleValueType(0)) + return false; + // We require that either merge and false are the same, or that merge // is undefined. if (Merge != False && !isImplicitDef(Merge)) diff --git a/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp b/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp index fecc83a821f4..b6ac3384e7d3 100644 --- a/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp +++ b/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp @@ -429,8 +429,16 @@ bool RISCVMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi, NumOps = Flags.getNumOperandRegisters(); // Memory constraints have two operands. - if (NumOps != 2 || !Flags.isMemKind()) + if (NumOps != 2 || !Flags.isMemKind()) { + // If the register is used by something other than a memory constraint, + // we should not fold. + for (unsigned J = 0; J < NumOps; ++J) { + const MachineOperand &MO = UseMI.getOperand(I + 1 + J); + if (MO.isReg() && MO.getReg() == DestReg) + return false; + } continue; + } // We can't do this for constraint A because AMO instructions don't have // an immediate offset field.
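The RISCVMergeBaseOffset hunk above deserves a gloss: foldIntoMemoryOps may only fold the materialized base+offset into inline-asm uses that sit behind memory constraints, because a register (or other) constraint would keep observing the un-adjusted base. The following is a minimal, self-contained C++ sketch of that guard; ConstraintKind, Operand, and canFoldIntoInlineAsm are invented stand-ins for illustration, not the real LLVM MachineInstr/InlineAsm API.

#include <iostream>
#include <vector>

// Hypothetical stand-ins for LLVM's inline-asm operand model; only the
// shape of the check is preserved, not the real API.
enum class ConstraintKind { Mem, Reg, Imm };

struct Operand {
  ConstraintKind Kind;
  unsigned Reg; // 0 means "not a register operand"
};

// Folding `base + offset` into an inline asm is only safe if every use of
// the base register is a memory constraint; any other use would still read
// the un-adjusted base. This mirrors the early bail-out added to
// foldIntoMemoryOps in the hunk above.
bool canFoldIntoInlineAsm(const std::vector<Operand> &Ops, unsigned BaseReg) {
  for (const Operand &Op : Ops) {
    if (Op.Reg == BaseReg && Op.Kind != ConstraintKind::Mem)
      return false; // non-memory use of the base register: do not fold
  }
  return true;
}

int main() {
  const unsigned BaseReg = 7;
  std::vector<Operand> OnlyMem = {{ConstraintKind::Mem, BaseReg}};
  std::vector<Operand> Mixed = {{ConstraintKind::Mem, BaseReg},
                                {ConstraintKind::Reg, BaseReg}};
  std::cout << canFoldIntoInlineAsm(OnlyMem, BaseReg) << '\n'; // 1: fold is safe
  std::cout << canFoldIntoInlineAsm(Mixed, BaseReg) << '\n';   // 0: must not fold
}

In the pass itself the scan walks the InlineAsm operand groups of the using MachineInstr; the sketch keeps only the decision logic.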
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index fcc61d0a5e2f..67d993a51ad9 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -201,7 +201,7 @@ public: bool padInstructionEncoding(MCRelaxableFragment &RF, MCCodeEmitter &Emitter, unsigned &RemainingSize) const; - bool finishLayout(const MCAssembler &Asm) const override; + void finishLayout(const MCAssembler &Asm) const override; unsigned getMaximumNopSize(const MCSubtargetInfo &STI) const override; @@ -856,7 +856,7 @@ bool X86AsmBackend::padInstructionEncoding(MCRelaxableFragment &RF, return Changed; } -bool X86AsmBackend::finishLayout(const MCAssembler &Asm) const { +void X86AsmBackend::finishLayout(MCAssembler const &Asm) const { // See if we can further relax some instructions to cut down on the number of // nop bytes required for code alignment. The actual win is in reducing // instruction count, not number of bytes. Modern X86-64 can easily end up @@ -864,7 +864,7 @@ bool X86AsmBackend::finishLayout(const MCAssembler &Asm) const { // (i.e. eliminate nops) even at the cost of increasing the size and // complexity of others. if (!X86PadForAlign && !X86PadForBranchAlign) - return false; + return; // The processed regions are delimited by LabeledFragments. -g may have more // MCSymbols and therefore different relaxation results. X86PadForAlign is @@ -911,6 +911,9 @@ continue; } +#ifndef NDEBUG + const uint64_t OrigOffset = Asm.getFragmentOffset(F); +#endif const uint64_t OrigSize = Asm.computeFragmentSize(F); // To keep the effects local, prefer to relax instructions closest to @@ -923,7 +926,8 @@ // Give the backend a chance to play any tricks it wishes to increase // the encoding size of the given instruction. Target independent code // will try further relaxation, but targets may play further tricks. - padInstructionEncoding(RF, Asm.getEmitter(), RemainingSize); + if (padInstructionEncoding(RF, Asm.getEmitter(), RemainingSize)) + Sec.setHasLayout(false); // If we have an instruction which hasn't been fully relaxed, we can't // skip past it and insert bytes before it. Changing its starting @@ -940,6 +944,14 @@ if (F.getKind() == MCFragment::FT_BoundaryAlign) cast<MCBoundaryAlignFragment>(F).setSize(RemainingSize); +#ifndef NDEBUG + const uint64_t FinalOffset = Asm.getFragmentOffset(F); + const uint64_t FinalSize = Asm.computeFragmentSize(F); + assert(OrigOffset + OrigSize == FinalOffset + FinalSize && + "can't move start of next fragment!"); + assert(FinalSize == RemainingSize && "inconsistent size computation?"); +#endif + // If we're looking at a boundary align, make sure we don't try to pad // its target instructions for some following directive. Doing so would // break the alignment of the current boundary align. @@ -953,7 +965,11 @@ } } - return true; + // The layout is done. Mark every fragment as valid.
+ for (MCSection &Section : Asm) { + Asm.getFragmentOffset(*Section.curFragList()->Tail); + Asm.computeFragmentSize(*Section.curFragList()->Tail); + } } unsigned X86AsmBackend::getMaximumNopSize(const MCSubtargetInfo &STI) const { diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp index c9be8ee00cdc..6b9566f1ae46 100644 --- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp +++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp @@ -1233,7 +1233,7 @@ bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces( // If V is used as the pointer operand of a compatible memory operation, // sets the pointer operand to NewV. This replacement does not change // the element type, so the resultant load/store is still valid. - CurUser->replaceUsesOfWith(V, NewV); + U.set(NewV); continue; } diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index 4609376a748f..0abf6d77496d 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -918,6 +918,44 @@ bool llvm::hasIterationCountInvariantInParent(Loop *InnerLoop, return true; } +constexpr Intrinsic::ID llvm::getReductionIntrinsicID(RecurKind RK) { + switch (RK) { + default: + llvm_unreachable("Unexpected recurrence kind"); + case RecurKind::Add: + return Intrinsic::vector_reduce_add; + case RecurKind::Mul: + return Intrinsic::vector_reduce_mul; + case RecurKind::And: + return Intrinsic::vector_reduce_and; + case RecurKind::Or: + return Intrinsic::vector_reduce_or; + case RecurKind::Xor: + return Intrinsic::vector_reduce_xor; + case RecurKind::FMulAdd: + case RecurKind::FAdd: + return Intrinsic::vector_reduce_fadd; + case RecurKind::FMul: + return Intrinsic::vector_reduce_fmul; + case RecurKind::SMax: + return Intrinsic::vector_reduce_smax; + case RecurKind::SMin: + return Intrinsic::vector_reduce_smin; + case RecurKind::UMax: + return Intrinsic::vector_reduce_umax; + case RecurKind::UMin: + return Intrinsic::vector_reduce_umin; + case RecurKind::FMax: + return Intrinsic::vector_reduce_fmax; + case RecurKind::FMin: + return Intrinsic::vector_reduce_fmin; + case RecurKind::FMaximum: + return Intrinsic::vector_reduce_fmaximum; + case RecurKind::FMinimum: + return Intrinsic::vector_reduce_fminimum; + } +} + unsigned llvm::getArithmeticReductionInstruction(Intrinsic::ID RdxID) { switch (RdxID) { case Intrinsic::vector_reduce_fadd: @@ -1215,12 +1253,13 @@ Value *llvm::createSimpleTargetReduction(VectorBuilder &VBuilder, Value *Src, RecurKind Kind = Desc.getRecurrenceKind(); assert(!RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind) && "AnyOf reduction is not supported."); + Intrinsic::ID Id = getReductionIntrinsicID(Kind); auto *SrcTy = cast<VectorType>(Src->getType()); Type *SrcEltTy = SrcTy->getElementType(); Value *Iden = Desc.getRecurrenceIdentity(Kind, SrcEltTy, Desc.getFastMathFlags()); Value *Ops[] = {Iden, Src}; - return VBuilder.createSimpleTargetReduction(Kind, SrcTy, Ops); + return VBuilder.createSimpleTargetReduction(Id, SrcTy, Ops); } Value *llvm::createTargetReduction(IRBuilderBase &B, @@ -1260,9 +1299,10 @@ Value *llvm::createOrderedReduction(VectorBuilder &VBuilder, assert(Src->getType()->isVectorTy() && "Expected a vector type"); assert(!Start->getType()->isVectorTy() && "Expected a scalar type"); + Intrinsic::ID Id = getReductionIntrinsicID(RecurKind::FAdd); auto *SrcTy = cast<VectorType>(Src->getType()); Value *Ops[] = {Start, Src}; - return VBuilder.createSimpleTargetReduction(RecurKind::FAdd, 
SrcTy, Ops); + return VBuilder.createSimpleTargetReduction(Id, SrcTy, Ops); } void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue,