diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2020-07-24 20:54:07 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2020-07-24 20:54:07 +0000 |
commit | b9859415507894345d9c453d19115cda87014391 (patch) | |
tree | 09b3e3071e601f46356c96cb9229e608fcf21020 /contrib | |
parent | 30c6aa249eaaeb04404e854c97b182792bf65bf8 (diff) | |
download | src-test2-b9859415507894345d9c453d19115cda87014391.tar.gz src-test2-b9859415507894345d9c453d19115cda87014391.zip |
Notes
Diffstat (limited to 'contrib')
143 files changed, 7394 insertions, 1030 deletions
diff --git a/contrib/llvm-project/FREEBSD-Xlist b/contrib/llvm-project/FREEBSD-Xlist index 10bde0818d57..5c1e8f837621 100644 --- a/contrib/llvm-project/FREEBSD-Xlist +++ b/contrib/llvm-project/FREEBSD-Xlist @@ -3,6 +3,7 @@ .clang-format .clang-tidy .git-blame-ignore-revs +.github/ .gitignore CONTRIBUTING.md README.md @@ -264,6 +265,7 @@ lldb/.clang-format lldb/.gitignore lldb/CMakeLists.txt lldb/CODE_OWNERS.txt +lldb/bindings/CMakeLists.txt lldb/cmake/ lldb/docs/.htaccess lldb/docs/CMakeLists.txt @@ -529,6 +531,7 @@ llvm/lib/ExecutionEngine/PerfJITEvents/CMakeLists.txt llvm/lib/ExecutionEngine/PerfJITEvents/LLVMBuild.txt llvm/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt llvm/lib/ExecutionEngine/RuntimeDyld/LLVMBuild.txt +llvm/lib/Extensions/ llvm/lib/Frontend/CMakeLists.txt llvm/lib/Frontend/LLVMBuild.txt llvm/lib/Frontend/OpenMP/CMakeLists.txt @@ -861,7 +864,8 @@ llvm/tools/llvm-dis/LLVMBuild.txt llvm/tools/llvm-dwarfdump/CMakeLists.txt llvm/tools/llvm-dwarfdump/LLVMBuild.txt llvm/tools/llvm-dwarfdump/fuzzer/ -llvm/tools/llvm-dwp/ +llvm/tools/llvm-dwp/CMakeLists.txt +llvm/tools/llvm-dwp/LLVMBuild.txt llvm/tools/llvm-elfabi/ llvm/tools/llvm-exegesis/ llvm/tools/llvm-extract/CMakeLists.txt @@ -908,12 +912,14 @@ llvm/tools/llvm-reduce/ llvm/tools/llvm-rtdyld/CMakeLists.txt llvm/tools/llvm-rtdyld/LLVMBuild.txt llvm/tools/llvm-shlib/ -llvm/tools/llvm-size/ +llvm/tools/llvm-size/CMakeLists.txt +llvm/tools/llvm-size/LLVMBuild.txt llvm/tools/llvm-special-case-list-fuzzer/ llvm/tools/llvm-split/ llvm/tools/llvm-stress/CMakeLists.txt llvm/tools/llvm-stress/LLVMBuild.txt -llvm/tools/llvm-strings/ +llvm/tools/llvm-strings/CMakeLists.txt +llvm/tools/llvm-strings/LLVMBuild.txt llvm/tools/llvm-symbolizer/CMakeLists.txt llvm/tools/llvm-undname/ llvm/tools/llvm-xray/CMakeLists.txt diff --git a/contrib/llvm-project/clang/include/clang/AST/DeclBase.h b/contrib/llvm-project/clang/include/clang/AST/DeclBase.h index 4ddf03d88360..1197fce41ded 100644 --- 
a/contrib/llvm-project/clang/include/clang/AST/DeclBase.h +++ b/contrib/llvm-project/clang/include/clang/AST/DeclBase.h @@ -856,14 +856,15 @@ public: return getParentFunctionOrMethod() == nullptr; } - /// Returns true if this declaration lexically is inside a function. - /// It recognizes non-defining declarations as well as members of local - /// classes: + /// Returns true if this declaration is lexically inside a function or inside + /// a variable initializer. It recognizes non-defining declarations as well + /// as members of local classes: /// \code /// void foo() { void bar(); } /// void foo2() { class ABC { void bar(); }; } + /// inline int x = [](){ return 0; }; /// \endcode - bool isLexicallyWithinFunctionOrMethod() const; + bool isInLocalScope() const; /// If this decl is defined inside a function/method/block it returns /// the corresponding DeclContext, otherwise it returns null. diff --git a/contrib/llvm-project/clang/include/clang/Basic/Attr.td b/contrib/llvm-project/clang/include/clang/Basic/Attr.td index 16556b5f0745..763b5b993e9a 100644 --- a/contrib/llvm-project/clang/include/clang/Basic/Attr.td +++ b/contrib/llvm-project/clang/include/clang/Basic/Attr.td @@ -685,7 +685,7 @@ def XRayLogArgs : InheritableAttr { def PatchableFunctionEntry : InheritableAttr, - TargetSpecificAttr<TargetArch<["aarch64", "x86", "x86_64"]>> { + TargetSpecificAttr<TargetArch<["aarch64", "aarch64_be", "x86", "x86_64"]>> { let Spellings = [GCC<"patchable_function_entry">]; let Subjects = SubjectList<[Function, ObjCMethod]>; let Args = [UnsignedArgument<"Count">, DefaultIntArgument<"Offset", 0>]; diff --git a/contrib/llvm-project/clang/include/clang/Driver/Options.td b/contrib/llvm-project/clang/include/clang/Driver/Options.td index 0a60873443fc..a30caaa9c50b 100644 --- a/contrib/llvm-project/clang/include/clang/Driver/Options.td +++ b/contrib/llvm-project/clang/include/clang/Driver/Options.td @@ -2267,6 +2267,14 @@ def mspeculative_load_hardening : Flag<["-"], 
"mspeculative-load-hardening">, Group<m_Group>, Flags<[CoreOption,CC1Option]>; def mno_speculative_load_hardening : Flag<["-"], "mno-speculative-load-hardening">, Group<m_Group>, Flags<[CoreOption]>; +def mlvi_hardening : Flag<["-"], "mlvi-hardening">, Group<m_Group>, Flags<[CoreOption,DriverOption]>, + HelpText<"Enable all mitigations for Load Value Injection (LVI)">; +def mno_lvi_hardening : Flag<["-"], "mno-lvi-hardening">, Group<m_Group>, Flags<[CoreOption,DriverOption]>, + HelpText<"Disable mitigations for Load Value Injection (LVI)">; +def mlvi_cfi : Flag<["-"], "mlvi-cfi">, Group<m_Group>, Flags<[CoreOption,DriverOption]>, + HelpText<"Enable only control-flow mitigations for Load Value Injection (LVI)">; +def mno_lvi_cfi : Flag<["-"], "mno-lvi-cfi">, Group<m_Group>, Flags<[CoreOption,DriverOption]>, + HelpText<"Disable control-flow mitigations for Load Value Injection (LVI)">; def mrelax : Flag<["-"], "mrelax">, Group<m_riscv_Features_Group>, HelpText<"Enable linker relaxation">; @@ -2439,6 +2447,12 @@ def mlongcall: Flag<["-"], "mlongcall">, Group<m_ppc_Features_Group>; def mno_longcall : Flag<["-"], "mno-longcall">, Group<m_ppc_Features_Group>; +def maix_struct_return : Flag<["-"], "maix-struct-return">, + Group<m_Group>, Flags<[CC1Option]>, + HelpText<"Return all structs in memory (PPC32 only)">; +def msvr4_struct_return : Flag<["-"], "msvr4-struct-return">, + Group<m_Group>, Flags<[CC1Option]>, + HelpText<"Return small structs in registers (PPC32 only)">; def mvx : Flag<["-"], "mvx">, Group<m_Group>; def mno_vx : Flag<["-"], "mno-vx">, Group<m_Group>; diff --git a/contrib/llvm-project/clang/lib/AST/DeclBase.cpp b/contrib/llvm-project/clang/lib/AST/DeclBase.cpp index cb4d61cac2c7..cb7c7fcbd4b8 100644 --- a/contrib/llvm-project/clang/lib/AST/DeclBase.cpp +++ b/contrib/llvm-project/clang/lib/AST/DeclBase.cpp @@ -332,13 +332,16 @@ void Decl::setDeclContextsImpl(DeclContext *SemaDC, DeclContext *LexicalDC, } } -bool Decl::isLexicallyWithinFunctionOrMethod() 
const { +bool Decl::isInLocalScope() const { const DeclContext *LDC = getLexicalDeclContext(); while (true) { if (LDC->isFunctionOrMethod()) return true; if (!isa<TagDecl>(LDC)) return false; + if (const auto *CRD = dyn_cast<CXXRecordDecl>(LDC)) + if (CRD->isLambda()) + return true; LDC = LDC->getLexicalParent(); } return false; diff --git a/contrib/llvm-project/clang/lib/AST/ExprConstant.cpp b/contrib/llvm-project/clang/lib/AST/ExprConstant.cpp index c3ebe08cba05..afa4ae5d1374 100644 --- a/contrib/llvm-project/clang/lib/AST/ExprConstant.cpp +++ b/contrib/llvm-project/clang/lib/AST/ExprConstant.cpp @@ -8593,6 +8593,10 @@ bool PointerExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, static bool EvaluateArrayNewInitList(EvalInfo &Info, LValue &This, APValue &Result, const InitListExpr *ILE, QualType AllocType); +static bool EvaluateArrayNewConstructExpr(EvalInfo &Info, LValue &This, + APValue &Result, + const CXXConstructExpr *CCE, + QualType AllocType); bool PointerExprEvaluator::VisitCXXNewExpr(const CXXNewExpr *E) { if (!Info.getLangOpts().CPlusPlus2a) @@ -8642,6 +8646,7 @@ bool PointerExprEvaluator::VisitCXXNewExpr(const CXXNewExpr *E) { const Expr *Init = E->getInitializer(); const InitListExpr *ResizedArrayILE = nullptr; + const CXXConstructExpr *ResizedArrayCCE = nullptr; QualType AllocType = E->getAllocatedType(); if (Optional<const Expr*> ArraySize = E->getArraySize()) { @@ -8685,7 +8690,7 @@ bool PointerExprEvaluator::VisitCXXNewExpr(const CXXNewExpr *E) { // -- the new-initializer is a braced-init-list and the number of // array elements for which initializers are provided [...] // exceeds the number of elements to initialize - if (Init) { + if (Init && !isa<CXXConstructExpr>(Init)) { auto *CAT = Info.Ctx.getAsConstantArrayType(Init->getType()); assert(CAT && "unexpected type for array initializer"); @@ -8708,6 +8713,8 @@ bool PointerExprEvaluator::VisitCXXNewExpr(const CXXNewExpr *E) { // special handling for this case when we initialize. 
if (InitBound != AllocBound) ResizedArrayILE = cast<InitListExpr>(Init); + } else if (Init) { + ResizedArrayCCE = cast<CXXConstructExpr>(Init); } AllocType = Info.Ctx.getConstantArrayType(AllocType, ArrayBound, nullptr, @@ -8772,6 +8779,10 @@ bool PointerExprEvaluator::VisitCXXNewExpr(const CXXNewExpr *E) { if (!EvaluateArrayNewInitList(Info, Result, *Val, ResizedArrayILE, AllocType)) return false; + } else if (ResizedArrayCCE) { + if (!EvaluateArrayNewConstructExpr(Info, Result, *Val, ResizedArrayCCE, + AllocType)) + return false; } else if (Init) { if (!EvaluateInPlace(*Val, Info, Result, Init)) return false; @@ -9597,6 +9608,16 @@ static bool EvaluateArrayNewInitList(EvalInfo &Info, LValue &This, .VisitInitListExpr(ILE, AllocType); } +static bool EvaluateArrayNewConstructExpr(EvalInfo &Info, LValue &This, + APValue &Result, + const CXXConstructExpr *CCE, + QualType AllocType) { + assert(CCE->isRValue() && CCE->getType()->isArrayType() && + "not an array rvalue"); + return ArrayExprEvaluator(Info, This, Result) + .VisitCXXConstructExpr(CCE, This, &Result, AllocType); +} + // Return true iff the given array filler may depend on the element index. 
static bool MaybeElementDependentArrayFiller(const Expr *FillerExpr) { // For now, just whitelist non-class value-initialization and initialization diff --git a/contrib/llvm-project/clang/lib/AST/RawCommentList.cpp b/contrib/llvm-project/clang/lib/AST/RawCommentList.cpp index 83e8a0b942a4..d7124156521c 100644 --- a/contrib/llvm-project/clang/lib/AST/RawCommentList.cpp +++ b/contrib/llvm-project/clang/lib/AST/RawCommentList.cpp @@ -430,7 +430,7 @@ std::string RawComment::getFormattedText(const SourceManager &SourceMgr, }; auto DropTrailingNewLines = [](std::string &Str) { - while (Str.back() == '\n') + while (!Str.empty() && Str.back() == '\n') Str.pop_back(); }; diff --git a/contrib/llvm-project/clang/lib/Basic/Targets/PPC.h b/contrib/llvm-project/clang/lib/Basic/Targets/PPC.h index 270aa7ff9181..ef5c2264a0b0 100644 --- a/contrib/llvm-project/clang/lib/Basic/Targets/PPC.h +++ b/contrib/llvm-project/clang/lib/Basic/Targets/PPC.h @@ -276,11 +276,12 @@ public: break; case 'Q': // Memory operand that is an offset from a register (it is // usually better to use `m' or `es' in asm statements) + Info.setAllowsRegister(); + LLVM_FALLTHROUGH; case 'Z': // Memory operand that is an indexed or indirect from a // register (it is usually better to use `m' or `es' in // asm statements) Info.setAllowsMemory(); - Info.setAllowsRegister(); break; case 'R': // AIX TOC entry case 'a': // Address operand that is an indexed or indirect from a diff --git a/contrib/llvm-project/clang/lib/CodeGen/CodeGenModule.cpp b/contrib/llvm-project/clang/lib/CodeGen/CodeGenModule.cpp index f8866ac4f7f6..a735bdd814ed 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CodeGenModule.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CodeGenModule.cpp @@ -1847,9 +1847,16 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F, else if (const auto *SA = FD->getAttr<SectionAttr>()) F->setSection(SA->getName()); + // If we plan on emitting this inline builtin, we can't treat it as a 
builtin. if (FD->isInlineBuiltinDeclaration()) { - F->addAttribute(llvm::AttributeList::FunctionIndex, - llvm::Attribute::NoBuiltin); + const FunctionDecl *FDBody; + bool HasBody = FD->hasBody(FDBody); + (void)HasBody; + assert(HasBody && "Inline builtin declarations should always have an " + "available body!"); + if (shouldEmitFunction(FDBody)) + F->addAttribute(llvm::AttributeList::FunctionIndex, + llvm::Attribute::NoBuiltin); } if (FD->isReplaceableGlobalAllocationFunction()) { diff --git a/contrib/llvm-project/clang/lib/CodeGen/TargetInfo.cpp b/contrib/llvm-project/clang/lib/CodeGen/TargetInfo.cpp index 682ef18da73b..12e17ac751b4 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/TargetInfo.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/TargetInfo.cpp @@ -4123,12 +4123,24 @@ namespace { /// PPC32_SVR4_ABIInfo - The 32-bit PowerPC ELF (SVR4) ABI information. class PPC32_SVR4_ABIInfo : public DefaultABIInfo { bool IsSoftFloatABI; + bool IsRetSmallStructInRegABI; CharUnits getParamTypeAlignment(QualType Ty) const; public: - PPC32_SVR4_ABIInfo(CodeGen::CodeGenTypes &CGT, bool SoftFloatABI) - : DefaultABIInfo(CGT), IsSoftFloatABI(SoftFloatABI) {} + PPC32_SVR4_ABIInfo(CodeGen::CodeGenTypes &CGT, bool SoftFloatABI, + bool RetSmallStructInRegABI) + : DefaultABIInfo(CGT), IsSoftFloatABI(SoftFloatABI), + IsRetSmallStructInRegABI(RetSmallStructInRegABI) {} + + ABIArgInfo classifyReturnType(QualType RetTy) const; + + void computeInfo(CGFunctionInfo &FI) const override { + if (!getCXXABI().classifyReturnType(FI)) + FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); + for (auto &I : FI.arguments()) + I.info = classifyArgumentType(I.type); + } Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; @@ -4136,8 +4148,13 @@ public: class PPC32TargetCodeGenInfo : public TargetCodeGenInfo { public: - PPC32TargetCodeGenInfo(CodeGenTypes &CGT, bool SoftFloatABI) - : TargetCodeGenInfo(new PPC32_SVR4_ABIInfo(CGT, SoftFloatABI)) {} + 
PPC32TargetCodeGenInfo(CodeGenTypes &CGT, bool SoftFloatABI, + bool RetSmallStructInRegABI) + : TargetCodeGenInfo(new PPC32_SVR4_ABIInfo(CGT, SoftFloatABI, + RetSmallStructInRegABI)) {} + + static bool isStructReturnInRegABI(const llvm::Triple &Triple, + const CodeGenOptions &Opts); int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override { // This is recovered from gcc output. @@ -4173,6 +4190,34 @@ CharUnits PPC32_SVR4_ABIInfo::getParamTypeAlignment(QualType Ty) const { return CharUnits::fromQuantity(4); } +ABIArgInfo PPC32_SVR4_ABIInfo::classifyReturnType(QualType RetTy) const { + uint64_t Size; + + // -msvr4-struct-return puts small aggregates in GPR3 and GPR4. + if (isAggregateTypeForABI(RetTy) && IsRetSmallStructInRegABI && + (Size = getContext().getTypeSize(RetTy)) <= 64) { + // System V ABI (1995), page 3-22, specified: + // > A structure or union whose size is less than or equal to 8 bytes + // > shall be returned in r3 and r4, as if it were first stored in the + // > 8-byte aligned memory area and then the low addressed word were + // > loaded into r3 and the high-addressed word into r4. Bits beyond + // > the last member of the structure or union are not defined. + // + // GCC for big-endian PPC32 inserts the pad before the first member, + // not "beyond the last member" of the struct. To stay compatible + // with GCC, we coerce the struct to an integer of the same size. + // LLVM will extend it and return i32 in r3, or i64 in r3:r4. + if (Size == 0) + return ABIArgInfo::getIgnore(); + else { + llvm::Type *CoerceTy = llvm::Type::getIntNTy(getVMContext(), Size); + return ABIArgInfo::getDirect(CoerceTy); + } + } + + return DefaultABIInfo::classifyReturnType(RetTy); +} + // TODO: this implementation is now likely redundant with // DefaultABIInfo::EmitVAArg. 
Address PPC32_SVR4_ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAList, @@ -4328,6 +4373,25 @@ Address PPC32_SVR4_ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAList, return Result; } +bool PPC32TargetCodeGenInfo::isStructReturnInRegABI( + const llvm::Triple &Triple, const CodeGenOptions &Opts) { + assert(Triple.getArch() == llvm::Triple::ppc); + + switch (Opts.getStructReturnConvention()) { + case CodeGenOptions::SRCK_Default: + break; + case CodeGenOptions::SRCK_OnStack: // -maix-struct-return + return false; + case CodeGenOptions::SRCK_InRegs: // -msvr4-struct-return + return true; + } + + if (Triple.isOSBinFormatELF() && !Triple.isOSLinux()) + return true; + + return false; +} + bool PPC32TargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const { @@ -9613,7 +9677,8 @@ ABIArgInfo RISCVABIInfo::classifyArgumentType(QualType Ty, bool IsFixed, uint64_t Size = getContext().getTypeSize(Ty); // Pass floating point values via FPRs if possible. 
- if (IsFixed && Ty->isFloatingType() && FLen >= Size && ArgFPRsLeft) { + if (IsFixed && Ty->isFloatingType() && !Ty->isComplexType() && + FLen >= Size && ArgFPRsLeft) { ArgFPRsLeft--; return ABIArgInfo::getDirect(); } @@ -9852,10 +9917,14 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { return SetCGInfo(new ARMTargetCodeGenInfo(Types, Kind)); } - case llvm::Triple::ppc: + case llvm::Triple::ppc: { + bool IsSoftFloat = + CodeGenOpts.FloatABI == "soft" || getTarget().hasFeature("spe"); + bool RetSmallStructInRegABI = + PPC32TargetCodeGenInfo::isStructReturnInRegABI(Triple, CodeGenOpts); return SetCGInfo( - new PPC32TargetCodeGenInfo(Types, CodeGenOpts.FloatABI == "soft" || - getTarget().hasFeature("spe"))); + new PPC32TargetCodeGenInfo(Types, IsSoftFloat, RetSmallStructInRegABI)); + } case llvm::Triple::ppc64: if (Triple.isOSBinFormatELF()) { PPC64_SVR4_ABIInfo::ABIKind Kind = PPC64_SVR4_ABIInfo::ELFv1; diff --git a/contrib/llvm-project/clang/lib/Driver/SanitizerArgs.cpp b/contrib/llvm-project/clang/lib/Driver/SanitizerArgs.cpp index ac9a294ee3fa..60fd932fbe6f 100644 --- a/contrib/llvm-project/clang/lib/Driver/SanitizerArgs.cpp +++ b/contrib/llvm-project/clang/lib/Driver/SanitizerArgs.cpp @@ -454,8 +454,7 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC, << lastArgumentForMask(D, Args, Kinds & NeedsLTO) << "-flto"; } - if ((Kinds & SanitizerKind::ShadowCallStack) && - TC.getTriple().getArch() == llvm::Triple::aarch64 && + if ((Kinds & SanitizerKind::ShadowCallStack) && TC.getTriple().isAArch64() && !llvm::AArch64::isX18ReservedByDefault(TC.getTriple()) && !Args.hasArg(options::OPT_ffixed_x18)) { D.Diag(diag::err_drv_argument_only_allowed_with) diff --git a/contrib/llvm-project/clang/lib/Driver/ToolChain.cpp b/contrib/llvm-project/clang/lib/Driver/ToolChain.cpp index cab97b1a601a..18400d9def54 100644 --- a/contrib/llvm-project/clang/lib/Driver/ToolChain.cpp +++ b/contrib/llvm-project/clang/lib/Driver/ToolChain.cpp @@ -954,15 +954,12 @@ 
SanitizerMask ToolChain::getSupportedSanitizers() const { if (getTriple().getArch() == llvm::Triple::x86 || getTriple().getArch() == llvm::Triple::x86_64 || getTriple().getArch() == llvm::Triple::arm || - getTriple().getArch() == llvm::Triple::aarch64 || getTriple().getArch() == llvm::Triple::wasm32 || - getTriple().getArch() == llvm::Triple::wasm64) + getTriple().getArch() == llvm::Triple::wasm64 || getTriple().isAArch64()) Res |= SanitizerKind::CFIICall; - if (getTriple().getArch() == llvm::Triple::x86_64 || - getTriple().getArch() == llvm::Triple::aarch64) + if (getTriple().getArch() == llvm::Triple::x86_64 || getTriple().isAArch64()) Res |= SanitizerKind::ShadowCallStack; - if (getTriple().getArch() == llvm::Triple::aarch64 || - getTriple().getArch() == llvm::Triple::aarch64_be) + if (getTriple().isAArch64()) Res |= SanitizerKind::MemTag; return Res; } diff --git a/contrib/llvm-project/clang/lib/Driver/ToolChains/Arch/RISCV.cpp b/contrib/llvm-project/clang/lib/Driver/ToolChains/Arch/RISCV.cpp index 8c343b8693f3..d0c082bfc53b 100644 --- a/contrib/llvm-project/clang/lib/Driver/ToolChains/Arch/RISCV.cpp +++ b/contrib/llvm-project/clang/lib/Driver/ToolChains/Arch/RISCV.cpp @@ -426,8 +426,9 @@ void riscv::getRISCVTargetFeatures(const Driver &D, const llvm::Triple &Triple, if (Args.hasArg(options::OPT_ffixed_x31)) Features.push_back("+reserve-x31"); - // -mrelax is default, unless -mno-relax is specified. - if (Args.hasFlag(options::OPT_mrelax, options::OPT_mno_relax, true)) + // FreeBSD local, because ld.lld doesn't support relaxations + // -mno-relax is default, unless -mrelax is specified. 
+ if (Args.hasFlag(options::OPT_mrelax, options::OPT_mno_relax, false)) Features.push_back("+relax"); else Features.push_back("-relax"); diff --git a/contrib/llvm-project/clang/lib/Driver/ToolChains/Arch/X86.cpp b/contrib/llvm-project/clang/lib/Driver/ToolChains/Arch/X86.cpp index fc07952ba314..32a5c0051e93 100644 --- a/contrib/llvm-project/clang/lib/Driver/ToolChains/Arch/X86.cpp +++ b/contrib/llvm-project/clang/lib/Driver/ToolChains/Arch/X86.cpp @@ -147,6 +147,7 @@ void x86::getX86TargetFeatures(const Driver &D, const llvm::Triple &Triple, // flags). This is a bit hacky but keeps existing usages working. We should // consider deprecating this and instead warn if the user requests external // retpoline thunks and *doesn't* request some form of retpolines. + auto SpectreOpt = clang::driver::options::ID::OPT_INVALID; if (Args.hasArgNoClaim(options::OPT_mretpoline, options::OPT_mno_retpoline, options::OPT_mspeculative_load_hardening, options::OPT_mno_speculative_load_hardening)) { @@ -154,12 +155,14 @@ void x86::getX86TargetFeatures(const Driver &D, const llvm::Triple &Triple, false)) { Features.push_back("+retpoline-indirect-calls"); Features.push_back("+retpoline-indirect-branches"); + SpectreOpt = options::OPT_mretpoline; } else if (Args.hasFlag(options::OPT_mspeculative_load_hardening, options::OPT_mno_speculative_load_hardening, false)) { // On x86, speculative load hardening relies on at least using retpolines // for indirect calls. Features.push_back("+retpoline-indirect-calls"); + SpectreOpt = options::OPT_mspeculative_load_hardening; } } else if (Args.hasFlag(options::OPT_mretpoline_external_thunk, options::OPT_mno_retpoline_external_thunk, false)) { @@ -167,6 +170,26 @@ void x86::getX86TargetFeatures(const Driver &D, const llvm::Triple &Triple, // eventually switch to an error here. 
Features.push_back("+retpoline-indirect-calls"); Features.push_back("+retpoline-indirect-branches"); + SpectreOpt = options::OPT_mretpoline_external_thunk; + } + + auto LVIOpt = clang::driver::options::ID::OPT_INVALID; + if (Args.hasFlag(options::OPT_mlvi_hardening, options::OPT_mno_lvi_hardening, + false)) { + Features.push_back("+lvi-load-hardening"); + Features.push_back("+lvi-cfi"); // load hardening implies CFI protection + LVIOpt = options::OPT_mlvi_hardening; + } else if (Args.hasFlag(options::OPT_mlvi_cfi, options::OPT_mno_lvi_cfi, + false)) { + Features.push_back("+lvi-cfi"); + LVIOpt = options::OPT_mlvi_cfi; + } + + if (SpectreOpt != clang::driver::options::ID::OPT_INVALID && + LVIOpt != clang::driver::options::ID::OPT_INVALID) { + D.Diag(diag::err_drv_argument_not_allowed_with) + << D.getOpts().getOptionName(SpectreOpt) + << D.getOpts().getOptionName(LVIOpt); } // Now add any that the user explicitly requested on the command line, diff --git a/contrib/llvm-project/clang/lib/Driver/ToolChains/Clang.cpp b/contrib/llvm-project/clang/lib/Driver/ToolChains/Clang.cpp index fa025be14e43..8b49b42598a8 100644 --- a/contrib/llvm-project/clang/lib/Driver/ToolChains/Clang.cpp +++ b/contrib/llvm-project/clang/lib/Driver/ToolChains/Clang.cpp @@ -4421,6 +4421,19 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back(A->getValue()); } + if (Arg *A = Args.getLastArg(options::OPT_maix_struct_return, + options::OPT_msvr4_struct_return)) { + if (TC.getArch() != llvm::Triple::ppc) { + D.Diag(diag::err_drv_unsupported_opt_for_target) + << A->getSpelling() << RawTriple.str(); + } else if (A->getOption().matches(options::OPT_maix_struct_return)) { + CmdArgs.push_back("-maix-struct-return"); + } else { + assert(A->getOption().matches(options::OPT_msvr4_struct_return)); + CmdArgs.push_back("-msvr4-struct-return"); + } + } + if (Arg *A = Args.getLastArg(options::OPT_fpcc_struct_return, options::OPT_freg_struct_return)) { if (TC.getArch() != 
llvm::Triple::x86) { diff --git a/contrib/llvm-project/clang/lib/Driver/ToolChains/Darwin.cpp b/contrib/llvm-project/clang/lib/Driver/ToolChains/Darwin.cpp index 220bc8f98351..46265c1b9f1a 100644 --- a/contrib/llvm-project/clang/lib/Driver/ToolChains/Darwin.cpp +++ b/contrib/llvm-project/clang/lib/Driver/ToolChains/Darwin.cpp @@ -1146,6 +1146,7 @@ void Darwin::addProfileRTLibs(const ArgList &Args, addExportedSymbol(CmdArgs, "___gcov_flush"); addExportedSymbol(CmdArgs, "_flush_fn_list"); addExportedSymbol(CmdArgs, "_writeout_fn_list"); + addExportedSymbol(CmdArgs, "_reset_fn_list"); } else { addExportedSymbol(CmdArgs, "___llvm_profile_filename"); addExportedSymbol(CmdArgs, "___llvm_profile_raw_version"); diff --git a/contrib/llvm-project/clang/lib/Driver/ToolChains/Gnu.cpp b/contrib/llvm-project/clang/lib/Driver/ToolChains/Gnu.cpp index da197e476621..e8ef881e89ac 100644 --- a/contrib/llvm-project/clang/lib/Driver/ToolChains/Gnu.cpp +++ b/contrib/llvm-project/clang/lib/Driver/ToolChains/Gnu.cpp @@ -309,7 +309,7 @@ static const char *getLDMOption(const llvm::Triple &T, const ArgList &Args) { } } -static bool getPIE(const ArgList &Args, const toolchains::Linux &ToolChain) { +static bool getPIE(const ArgList &Args, const ToolChain &TC) { if (Args.hasArg(options::OPT_shared) || Args.hasArg(options::OPT_static) || Args.hasArg(options::OPT_r) || Args.hasArg(options::OPT_static_pie)) return false; @@ -317,17 +317,16 @@ static bool getPIE(const ArgList &Args, const toolchains::Linux &ToolChain) { Arg *A = Args.getLastArg(options::OPT_pie, options::OPT_no_pie, options::OPT_nopie); if (!A) - return ToolChain.isPIEDefault(); + return TC.isPIEDefault(); return A->getOption().matches(options::OPT_pie); } -static bool getStaticPIE(const ArgList &Args, - const toolchains::Linux &ToolChain) { +static bool getStaticPIE(const ArgList &Args, const ToolChain &TC) { bool HasStaticPIE = Args.hasArg(options::OPT_static_pie); // -no-pie is an alias for -nopie. 
So, handling -nopie takes care of // -no-pie as well. if (HasStaticPIE && Args.hasArg(options::OPT_nopie)) { - const Driver &D = ToolChain.getDriver(); + const Driver &D = TC.getDriver(); const llvm::opt::OptTable &Opts = D.getOpts(); const char *StaticPIEName = Opts.getOptionName(options::OPT_static_pie); const char *NoPIEName = Opts.getOptionName(options::OPT_nopie); @@ -346,8 +345,12 @@ void tools::gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { - const toolchains::Linux &ToolChain = - static_cast<const toolchains::Linux &>(getToolChain()); + // FIXME: The Linker class constructor takes a ToolChain and not a + // Generic_ELF, so the static_cast might return a reference to a invalid + // instance (see PR45061). Ideally, the Linker constructor needs to take a + // Generic_ELF instead. + const toolchains::Generic_ELF &ToolChain = + static_cast<const toolchains::Generic_ELF &>(getToolChain()); const Driver &D = ToolChain.getDriver(); const llvm::Triple &Triple = getToolChain().getEffectiveTriple(); @@ -418,8 +421,7 @@ void tools::gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA, if (isAndroid) CmdArgs.push_back("--warn-shared-textrel"); - for (const auto &Opt : ToolChain.ExtraOpts) - CmdArgs.push_back(Opt.c_str()); + ToolChain.addExtraOpts(CmdArgs); CmdArgs.push_back("--eh-frame-hdr"); diff --git a/contrib/llvm-project/clang/lib/Driver/ToolChains/Gnu.h b/contrib/llvm-project/clang/lib/Driver/ToolChains/Gnu.h index 083f74c05477..fa50b56bf954 100644 --- a/contrib/llvm-project/clang/lib/Driver/ToolChains/Gnu.h +++ b/contrib/llvm-project/clang/lib/Driver/ToolChains/Gnu.h @@ -356,6 +356,12 @@ public: void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, Action::OffloadKind DeviceOffloadKind) const override; + + virtual std::string getDynamicLinker(const llvm::opt::ArgList &Args) const { + return {}; + } + 
+ virtual void addExtraOpts(llvm::opt::ArgStringList &CmdArgs) const {} }; } // end namespace toolchains diff --git a/contrib/llvm-project/clang/lib/Driver/ToolChains/Hurd.cpp b/contrib/llvm-project/clang/lib/Driver/ToolChains/Hurd.cpp index 72166ca9f359..ee91f7d73b9c 100644 --- a/contrib/llvm-project/clang/lib/Driver/ToolChains/Hurd.cpp +++ b/contrib/llvm-project/clang/lib/Driver/ToolChains/Hurd.cpp @@ -61,8 +61,7 @@ static StringRef getOSLibDir(const llvm::Triple &Triple, const ArgList &Args) { return Triple.isArch32Bit() ? "lib" : "lib64"; } -Hurd::Hurd(const Driver &D, const llvm::Triple &Triple, - const ArgList &Args) +Hurd::Hurd(const Driver &D, const llvm::Triple &Triple, const ArgList &Args) : Generic_ELF(D, Triple, Args) { std::string SysRoot = computeSysRoot(); path_list &Paths = getFilePaths(); @@ -170,3 +169,8 @@ void Hurd::AddClangSystemIncludeArgs(const ArgList &DriverArgs, addExternCSystemInclude(DriverArgs, CC1Args, SysRoot + "/usr/include"); } + +void Hurd::addExtraOpts(llvm::opt::ArgStringList &CmdArgs) const { + for (const auto &Opt : ExtraOpts) + CmdArgs.push_back(Opt.c_str()); +} diff --git a/contrib/llvm-project/clang/lib/Driver/ToolChains/Hurd.h b/contrib/llvm-project/clang/lib/Driver/ToolChains/Hurd.h index 86c6c3f734dd..8f88d7e8e58e 100644 --- a/contrib/llvm-project/clang/lib/Driver/ToolChains/Hurd.h +++ b/contrib/llvm-project/clang/lib/Driver/ToolChains/Hurd.h @@ -27,9 +27,11 @@ public: AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const override; - virtual std::string computeSysRoot() const; + std::string computeSysRoot() const; - virtual std::string getDynamicLinker(const llvm::opt::ArgList &Args) const; + std::string getDynamicLinker(const llvm::opt::ArgList &Args) const override; + + void addExtraOpts(llvm::opt::ArgStringList &CmdArgs) const override; std::vector<std::string> ExtraOpts; diff --git a/contrib/llvm-project/clang/lib/Driver/ToolChains/Linux.cpp 
b/contrib/llvm-project/clang/lib/Driver/ToolChains/Linux.cpp index bff1ab1009be..6532c899492a 100644 --- a/contrib/llvm-project/clang/lib/Driver/ToolChains/Linux.cpp +++ b/contrib/llvm-project/clang/lib/Driver/ToolChains/Linux.cpp @@ -986,3 +986,8 @@ void Linux::addProfileRTLibs(const llvm::opt::ArgList &Args, Twine("-u", llvm::getInstrProfRuntimeHookVarName()))); ToolChain::addProfileRTLibs(Args, CmdArgs); } + +void Linux::addExtraOpts(llvm::opt::ArgStringList &CmdArgs) const { + for (const auto &Opt : ExtraOpts) + CmdArgs.push_back(Opt.c_str()); +} diff --git a/contrib/llvm-project/clang/lib/Driver/ToolChains/Linux.h b/contrib/llvm-project/clang/lib/Driver/ToolChains/Linux.h index f5518eac218a..923ebecbd215 100644 --- a/contrib/llvm-project/clang/lib/Driver/ToolChains/Linux.h +++ b/contrib/llvm-project/clang/lib/Driver/ToolChains/Linux.h @@ -42,7 +42,9 @@ public: llvm::opt::ArgStringList &CmdArgs) const override; virtual std::string computeSysRoot() const; - virtual std::string getDynamicLinker(const llvm::opt::ArgList &Args) const; + std::string getDynamicLinker(const llvm::opt::ArgList &Args) const override; + + void addExtraOpts(llvm::opt::ArgStringList &CmdArgs) const override; std::vector<std::string> ExtraOpts; diff --git a/contrib/llvm-project/clang/lib/Format/TokenAnnotator.cpp b/contrib/llvm-project/clang/lib/Format/TokenAnnotator.cpp index 70bcd7048c55..8cb786a4d343 100644 --- a/contrib/llvm-project/clang/lib/Format/TokenAnnotator.cpp +++ b/contrib/llvm-project/clang/lib/Format/TokenAnnotator.cpp @@ -2176,6 +2176,10 @@ static bool isFunctionDeclarationName(const FormatToken &Current, Next = Next->Next; continue; } + if (Next->is(TT_TemplateOpener) && Next->MatchingParen) { + Next = Next->MatchingParen; + continue; + } break; } @@ -2705,20 +2709,40 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, tok::l_square)); if (Right.is(tok::star) && Left.is(tok::l_paren)) return false; - if (Right.isOneOf(tok::star, tok::amp, tok::ampamp) && 
- (Left.is(tok::identifier) || Left.isSimpleTypeSpecifier()) && - // Space between the type and the * in: - // operator void*() - // operator char*() - // operator /*comment*/ const char*() - // operator volatile /*comment*/ char*() - // operator Foo*() - // dependent on PointerAlignment style. - Left.Previous && - (Left.Previous->endsSequence(tok::kw_operator) || - Left.Previous->endsSequence(tok::kw_const, tok::kw_operator) || - Left.Previous->endsSequence(tok::kw_volatile, tok::kw_operator))) - return (Style.PointerAlignment != FormatStyle::PAS_Left); + if (Right.is(tok::star) && Left.is(tok::star)) + return false; + if (Right.isOneOf(tok::star, tok::amp, tok::ampamp)) { + const FormatToken *Previous = &Left; + while (Previous && !Previous->is(tok::kw_operator)) { + if (Previous->is(tok::identifier) || Previous->isSimpleTypeSpecifier()) { + Previous = Previous->getPreviousNonComment(); + continue; + } + if (Previous->is(TT_TemplateCloser) && Previous->MatchingParen) { + Previous = Previous->MatchingParen->getPreviousNonComment(); + continue; + } + if (Previous->is(tok::coloncolon)) { + Previous = Previous->getPreviousNonComment(); + continue; + } + break; + } + // Space between the type and the * in: + // operator void*() + // operator char*() + // operator /*comment*/ const char*() + // operator volatile /*comment*/ char*() + // operator Foo*() + // operator C<T>*() + // operator std::Foo*() + // operator C<T>::D<U>*() + // dependent on PointerAlignment style. 
+ if (Previous && (Previous->endsSequence(tok::kw_operator) || + Previous->endsSequence(tok::kw_const, tok::kw_operator) || + Previous->endsSequence(tok::kw_volatile, tok::kw_operator))) + return (Style.PointerAlignment != FormatStyle::PAS_Left); + } const auto SpaceRequiredForArrayInitializerLSquare = [](const FormatToken &LSquareTok, const FormatStyle &Style) { return Style.SpacesInContainerLiterals || diff --git a/contrib/llvm-project/clang/lib/Frontend/CompilerInvocation.cpp b/contrib/llvm-project/clang/lib/Frontend/CompilerInvocation.cpp index e98a407ac42f..11e56f2331b4 100644 --- a/contrib/llvm-project/clang/lib/Frontend/CompilerInvocation.cpp +++ b/contrib/llvm-project/clang/lib/Frontend/CompilerInvocation.cpp @@ -1279,11 +1279,18 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK, Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << Val; } - if (Arg *A = Args.getLastArg(OPT_fpcc_struct_return, OPT_freg_struct_return)) { - if (A->getOption().matches(OPT_fpcc_struct_return)) { + // X86_32 has -fppc-struct-return and -freg-struct-return. + // PPC32 has -maix-struct-return and -msvr4-struct-return. 
+ if (Arg *A = + Args.getLastArg(OPT_fpcc_struct_return, OPT_freg_struct_return, + OPT_maix_struct_return, OPT_msvr4_struct_return)) { + const Option &O = A->getOption(); + if (O.matches(OPT_fpcc_struct_return) || + O.matches(OPT_maix_struct_return)) { Opts.setStructReturnConvention(CodeGenOptions::SRCK_OnStack); } else { - assert(A->getOption().matches(OPT_freg_struct_return)); + assert(O.matches(OPT_freg_struct_return) || + O.matches(OPT_msvr4_struct_return)); Opts.setStructReturnConvention(CodeGenOptions::SRCK_InRegs); } } diff --git a/contrib/llvm-project/clang/lib/Sema/SemaTemplate.cpp b/contrib/llvm-project/clang/lib/Sema/SemaTemplate.cpp index c38c724ed9b0..264c903209af 100644 --- a/contrib/llvm-project/clang/lib/Sema/SemaTemplate.cpp +++ b/contrib/llvm-project/clang/lib/Sema/SemaTemplate.cpp @@ -3817,6 +3817,9 @@ TypeResult Sema::ActOnTagTemplateIdType(TagUseKind TUK, SourceLocation LAngleLoc, ASTTemplateArgsPtr TemplateArgsIn, SourceLocation RAngleLoc) { + if (SS.isInvalid()) + return TypeResult(true); + TemplateName Template = TemplateD.get(); // Translate the parser's template argument list in our AST format. 
@@ -5925,7 +5928,9 @@ bool UnnamedLocalNoLinkageFinder::VisitDependentNameType( bool UnnamedLocalNoLinkageFinder::VisitDependentTemplateSpecializationType( const DependentTemplateSpecializationType* T) { - return VisitNestedNameSpecifier(T->getQualifier()); + if (auto *Q = T->getQualifier()) + return VisitNestedNameSpecifier(Q); + return false; } bool UnnamedLocalNoLinkageFinder::VisitPackExpansionType( @@ -5979,6 +5984,7 @@ bool UnnamedLocalNoLinkageFinder::VisitTagDecl(const TagDecl *Tag) { bool UnnamedLocalNoLinkageFinder::VisitNestedNameSpecifier( NestedNameSpecifier *NNS) { + assert(NNS); if (NNS->getPrefix() && VisitNestedNameSpecifier(NNS->getPrefix())) return true; diff --git a/contrib/llvm-project/clang/lib/Sema/SemaTemplateInstantiate.cpp b/contrib/llvm-project/clang/lib/Sema/SemaTemplateInstantiate.cpp index b5d2ab1f31f2..c53c37ee109f 100644 --- a/contrib/llvm-project/clang/lib/Sema/SemaTemplateInstantiate.cpp +++ b/contrib/llvm-project/clang/lib/Sema/SemaTemplateInstantiate.cpp @@ -2343,7 +2343,7 @@ ParmVarDecl *Sema::SubstParmVarDecl(ParmVarDecl *OldParm, UnparsedDefaultArgInstantiations[OldParm].push_back(NewParm); } else if (Expr *Arg = OldParm->getDefaultArg()) { FunctionDecl *OwningFunc = cast<FunctionDecl>(OldParm->getDeclContext()); - if (OwningFunc->isLexicallyWithinFunctionOrMethod()) { + if (OwningFunc->isInLocalScope()) { // Instantiate default arguments for methods of local classes (DR1484) // and non-defining declarations. 
Sema::ContextRAII SavedContext(*this, OwningFunc); diff --git a/contrib/llvm-project/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/contrib/llvm-project/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp index 37dace3bee7f..f801e79c8902 100644 --- a/contrib/llvm-project/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp +++ b/contrib/llvm-project/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -4367,7 +4367,7 @@ TemplateDeclInstantiator::InitFunctionInstantiation(FunctionDecl *New, EPI.ExceptionSpec.Type != EST_None && EPI.ExceptionSpec.Type != EST_DynamicNone && EPI.ExceptionSpec.Type != EST_BasicNoexcept && - !Tmpl->isLexicallyWithinFunctionOrMethod()) { + !Tmpl->isInLocalScope()) { FunctionDecl *ExceptionSpecTemplate = Tmpl; if (EPI.ExceptionSpec.Type == EST_Uninstantiated) ExceptionSpecTemplate = EPI.ExceptionSpec.SourceTemplate; diff --git a/contrib/llvm-project/clang/lib/Sema/TreeTransform.h b/contrib/llvm-project/clang/lib/Sema/TreeTransform.h index 0305954a278e..bbc6fc6deeef 100644 --- a/contrib/llvm-project/clang/lib/Sema/TreeTransform.h +++ b/contrib/llvm-project/clang/lib/Sema/TreeTransform.h @@ -4022,50 +4022,8 @@ template<typename Derived> void TreeTransform<Derived>::InventTemplateArgumentLoc( const TemplateArgument &Arg, TemplateArgumentLoc &Output) { - SourceLocation Loc = getDerived().getBaseLocation(); - switch (Arg.getKind()) { - case TemplateArgument::Null: - llvm_unreachable("null template argument in TreeTransform"); - break; - - case TemplateArgument::Type: - Output = TemplateArgumentLoc(Arg, - SemaRef.Context.getTrivialTypeSourceInfo(Arg.getAsType(), Loc)); - - break; - - case TemplateArgument::Template: - case TemplateArgument::TemplateExpansion: { - NestedNameSpecifierLocBuilder Builder; - TemplateName Template = Arg.getAsTemplateOrTemplatePattern(); - if (DependentTemplateName *DTN = Template.getAsDependentTemplateName()) - Builder.MakeTrivial(SemaRef.Context, DTN->getQualifier(), Loc); - else if (QualifiedTemplateName *QTN = 
Template.getAsQualifiedTemplateName()) - Builder.MakeTrivial(SemaRef.Context, QTN->getQualifier(), Loc); - - if (Arg.getKind() == TemplateArgument::Template) - Output = TemplateArgumentLoc(Arg, - Builder.getWithLocInContext(SemaRef.Context), - Loc); - else - Output = TemplateArgumentLoc(Arg, - Builder.getWithLocInContext(SemaRef.Context), - Loc, Loc); - - break; - } - - case TemplateArgument::Expression: - Output = TemplateArgumentLoc(Arg, Arg.getAsExpr()); - break; - - case TemplateArgument::Declaration: - case TemplateArgument::Integral: - case TemplateArgument::Pack: - case TemplateArgument::NullPtr: - Output = TemplateArgumentLoc(Arg, TemplateArgumentLocInfo()); - break; - } + Output = getSema().getTrivialTemplateArgumentLoc( + Arg, QualType(), getDerived().getBaseLocation()); } template<typename Derived> @@ -4075,12 +4033,45 @@ bool TreeTransform<Derived>::TransformTemplateArgument( const TemplateArgument &Arg = Input.getArgument(); switch (Arg.getKind()) { case TemplateArgument::Null: - case TemplateArgument::Integral: case TemplateArgument::Pack: - case TemplateArgument::Declaration: - case TemplateArgument::NullPtr: llvm_unreachable("Unexpected TemplateArgument"); + case TemplateArgument::Integral: + case TemplateArgument::NullPtr: + case TemplateArgument::Declaration: { + // Transform a resolved template argument straight to a resolved template + // argument. We get here when substituting into an already-substituted + // template type argument during concept satisfaction checking. + QualType T = Arg.getNonTypeTemplateArgumentType(); + QualType NewT = getDerived().TransformType(T); + if (NewT.isNull()) + return true; + + ValueDecl *D = Arg.getKind() == TemplateArgument::Declaration + ? Arg.getAsDecl() + : nullptr; + ValueDecl *NewD = D ? 
cast_or_null<ValueDecl>(getDerived().TransformDecl( + getDerived().getBaseLocation(), D)) + : nullptr; + if (D && !NewD) + return true; + + if (NewT == T && D == NewD) + Output = Input; + else if (Arg.getKind() == TemplateArgument::Integral) + Output = TemplateArgumentLoc( + TemplateArgument(getSema().Context, Arg.getAsIntegral(), NewT), + TemplateArgumentLocInfo()); + else if (Arg.getKind() == TemplateArgument::NullPtr) + Output = TemplateArgumentLoc(TemplateArgument(NewT, /*IsNullPtr=*/true), + TemplateArgumentLocInfo()); + else + Output = TemplateArgumentLoc(TemplateArgument(NewD, NewT), + TemplateArgumentLocInfo()); + + return false; + } + case TemplateArgument::Type: { TypeSourceInfo *DI = Input.getTypeSourceInfo(); if (!DI) @@ -11837,19 +11828,6 @@ TreeTransform<Derived>::TransformLambdaExpr(LambdaExpr *E) { LSI->CallOperator = NewCallOperator; - for (unsigned I = 0, NumParams = NewCallOperator->getNumParams(); - I != NumParams; ++I) { - auto *P = NewCallOperator->getParamDecl(I); - if (P->hasUninstantiatedDefaultArg()) { - EnterExpressionEvaluationContext Eval( - getSema(), - Sema::ExpressionEvaluationContext::PotentiallyEvaluatedIfUsed, P); - ExprResult R = getDerived().TransformExpr( - E->getCallOperator()->getParamDecl(I)->getDefaultArg()); - P->setDefaultArg(R.get()); - } - } - getDerived().transformAttrs(E->getCallOperator(), NewCallOperator); getDerived().transformedLocalDecl(E->getCallOperator(), {NewCallOperator}); diff --git a/contrib/llvm-project/clang/lib/Tooling/Syntax/Tokens.cpp b/contrib/llvm-project/clang/lib/Tooling/Syntax/Tokens.cpp index 3df1c064923a..35a35f904069 100644 --- a/contrib/llvm-project/clang/lib/Tooling/Syntax/Tokens.cpp +++ b/contrib/llvm-project/clang/lib/Tooling/Syntax/Tokens.cpp @@ -335,14 +335,38 @@ public: SourceRange Range, const MacroArgs *Args) override { if (!Collector) return; - // Only record top-level expansions, not those where: + const auto &SM = Collector->PP.getSourceManager(); + // Only record top-level 
expansions that directly produce expanded tokens. + // This excludes those where: // - the macro use is inside a macro body, // - the macro appears in an argument to another macro. - if (!MacroNameTok.getLocation().isFileID() || - (LastExpansionEnd.isValid() && - Collector->PP.getSourceManager().isBeforeInTranslationUnit( - Range.getBegin(), LastExpansionEnd))) + // However macro expansion isn't really a tree, it's token rewrite rules, + // so there are other cases, e.g. + // #define B(X) X + // #define A 1 + B + // A(2) + // Both A and B produce expanded tokens, though the macro name 'B' comes + // from an expansion. The best we can do is merge the mappings for both. + + // The *last* token of any top-level macro expansion must be in a file. + // (In the example above, see the closing paren of the expansion of B). + if (!Range.getEnd().isFileID()) return; + // If there's a current expansion that encloses this one, this one can't be + // top-level. + if (LastExpansionEnd.isValid() && + !SM.isBeforeInTranslationUnit(LastExpansionEnd, Range.getEnd())) + return; + + // If the macro invocation (B) starts in a macro (A) but ends in a file, + // we'll create a merged mapping for A + B by overwriting the endpoint for + // A's startpoint. + if (!Range.getBegin().isFileID()) { + Range.setBegin(SM.getExpansionLoc(Range.getBegin())); + assert(Collector->Expansions.count(Range.getBegin().getRawEncoding()) && + "Overlapping macros should have same expansion location"); + } + Collector->Expansions[Range.getBegin().getRawEncoding()] = Range.getEnd(); LastExpansionEnd = Range.getEnd(); } @@ -399,197 +423,167 @@ public: } TokenBuffer build() && { - buildSpelledTokens(); - - // Walk over expanded tokens and spelled tokens in parallel, building the - // mappings between those using source locations. 
- // To correctly recover empty macro expansions, we also take locations - // reported to PPCallbacks::MacroExpands into account as we do not have any - // expanded tokens with source locations to guide us. - - // The 'eof' token is special, it is not part of spelled token stream. We - // handle it separately at the end. assert(!Result.ExpandedTokens.empty()); assert(Result.ExpandedTokens.back().kind() == tok::eof); - for (unsigned I = 0; I < Result.ExpandedTokens.size() - 1; ++I) { - // (!) I might be updated by the following call. - processExpandedToken(I); - } - // 'eof' not handled in the loop, do it here. - assert(SM.getMainFileID() == - SM.getFileID(Result.ExpandedTokens.back().location())); - fillGapUntil(Result.Files[SM.getMainFileID()], - Result.ExpandedTokens.back().location(), - Result.ExpandedTokens.size() - 1); - Result.Files[SM.getMainFileID()].EndExpanded = Result.ExpandedTokens.size(); + // Tokenize every file that contributed tokens to the expanded stream. + buildSpelledTokens(); - // Some files might have unaccounted spelled tokens at the end, add an empty - // mapping for those as they did not have expanded counterparts. - fillGapsAtEndOfFiles(); + // The expanded token stream consists of runs of tokens that came from + // the same source (a macro expansion, part of a file etc). + // Between these runs are the logical positions of spelled tokens that + // didn't expand to anything. + while (NextExpanded < Result.ExpandedTokens.size() - 1 /* eof */) { + // Create empty mappings for spelled tokens that expanded to nothing here. + // May advance NextSpelled, but NextExpanded is unchanged. + discard(); + // Create mapping for a contiguous run of expanded tokens. + // Advances NextExpanded past the run, and NextSpelled accordingly. + unsigned OldPosition = NextExpanded; + advance(); + if (NextExpanded == OldPosition) + diagnoseAdvanceFailure(); + } + // If any tokens remain in any of the files, they didn't expand to anything. 
+ // Create empty mappings up until the end of the file. + for (const auto &File : Result.Files) + discard(File.first); return std::move(Result); } private: - /// Process the next token in an expanded stream and move corresponding - /// spelled tokens, record any mapping if needed. - /// (!) \p I will be updated if this had to skip tokens, e.g. for macros. - void processExpandedToken(unsigned &I) { - auto L = Result.ExpandedTokens[I].location(); - if (L.isMacroID()) { - processMacroExpansion(SM.getExpansionRange(L), I); - return; + // Consume a sequence of spelled tokens that didn't expand to anything. + // In the simplest case, skips spelled tokens until finding one that produced + // the NextExpanded token, and creates an empty mapping for them. + // If Drain is provided, skips remaining tokens from that file instead. + void discard(llvm::Optional<FileID> Drain = llvm::None) { + SourceLocation Target = + Drain ? SM.getLocForEndOfFile(*Drain) + : SM.getExpansionLoc( + Result.ExpandedTokens[NextExpanded].location()); + FileID File = SM.getFileID(Target); + const auto &SpelledTokens = Result.Files[File].SpelledTokens; + auto &NextSpelled = this->NextSpelled[File]; + + TokenBuffer::Mapping Mapping; + Mapping.BeginSpelled = NextSpelled; + // When dropping trailing tokens from a file, the empty mapping should + // be positioned within the file's expanded-token range (at the end). + Mapping.BeginExpanded = Mapping.EndExpanded = + Drain ? Result.Files[*Drain].EndExpanded : NextExpanded; + // We may want to split into several adjacent empty mappings. + // FlushMapping() emits the current mapping and starts a new one. 
+ auto FlushMapping = [&, this] { + Mapping.EndSpelled = NextSpelled; + if (Mapping.BeginSpelled != Mapping.EndSpelled) + Result.Files[File].Mappings.push_back(Mapping); + Mapping.BeginSpelled = NextSpelled; + }; + + while (NextSpelled < SpelledTokens.size() && + SpelledTokens[NextSpelled].location() < Target) { + // If we know mapping bounds at [NextSpelled, KnownEnd] (macro expansion) + // then we want to partition our (empty) mapping. + // [Start, NextSpelled) [NextSpelled, KnownEnd] (KnownEnd, Target) + SourceLocation KnownEnd = CollectedExpansions.lookup( + SpelledTokens[NextSpelled].location().getRawEncoding()); + if (KnownEnd.isValid()) { + FlushMapping(); // Emits [Start, NextSpelled) + while (NextSpelled < SpelledTokens.size() && + SpelledTokens[NextSpelled].location() <= KnownEnd) + ++NextSpelled; + FlushMapping(); // Emits [NextSpelled, KnownEnd] + // Now the loop contitues and will emit (KnownEnd, Target). + } else { + ++NextSpelled; + } } - if (L.isFileID()) { - auto FID = SM.getFileID(L); - TokenBuffer::MarkedFile &File = Result.Files[FID]; - - fillGapUntil(File, L, I); + FlushMapping(); + } - // Skip the token. - assert(File.SpelledTokens[NextSpelled[FID]].location() == L && - "no corresponding token in the spelled stream"); - ++NextSpelled[FID]; - return; + // Consumes the NextExpanded token and others that are part of the same run. + // Increases NextExpanded and NextSpelled by at least one, and adds a mapping + // (unless this is a run of file tokens, which we represent with no mapping). + void advance() { + const syntax::Token &Tok = Result.ExpandedTokens[NextExpanded]; + SourceLocation Expansion = SM.getExpansionLoc(Tok.location()); + FileID File = SM.getFileID(Expansion); + const auto &SpelledTokens = Result.Files[File].SpelledTokens; + auto &NextSpelled = this->NextSpelled[File]; + + if (Tok.location().isFileID()) { + // A run of file tokens continues while the expanded/spelled tokens match. 
+ while (NextSpelled < SpelledTokens.size() && + NextExpanded < Result.ExpandedTokens.size() && + SpelledTokens[NextSpelled].location() == + Result.ExpandedTokens[NextExpanded].location()) { + ++NextSpelled; + ++NextExpanded; + } + // We need no mapping for file tokens copied to the expanded stream. + } else { + // We found a new macro expansion. We should have its spelling bounds. + auto End = CollectedExpansions.lookup(Expansion.getRawEncoding()); + assert(End.isValid() && "Macro expansion wasn't captured?"); + + // Mapping starts here... + TokenBuffer::Mapping Mapping; + Mapping.BeginExpanded = NextExpanded; + Mapping.BeginSpelled = NextSpelled; + // ... consumes spelled tokens within bounds we captured ... + while (NextSpelled < SpelledTokens.size() && + SpelledTokens[NextSpelled].location() <= End) + ++NextSpelled; + // ... consumes expanded tokens rooted at the same expansion ... + while (NextExpanded < Result.ExpandedTokens.size() && + SM.getExpansionLoc( + Result.ExpandedTokens[NextExpanded].location()) == Expansion) + ++NextExpanded; + // ... and ends here. + Mapping.EndExpanded = NextExpanded; + Mapping.EndSpelled = NextSpelled; + Result.Files[File].Mappings.push_back(Mapping); } } - /// Skipped expanded and spelled tokens of a macro expansion that covers \p - /// SpelledRange. Add a corresponding mapping. - /// (!) \p I will be the index of the last token in an expansion after this - /// function returns. - void processMacroExpansion(CharSourceRange SpelledRange, unsigned &I) { - auto FID = SM.getFileID(SpelledRange.getBegin()); - assert(FID == SM.getFileID(SpelledRange.getEnd())); - TokenBuffer::MarkedFile &File = Result.Files[FID]; - - fillGapUntil(File, SpelledRange.getBegin(), I); - - // Skip all expanded tokens from the same macro expansion. 
- unsigned BeginExpanded = I; - for (; I + 1 < Result.ExpandedTokens.size(); ++I) { - auto NextL = Result.ExpandedTokens[I + 1].location(); - if (!NextL.isMacroID() || - SM.getExpansionLoc(NextL) != SpelledRange.getBegin()) - break; + // advance() is supposed to consume at least one token - if not, we crash. + void diagnoseAdvanceFailure() { +#ifndef NDEBUG + // Show the failed-to-map token in context. + for (unsigned I = (NextExpanded < 10) ? 0 : NextExpanded - 10; + I < NextExpanded + 5 && I < Result.ExpandedTokens.size(); ++I) { + const char *L = + (I == NextExpanded) ? "!! " : (I < NextExpanded) ? "ok " : " "; + llvm::errs() << L << Result.ExpandedTokens[I].dumpForTests(SM) << "\n"; } - unsigned EndExpanded = I + 1; - consumeMapping(File, SM.getFileOffset(SpelledRange.getEnd()), BeginExpanded, - EndExpanded, NextSpelled[FID]); +#endif + llvm_unreachable("Couldn't map expanded token to spelled tokens!"); } /// Initializes TokenBuffer::Files and fills spelled tokens and expanded /// ranges for each of the files. void buildSpelledTokens() { for (unsigned I = 0; I < Result.ExpandedTokens.size(); ++I) { - auto FID = - SM.getFileID(SM.getExpansionLoc(Result.ExpandedTokens[I].location())); + const auto &Tok = Result.ExpandedTokens[I]; + auto FID = SM.getFileID(SM.getExpansionLoc(Tok.location())); auto It = Result.Files.try_emplace(FID); TokenBuffer::MarkedFile &File = It.first->second; - File.EndExpanded = I + 1; + // The eof token should not be considered part of the main-file's range. + File.EndExpanded = Tok.kind() == tok::eof ? I : I + 1; + if (!It.second) continue; // we have seen this file before. - // This is the first time we see this file. 
File.BeginExpanded = I; File.SpelledTokens = tokenize(FID, SM, LangOpts); } } - void consumeEmptyMapping(TokenBuffer::MarkedFile &File, unsigned EndOffset, - unsigned ExpandedIndex, unsigned &SpelledIndex) { - consumeMapping(File, EndOffset, ExpandedIndex, ExpandedIndex, SpelledIndex); - } - - /// Consumes spelled tokens that form a macro expansion and adds a entry to - /// the resulting token buffer. - /// (!) SpelledIndex is updated in-place. - void consumeMapping(TokenBuffer::MarkedFile &File, unsigned EndOffset, - unsigned BeginExpanded, unsigned EndExpanded, - unsigned &SpelledIndex) { - // We need to record this mapping before continuing. - unsigned MappingBegin = SpelledIndex; - ++SpelledIndex; - - bool HitMapping = - tryConsumeSpelledUntil(File, EndOffset + 1, SpelledIndex).hasValue(); - (void)HitMapping; - assert(!HitMapping && "recursive macro expansion?"); - - TokenBuffer::Mapping M; - M.BeginExpanded = BeginExpanded; - M.EndExpanded = EndExpanded; - M.BeginSpelled = MappingBegin; - M.EndSpelled = SpelledIndex; - - File.Mappings.push_back(M); - } - - /// Consumes spelled tokens until location \p L is reached and adds a mapping - /// covering the consumed tokens. The mapping will point to an empty expanded - /// range at position \p ExpandedIndex. 
- void fillGapUntil(TokenBuffer::MarkedFile &File, SourceLocation L, - unsigned ExpandedIndex) { - assert(L.isFileID()); - FileID FID; - unsigned Offset; - std::tie(FID, Offset) = SM.getDecomposedLoc(L); - - unsigned &SpelledIndex = NextSpelled[FID]; - unsigned MappingBegin = SpelledIndex; - while (true) { - auto EndLoc = tryConsumeSpelledUntil(File, Offset, SpelledIndex); - if (SpelledIndex != MappingBegin) { - TokenBuffer::Mapping M; - M.BeginSpelled = MappingBegin; - M.EndSpelled = SpelledIndex; - M.BeginExpanded = M.EndExpanded = ExpandedIndex; - File.Mappings.push_back(M); - } - if (!EndLoc) - break; - consumeEmptyMapping(File, SM.getFileOffset(*EndLoc), ExpandedIndex, - SpelledIndex); - - MappingBegin = SpelledIndex; - } - }; - - /// Consumes spelled tokens until it reaches Offset or a mapping boundary, - /// i.e. a name of a macro expansion or the start '#' token of a PP directive. - /// (!) NextSpelled is updated in place. - /// - /// returns None if \p Offset was reached, otherwise returns the end location - /// of a mapping that starts at \p NextSpelled. - llvm::Optional<SourceLocation> - tryConsumeSpelledUntil(TokenBuffer::MarkedFile &File, unsigned Offset, - unsigned &NextSpelled) { - for (; NextSpelled < File.SpelledTokens.size(); ++NextSpelled) { - auto L = File.SpelledTokens[NextSpelled].location(); - if (Offset <= SM.getFileOffset(L)) - return llvm::None; // reached the offset we are looking for. - auto Mapping = CollectedExpansions.find(L.getRawEncoding()); - if (Mapping != CollectedExpansions.end()) - return Mapping->second; // found a mapping before the offset. - } - return llvm::None; // no more tokens, we "reached" the offset. - } - - /// Adds empty mappings for unconsumed spelled tokens at the end of each file. 
- void fillGapsAtEndOfFiles() { - for (auto &F : Result.Files) { - if (F.second.SpelledTokens.empty()) - continue; - fillGapUntil(F.second, F.second.SpelledTokens.back().endLocation(), - F.second.EndExpanded); - } - } - TokenBuffer Result; - /// For each file, a position of the next spelled token we will consume. - llvm::DenseMap<FileID, unsigned> NextSpelled; + unsigned NextExpanded = 0; // cursor in ExpandedTokens + llvm::DenseMap<FileID, unsigned> NextSpelled; // cursor in SpelledTokens PPExpansions CollectedExpansions; const SourceManager &SM; const LangOptions &LangOpts; diff --git a/contrib/llvm-project/clang/utils/TableGen/ClangAttrEmitter.cpp b/contrib/llvm-project/clang/utils/TableGen/ClangAttrEmitter.cpp index 4c3742c8e339..2fce9d428137 100644 --- a/contrib/llvm-project/clang/utils/TableGen/ClangAttrEmitter.cpp +++ b/contrib/llvm-project/clang/utils/TableGen/ClangAttrEmitter.cpp @@ -2825,6 +2825,7 @@ void EmitClangAttrPCHRead(RecordKeeper &Records, raw_ostream &OS) { if (R.isSubClassOf(InhClass)) OS << " bool isInherited = Record.readInt();\n"; OS << " bool isImplicit = Record.readInt();\n"; + OS << " bool isPackExpansion = Record.readInt();\n"; ArgRecords = R.getValueAsListOfDefs("Args"); Args.clear(); for (const auto *Arg : ArgRecords) { @@ -2840,6 +2841,7 @@ void EmitClangAttrPCHRead(RecordKeeper &Records, raw_ostream &OS) { if (R.isSubClassOf(InhClass)) OS << " cast<InheritableAttr>(New)->setInherited(isInherited);\n"; OS << " New->setImplicit(isImplicit);\n"; + OS << " New->setPackExpansion(isPackExpansion);\n"; OS << " break;\n"; OS << " }\n"; } @@ -2866,6 +2868,7 @@ void EmitClangAttrPCHWrite(RecordKeeper &Records, raw_ostream &OS) { if (R.isSubClassOf(InhClass)) OS << " Record.push_back(SA->isInherited());\n"; OS << " Record.push_back(A->isImplicit());\n"; + OS << " Record.push_back(A->isPackExpansion());\n"; for (const auto *Arg : Args) createArgument(*Arg, R.getName())->writePCHWrite(OS); diff --git 
a/contrib/llvm-project/compiler-rt/lib/builtins/riscv/int_mul_impl.inc b/contrib/llvm-project/compiler-rt/lib/builtins/riscv/int_mul_impl.inc new file mode 100644 index 000000000000..50951d5f4195 --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/builtins/riscv/int_mul_impl.inc @@ -0,0 +1,31 @@ +//===-- int_mul_impl.inc - Integer multiplication -------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Helpers used by __mulsi3, __muldi3. +// +//===----------------------------------------------------------------------===// + +#if !defined(__riscv_mul) + .text + .align 2 + + .globl __mulxi3 + .type __mulxi3, @function +__mulxi3: + mv a2, a0 + mv a0, zero +.L1: + andi a3, a1, 1 + beqz a3, .L2 + add a0, a0, a2 +.L2: + srli a1, a1, 1 + slli a2, a2, 1 + bnez a1, .L1 + ret +#endif diff --git a/contrib/llvm-project/compiler-rt/lib/builtins/riscv/muldi3.S b/contrib/llvm-project/compiler-rt/lib/builtins/riscv/muldi3.S new file mode 100644 index 000000000000..9e292e8dd8b9 --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/builtins/riscv/muldi3.S @@ -0,0 +1,11 @@ +//===--- muldi3.S - Integer multiplication routines -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#if __riscv_xlen == 64 +#define __mulxi3 __muldi3 +#include "int_mul_impl.inc" +#endif diff --git a/contrib/llvm-project/compiler-rt/lib/builtins/riscv/mulsi3.S b/contrib/llvm-project/compiler-rt/lib/builtins/riscv/mulsi3.S index 5464919b26b9..cfafb7a0d7b3 100644 --- a/contrib/llvm-project/compiler-rt/lib/builtins/riscv/mulsi3.S +++ b/contrib/llvm-project/compiler-rt/lib/builtins/riscv/mulsi3.S @@ -1,4 +1,4 @@ -//===--- mulsi3.S - Integer multiplication routines routines ---===// +//===--- mulsi3.S - Integer multiplication routines -----------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,22 +6,7 @@ // //===----------------------------------------------------------------------===// -#if !defined(__riscv_mul) && __riscv_xlen == 32 - .text - .align 2 - - .globl __mulsi3 - .type __mulsi3, @function -__mulsi3: - mv a2, a0 - mv a0, zero -.L1: - andi a3, a1, 1 - beqz a3, .L2 - add a0, a0, a2 -.L2: - srli a1, a1, 1 - slli a2, a2, 1 - bnez a1, .L1 - ret +#if __riscv_xlen == 32 +#define __mulxi3 __mulsi3 +#include "int_mul_impl.inc" #endif diff --git a/contrib/llvm-project/compiler-rt/lib/profile/GCDAProfiling.c b/contrib/llvm-project/compiler-rt/lib/profile/GCDAProfiling.c index 498c05900bf2..124be3c13af6 100644 --- a/contrib/llvm-project/compiler-rt/lib/profile/GCDAProfiling.c +++ b/contrib/llvm-project/compiler-rt/lib/profile/GCDAProfiling.c @@ -32,8 +32,10 @@ #include <windows.h> #include "WindowsMMap.h" #else -#include <sys/mman.h> #include <sys/file.h> +#include <sys/mman.h> +#include <sys/types.h> +#include <unistd.h> #endif #if defined(__FreeBSD__) && defined(__i386__) @@ -119,6 +121,11 @@ struct fn_list writeout_fn_list; */ struct fn_list flush_fn_list; +/* + * A list of reset functions, shared 
between all dynamic objects. + */ +struct fn_list reset_fn_list; + static void fn_list_insert(struct fn_list* list, fn_ptr fn) { struct fn_node* new_node = malloc(sizeof(struct fn_node)); new_node->fn = fn; @@ -634,7 +641,46 @@ void llvm_delete_flush_function_list(void) { } COMPILER_RT_VISIBILITY -void llvm_gcov_init(fn_ptr wfn, fn_ptr ffn) { +void llvm_register_reset_function(fn_ptr fn) { + fn_list_insert(&reset_fn_list, fn); +} + +COMPILER_RT_VISIBILITY +void llvm_delete_reset_function_list(void) { fn_list_remove(&reset_fn_list); } + +COMPILER_RT_VISIBILITY +void llvm_reset_counters(void) { + struct fn_node *curr = reset_fn_list.head; + + while (curr) { + if (curr->id == CURRENT_ID) { + curr->fn(); + } + curr = curr->next; + } +} + +#if !defined(_WIN32) +COMPILER_RT_VISIBILITY +pid_t __gcov_fork() { + pid_t parent_pid = getpid(); + pid_t pid = fork(); + + if (pid == 0) { + pid_t child_pid = getpid(); + if (child_pid != parent_pid) { + // The pid changed so we've a fork (one could have its own fork function) + // Just reset the counters for this child process + // threads. + llvm_reset_counters(); + } + } + return pid; +} +#endif + +COMPILER_RT_VISIBILITY +void llvm_gcov_init(fn_ptr wfn, fn_ptr ffn, fn_ptr rfn) { static int atexit_ran = 0; if (wfn) @@ -643,10 +689,14 @@ void llvm_gcov_init(fn_ptr wfn, fn_ptr ffn) { if (ffn) llvm_register_flush_function(ffn); + if (rfn) + llvm_register_reset_function(rfn); + if (atexit_ran == 0) { atexit_ran = 1; /* Make sure we write out the data and delete the data structures. 
*/ + atexit(llvm_delete_reset_function_list); atexit(llvm_delete_flush_function_list); atexit(llvm_delete_writeout_function_list); atexit(llvm_writeout_files); diff --git a/contrib/llvm-project/libcxx/include/array b/contrib/llvm-project/libcxx/include/array index 88e9d57ff783..ddebf9159600 100644 --- a/contrib/llvm-project/libcxx/include/array +++ b/contrib/llvm-project/libcxx/include/array @@ -359,7 +359,7 @@ struct _LIBCPP_TEMPLATE_VIS array<_Tp, 0> #ifndef _LIBCPP_HAS_NO_DEDUCTION_GUIDES template<class _Tp, class... _Args, - class = typename enable_if<(is_same_v<_Tp, _Args> && ...), void>::type + class = _EnableIf<__all<_IsSame<_Tp, _Args>::value...>::value> > array(_Tp, _Args...) -> array<_Tp, 1 + sizeof...(_Args)>; diff --git a/contrib/llvm-project/lld/COFF/Chunks.h b/contrib/llvm-project/lld/COFF/Chunks.h index 7ae4ee735f4a..2be2a72c4a1e 100644 --- a/contrib/llvm-project/lld/COFF/Chunks.h +++ b/contrib/llvm-project/lld/COFF/Chunks.h @@ -486,7 +486,9 @@ public: class ImportThunkChunkARM : public ImportThunkChunk { public: - explicit ImportThunkChunkARM(Defined *s) : ImportThunkChunk(s) {} + explicit ImportThunkChunkARM(Defined *s) : ImportThunkChunk(s) { + setAlignment(2); + } size_t getSize() const override { return sizeof(importThunkARM); } void getBaserels(std::vector<Baserel> *res) override; void writeTo(uint8_t *buf) const override; @@ -494,14 +496,16 @@ public: class ImportThunkChunkARM64 : public ImportThunkChunk { public: - explicit ImportThunkChunkARM64(Defined *s) : ImportThunkChunk(s) {} + explicit ImportThunkChunkARM64(Defined *s) : ImportThunkChunk(s) { + setAlignment(4); + } size_t getSize() const override { return sizeof(importThunkARM64); } void writeTo(uint8_t *buf) const override; }; class RangeExtensionThunkARM : public NonSectionChunk { public: - explicit RangeExtensionThunkARM(Defined *t) : target(t) {} + explicit RangeExtensionThunkARM(Defined *t) : target(t) { setAlignment(2); } size_t getSize() const override; void writeTo(uint8_t 
*buf) const override; diff --git a/contrib/llvm-project/lld/COFF/DLL.cpp b/contrib/llvm-project/lld/COFF/DLL.cpp index 39d9fbab63d5..50301ad91b1d 100644 --- a/contrib/llvm-project/lld/COFF/DLL.cpp +++ b/contrib/llvm-project/lld/COFF/DLL.cpp @@ -365,7 +365,9 @@ public: class ThunkChunkARM : public NonSectionChunk { public: - ThunkChunkARM(Defined *i, Chunk *tm) : imp(i), tailMerge(tm) {} + ThunkChunkARM(Defined *i, Chunk *tm) : imp(i), tailMerge(tm) { + setAlignment(2); + } size_t getSize() const override { return sizeof(thunkARM); } @@ -385,7 +387,9 @@ public: class TailMergeChunkARM : public NonSectionChunk { public: - TailMergeChunkARM(Chunk *d, Defined *h) : desc(d), helper(h) {} + TailMergeChunkARM(Chunk *d, Defined *h) : desc(d), helper(h) { + setAlignment(2); + } size_t getSize() const override { return sizeof(tailMergeARM); } @@ -405,7 +409,9 @@ public: class ThunkChunkARM64 : public NonSectionChunk { public: - ThunkChunkARM64(Defined *i, Chunk *tm) : imp(i), tailMerge(tm) {} + ThunkChunkARM64(Defined *i, Chunk *tm) : imp(i), tailMerge(tm) { + setAlignment(4); + } size_t getSize() const override { return sizeof(thunkARM64); } @@ -422,7 +428,9 @@ public: class TailMergeChunkARM64 : public NonSectionChunk { public: - TailMergeChunkARM64(Chunk *d, Defined *h) : desc(d), helper(h) {} + TailMergeChunkARM64(Chunk *d, Defined *h) : desc(d), helper(h) { + setAlignment(4); + } size_t getSize() const override { return sizeof(tailMergeARM64); } diff --git a/contrib/llvm-project/lld/COFF/MarkLive.cpp b/contrib/llvm-project/lld/COFF/MarkLive.cpp index 6d34cb864e3c..0afa615a1933 100644 --- a/contrib/llvm-project/lld/COFF/MarkLive.cpp +++ b/contrib/llvm-project/lld/COFF/MarkLive.cpp @@ -28,10 +28,12 @@ void markLive(ArrayRef<Chunk *> chunks) { // as we push, so sections never appear twice in the list. SmallVector<SectionChunk *, 256> worklist; - // COMDAT section chunks are dead by default. Add non-COMDAT chunks. + // COMDAT section chunks are dead by default. 
Add non-COMDAT chunks. Do not + // traverse DWARF sections. They are live, but they should not keep other + // sections alive. for (Chunk *c : chunks) if (auto *sc = dyn_cast<SectionChunk>(c)) - if (sc->live) + if (sc->live && !sc->isDWARF()) worklist.push_back(sc); auto enqueue = [&](SectionChunk *c) { diff --git a/contrib/llvm-project/lld/ELF/Driver.cpp b/contrib/llvm-project/lld/ELF/Driver.cpp index 25330832339c..6de9698bb2c8 100644 --- a/contrib/llvm-project/lld/ELF/Driver.cpp +++ b/contrib/llvm-project/lld/ELF/Driver.cpp @@ -1906,8 +1906,17 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) { // We do not want to emit debug sections if --strip-all // or -strip-debug are given. - return config->strip != StripPolicy::None && - (s->name.startswith(".debug") || s->name.startswith(".zdebug")); + if (config->strip == StripPolicy::None) + return false; + + if (isDebugSection(*s)) + return true; + if (auto *isec = dyn_cast<InputSection>(s)) + if (InputSectionBase *rel = isec->getRelocatedSection()) + if (isDebugSection(*rel)) + return true; + + return false; }); // Now that the number of partitions is fixed, save a pointer to the main diff --git a/contrib/llvm-project/lld/ELF/InputSection.cpp b/contrib/llvm-project/lld/ELF/InputSection.cpp index a59e61976f30..8613e0d68e67 100644 --- a/contrib/llvm-project/lld/ELF/InputSection.cpp +++ b/contrib/llvm-project/lld/ELF/InputSection.cpp @@ -441,8 +441,7 @@ void InputSection::copyRelocations(uint8_t *buf, ArrayRef<RelTy> rels) { // See the comment in maybeReportUndefined for PPC32 .got2 and PPC64 .toc auto *d = dyn_cast<Defined>(&sym); if (!d) { - if (!sec->name.startswith(".debug") && - !sec->name.startswith(".zdebug") && sec->name != ".eh_frame" && + if (!isDebugSection(*sec) && sec->name != ".eh_frame" && sec->name != ".gcc_except_table" && sec->name != ".got2" && sec->name != ".toc") { uint32_t secIdx = cast<Undefined>(sym).discardedSecIdx; diff --git a/contrib/llvm-project/lld/ELF/InputSection.h 
b/contrib/llvm-project/lld/ELF/InputSection.h index 3c42af7db7b4..fe2c3c516a96 100644 --- a/contrib/llvm-project/lld/ELF/InputSection.h +++ b/contrib/llvm-project/lld/ELF/InputSection.h @@ -357,6 +357,10 @@ private: template <class ELFT> void copyShtGroup(uint8_t *buf); }; +inline bool isDebugSection(const InputSectionBase &sec) { + return sec.name.startswith(".debug") || sec.name.startswith(".zdebug"); +} + // The list of all input sections. extern std::vector<InputSectionBase *> inputSections; diff --git a/contrib/llvm-project/lld/ELF/OutputSections.cpp b/contrib/llvm-project/lld/ELF/OutputSections.cpp index 6142cb0783ce..b609878be319 100644 --- a/contrib/llvm-project/lld/ELF/OutputSections.cpp +++ b/contrib/llvm-project/lld/ELF/OutputSections.cpp @@ -114,8 +114,7 @@ void OutputSection::commitSection(InputSection *isec) { flags = isec->flags; } else { // Otherwise, check if new type or flags are compatible with existing ones. - unsigned mask = SHF_TLS | SHF_LINK_ORDER; - if ((flags & mask) != (isec->flags & mask)) + if ((flags ^ isec->flags) & SHF_TLS) error("incompatible section flags for " + name + "\n>>> " + toString(isec) + ": 0x" + utohexstr(isec->flags) + "\n>>> output section " + name + ": 0x" + utohexstr(flags)); @@ -367,8 +366,9 @@ void OutputSection::finalize() { // all InputSections in the OutputSection have the same dependency. 
if (auto *ex = dyn_cast<ARMExidxSyntheticSection>(first)) link = ex->getLinkOrderDep()->getParent()->sectionIndex; - else if (auto *d = first->getLinkOrderDep()) - link = d->getParent()->sectionIndex; + else if (first->flags & SHF_LINK_ORDER) + if (auto *d = first->getLinkOrderDep()) + link = d->getParent()->sectionIndex; } if (type == SHT_GROUP) { diff --git a/contrib/llvm-project/lld/ELF/ScriptLexer.cpp b/contrib/llvm-project/lld/ELF/ScriptLexer.cpp index e0ff56fec3f3..1fed3d06227e 100644 --- a/contrib/llvm-project/lld/ELF/ScriptLexer.cpp +++ b/contrib/llvm-project/lld/ELF/ScriptLexer.cpp @@ -52,6 +52,8 @@ StringRef ScriptLexer::getLine() { // Returns 1-based line number of the current token. size_t ScriptLexer::getLineNumber() { + if (pos == 0) + return 1; StringRef s = getCurrentMB().getBuffer(); StringRef tok = tokens[pos - 1]; return s.substr(0, tok.data() - s.data()).count('\n') + 1; @@ -292,7 +294,9 @@ static bool encloses(StringRef s, StringRef t) { MemoryBufferRef ScriptLexer::getCurrentMB() { // Find input buffer containing the current token. - assert(!mbs.empty() && pos > 0); + assert(!mbs.empty()); + if (pos == 0) + return mbs.back(); for (MemoryBufferRef mb : mbs) if (encloses(mb.getBuffer(), tokens[pos - 1])) return mb; diff --git a/contrib/llvm-project/lld/ELF/ScriptParser.cpp b/contrib/llvm-project/lld/ELF/ScriptParser.cpp index fd8de3b54bd7..80ec8b655b04 100644 --- a/contrib/llvm-project/lld/ELF/ScriptParser.cpp +++ b/contrib/llvm-project/lld/ELF/ScriptParser.cpp @@ -737,6 +737,7 @@ bool ScriptParser::readSectionDirective(OutputSection *cmd, StringRef tok1, Stri expect("("); if (consume("NOLOAD")) { cmd->noload = true; + cmd->type = SHT_NOBITS; } else { skip(); // This is "COPY", "INFO" or "OVERLAY". 
cmd->nonAlloc = true; diff --git a/contrib/llvm-project/lld/ELF/Writer.cpp b/contrib/llvm-project/lld/ELF/Writer.cpp index 43ec63945d62..7114ea2efb4d 100644 --- a/contrib/llvm-project/lld/ELF/Writer.cpp +++ b/contrib/llvm-project/lld/ELF/Writer.cpp @@ -1524,17 +1524,30 @@ template <class ELFT> void Writer<ELFT>::resolveShfLinkOrder() { // but sort must consider them all at once. std::vector<InputSection **> scriptSections; std::vector<InputSection *> sections; + bool started = false, stopped = false; for (BaseCommand *base : sec->sectionCommands) { if (auto *isd = dyn_cast<InputSectionDescription>(base)) { for (InputSection *&isec : isd->sections) { - scriptSections.push_back(&isec); - sections.push_back(isec); - - InputSection *link = isec->getLinkOrderDep(); - if (!link->getParent()) - error(toString(isec) + ": sh_link points to discarded section " + - toString(link)); + if (!(isec->flags & SHF_LINK_ORDER)) { + if (started) + stopped = true; + } else if (stopped) { + error(toString(isec) + ": SHF_LINK_ORDER sections in " + sec->name + + " are not contiguous"); + } else { + started = true; + + scriptSections.push_back(&isec); + sections.push_back(isec); + + InputSection *link = isec->getLinkOrderDep(); + if (!link->getParent()) + error(toString(isec) + ": sh_link points to discarded section " + + toString(link)); + } } + } else if (started) { + stopped = true; } } diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/ValueLattice.h b/contrib/llvm-project/llvm/include/llvm/Analysis/ValueLattice.h index 56519d7d0857..415c32e7c23c 100644 --- a/contrib/llvm-project/llvm/include/llvm/Analysis/ValueLattice.h +++ b/contrib/llvm-project/llvm/include/llvm/Analysis/ValueLattice.h @@ -29,7 +29,7 @@ class ValueLatticeElement { /// producing instruction is dead. Caution: We use this as the starting /// state in our local meet rules. In this usage, it's taken to mean /// "nothing known yet". - undefined, + unknown, /// This Value has a specific constant value. 
(For constant integers, /// constantrange is used instead. Integer typed constantexprs can appear @@ -45,7 +45,12 @@ class ValueLatticeElement { constantrange, /// We can not precisely model the dynamic values this value might take. - overdefined + overdefined, + + /// This Value is an UndefValue constant or produces undef. Undefined values + /// can be merged with constants (or single element constant ranges), + /// assuming all uses of the result will be replaced. + undef }; ValueLatticeElementTy Tag; @@ -60,14 +65,15 @@ class ValueLatticeElement { public: // Const and Range are initialized on-demand. - ValueLatticeElement() : Tag(undefined) {} + ValueLatticeElement() : Tag(unknown) {} /// Custom destructor to ensure Range is properly destroyed, when the object /// is deallocated. ~ValueLatticeElement() { switch (Tag) { case overdefined: - case undefined: + case unknown: + case undef: case constant: case notconstant: break; @@ -79,7 +85,7 @@ public: /// Custom copy constructor, to ensure Range gets initialized when /// copying a constant range lattice element. 
- ValueLatticeElement(const ValueLatticeElement &Other) : Tag(undefined) { + ValueLatticeElement(const ValueLatticeElement &Other) : Tag(unknown) { *this = Other; } @@ -109,7 +115,8 @@ public: ConstVal = Other.ConstVal; break; case overdefined: - case undefined: + case unknown: + case undef: break; } Tag = Other.Tag; @@ -118,14 +125,16 @@ public: static ValueLatticeElement get(Constant *C) { ValueLatticeElement Res; - if (!isa<UndefValue>(C)) + if (isa<UndefValue>(C)) + Res.markUndef(); + else Res.markConstant(C); return Res; } static ValueLatticeElement getNot(Constant *C) { ValueLatticeElement Res; - if (!isa<UndefValue>(C)) - Res.markNotConstant(C); + assert(!isa<UndefValue>(C) && "!= undef is not supported"); + Res.markNotConstant(C); return Res; } static ValueLatticeElement getRange(ConstantRange CR) { @@ -139,7 +148,10 @@ public: return Res; } - bool isUndefined() const { return Tag == undefined; } + bool isUndef() const { return Tag == undef; } + bool isUnknown() const { return Tag == unknown; } + bool isUnknownOrUndef() const { return Tag == unknown || Tag == undef; } + bool isUndefined() const { return isUnknownOrUndef(); } bool isConstant() const { return Tag == constant; } bool isNotConstant() const { return Tag == notconstant; } bool isConstantRange() const { return Tag == constantrange; } @@ -170,89 +182,123 @@ public: return None; } -private: - void markOverdefined() { + bool markOverdefined() { if (isOverdefined()) - return; + return false; if (isConstant() || isNotConstant()) ConstVal = nullptr; if (isConstantRange()) Range.~ConstantRange(); Tag = overdefined; + return true; } - void markConstant(Constant *V) { - assert(V && "Marking constant with NULL"); - if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { - markConstantRange(ConstantRange(CI->getValue())); - return; - } + bool markUndef() { + if (isUndef()) + return false; + + assert(isUnknown()); + Tag = undef; + return true; + } + + bool markConstant(Constant *V) { if (isa<UndefValue>(V)) - 
return; + return markUndef(); - assert((!isConstant() || getConstant() == V) && - "Marking constant with different value"); - assert(isUndefined()); + if (isConstant()) { + assert(getConstant() == V && "Marking constant with different value"); + return false; + } + + if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) + return markConstantRange(ConstantRange(CI->getValue())); + + assert(isUnknown() || isUndef()); Tag = constant; ConstVal = V; + return true; } - void markNotConstant(Constant *V) { + bool markNotConstant(Constant *V) { assert(V && "Marking constant with NULL"); - if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { - markConstantRange(ConstantRange(CI->getValue() + 1, CI->getValue())); - return; - } + if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) + return markConstantRange( + ConstantRange(CI->getValue() + 1, CI->getValue())); + if (isa<UndefValue>(V)) - return; + return false; - assert((!isConstant() || getConstant() != V) && - "Marking constant !constant with same value"); - assert((!isNotConstant() || getNotConstant() == V) && - "Marking !constant with different value"); - assert(isUndefined() || isConstant()); + if (isNotConstant()) { + assert(getNotConstant() == V && "Marking !constant with different value"); + return false; + } + + assert(isUnknown()); Tag = notconstant; ConstVal = V; + return true; } - void markConstantRange(ConstantRange NewR) { + /// Mark the object as constant range with \p NewR. If the object is already a + /// constant range, nothing changes if the existing range is equal to \p + /// NewR. Otherwise \p NewR must be a superset of the existing range or the + /// object must be undef. 
+ bool markConstantRange(ConstantRange NewR) { if (isConstantRange()) { + if (getConstantRange() == NewR) + return false; + if (NewR.isEmptySet()) - markOverdefined(); - else { - Range = std::move(NewR); - } - return; + return markOverdefined(); + + assert(NewR.contains(getConstantRange()) && + "Existing range must be a subset of NewR"); + Range = std::move(NewR); + return true; } - assert(isUndefined()); + assert(isUnknown() || isUndef()); if (NewR.isEmptySet()) - markOverdefined(); - else { - Tag = constantrange; - new (&Range) ConstantRange(std::move(NewR)); - } + return markOverdefined(); + + Tag = constantrange; + new (&Range) ConstantRange(std::move(NewR)); + return true; } -public: /// Updates this object to approximate both this object and RHS. Returns /// true if this object has been changed. bool mergeIn(const ValueLatticeElement &RHS, const DataLayout &DL) { - if (RHS.isUndefined() || isOverdefined()) + if (RHS.isUnknown() || isOverdefined()) return false; if (RHS.isOverdefined()) { markOverdefined(); return true; } - if (isUndefined()) { + if (isUndef()) { + assert(!RHS.isUnknown()); + if (RHS.isUndef()) + return false; + if (RHS.isConstant()) + return markConstant(RHS.getConstant()); + if (RHS.isConstantRange() && RHS.getConstantRange().isSingleElement()) + return markConstantRange(RHS.getConstantRange()); + return markOverdefined(); + } + + if (isUnknown()) { + assert(!RHS.isUnknown() && "Unknow RHS should be handled earlier"); *this = RHS; - return !RHS.isUndefined(); + return true; } if (isConstant()) { if (RHS.isConstant() && getConstant() == RHS.getConstant()) return false; + if (RHS.isUndef()) + return false; markOverdefined(); return true; } @@ -265,6 +311,9 @@ public: } assert(isConstantRange() && "New ValueLattice type?"); + if (RHS.isUndef() && getConstantRange().isSingleElement()) + return false; + if (!RHS.isConstantRange()) { // We can get here if we've encountered a constantexpr of integer type // and merge it with a constantrange. 
@@ -273,18 +322,11 @@ public: } ConstantRange NewR = getConstantRange().unionWith(RHS.getConstantRange()); if (NewR.isFullSet()) - markOverdefined(); + return markOverdefined(); else if (NewR == getConstantRange()) return false; else - markConstantRange(std::move(NewR)); - return true; - } - - ConstantInt *getConstantInt() const { - assert(isConstant() && isa<ConstantInt>(getConstant()) && - "No integer constant"); - return cast<ConstantInt>(getConstant()); + return markConstantRange(std::move(NewR)); } /// Compares this symbolic value with Other using Pred and returns either @@ -292,7 +334,7 @@ public: /// evaluated. Constant *getCompare(CmpInst::Predicate Pred, Type *Ty, const ValueLatticeElement &Other) const { - if (isUndefined() || Other.isUndefined()) + if (isUnknownOrUndef() || Other.isUnknownOrUndef()) return UndefValue::get(Ty); if (isConstant() && Other.isConstant()) diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/RDFGraph.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/RDFGraph.h index 585f43e116f9..585f43e116f9 100644 --- a/contrib/llvm-project/llvm/lib/Target/Hexagon/RDFGraph.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/RDFGraph.h diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/RDFLiveness.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/RDFLiveness.h index ea4890271726..ea4890271726 100644 --- a/contrib/llvm-project/llvm/lib/Target/Hexagon/RDFLiveness.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/RDFLiveness.h diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/RDFRegisters.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/RDFRegisters.h index 4afaf80e4659..4afaf80e4659 100644 --- a/contrib/llvm-project/llvm/lib/Target/Hexagon/RDFRegisters.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/RDFRegisters.h diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h index 
3b103c227708..bb8cc032e28d 100644 --- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h +++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h @@ -71,6 +71,11 @@ public: template <typename CreateFunc> TypeIndex insertRecordAs(GloballyHashedType Hash, size_t RecordSize, CreateFunc Create) { + assert(RecordSize < UINT32_MAX && "Record too big"); + assert(RecordSize % 4 == 0 && + "RecordSize is not a multiple of 4 bytes which will cause " + "misalignment in the output TPI stream!"); + auto Result = HashedRecords.try_emplace(Hash, nextTypeIndex()); if (LLVM_UNLIKELY(Result.second /*inserted*/ || diff --git a/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsPowerPC.td index f87317445753..fc9fa2153aea 100644 --- a/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsPowerPC.td +++ b/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsPowerPC.td @@ -488,6 +488,9 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.". 
def int_ppc_altivec_vmsumuhm : GCCBuiltin<"__builtin_altivec_vmsumuhm">, Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v4i32_ty], [IntrNoMem]>; + def int_ppc_altivec_vmsumudm : GCCBuiltin<"__builtin_altivec_vmsumudm">, + Intrinsic<[llvm_v1i128_ty], [llvm_v2i64_ty, llvm_v2i64_ty, + llvm_v1i128_ty], [IntrNoMem]>; def int_ppc_altivec_vmsumuhs : GCCBuiltin<"__builtin_altivec_vmsumuhs">, Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v4i32_ty], [IntrNoMem]>; diff --git a/contrib/llvm-project/llvm/include/llvm/Support/AArch64TargetParser.def b/contrib/llvm-project/llvm/include/llvm/Support/AArch64TargetParser.def index 6b25ef2ca435..050059e36a25 100644 --- a/contrib/llvm-project/llvm/include/llvm/Support/AArch64TargetParser.def +++ b/contrib/llvm-project/llvm/include/llvm/Support/AArch64TargetParser.def @@ -152,6 +152,10 @@ AARCH64_CPU_NAME("kryo", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, (AArch64::AEK_CRC)) AARCH64_CPU_NAME("thunderx2t99", ARMV8_1A, FK_CRYPTO_NEON_FP_ARMV8, false, (AArch64::AEK_NONE)) +AARCH64_CPU_NAME("thunderx3t110", ARMV8_3A, FK_CRYPTO_NEON_FP_ARMV8, false, + (AArch64::AEK_CRC | AEK_CRYPTO | AEK_FP | AEK_SIMD | + AEK_LSE | AEK_RAND | AArch64::AEK_PROFILE | + AArch64::AEK_RAS)) AARCH64_CPU_NAME("thunderx", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, (AArch64::AEK_CRC | AArch64::AEK_PROFILE)) AARCH64_CPU_NAME("thunderxt88", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, diff --git a/contrib/llvm-project/llvm/include/llvm/Support/ManagedStatic.h b/contrib/llvm-project/llvm/include/llvm/Support/ManagedStatic.h index bbd0d04ed040..f2b41422f131 100644 --- a/contrib/llvm-project/llvm/include/llvm/Support/ManagedStatic.h +++ b/contrib/llvm-project/llvm/include/llvm/Support/ManagedStatic.h @@ -40,8 +40,8 @@ template <typename T, size_t N> struct object_deleter<T[N]> { // constexpr, a dynamic initializer may be emitted depending on optimization // settings. 
For the affected versions of MSVC, use the old linker // initialization pattern of not providing a constructor and leaving the fields -// uninitialized. -#if !defined(_MSC_VER) || defined(__clang__) +// uninitialized. See http://llvm.org/PR41367 for details. +#if !defined(_MSC_VER) || (_MSC_VER >= 1925) || defined(__clang__) #define LLVM_USE_CONSTEXPR_CTOR #endif diff --git a/contrib/llvm-project/llvm/include/llvm/Target/TargetSelectionDAG.td b/contrib/llvm-project/llvm/include/llvm/Target/TargetSelectionDAG.td index 1700c6c4640d..46ad5a619770 100644 --- a/contrib/llvm-project/llvm/include/llvm/Target/TargetSelectionDAG.td +++ b/contrib/llvm-project/llvm/include/llvm/Target/TargetSelectionDAG.td @@ -959,6 +959,10 @@ def extloadi32 : PatFrag<(ops node:$ptr), (extload node:$ptr)> { let IsLoad = 1; let MemoryVT = i32; } +def extloadf16 : PatFrag<(ops node:$ptr), (extload node:$ptr)> { + let IsLoad = 1; + let MemoryVT = f16; +} def extloadf32 : PatFrag<(ops node:$ptr), (extload node:$ptr)> { let IsLoad = 1; let MemoryVT = f32; @@ -1094,6 +1098,11 @@ def truncstorei32 : PatFrag<(ops node:$val, node:$ptr), let IsStore = 1; let MemoryVT = i32; } +def truncstoref16 : PatFrag<(ops node:$val, node:$ptr), + (truncstore node:$val, node:$ptr)> { + let IsStore = 1; + let MemoryVT = f16; +} def truncstoref32 : PatFrag<(ops node:$val, node:$ptr), (truncstore node:$val, node:$ptr)> { let IsStore = 1; diff --git a/contrib/llvm-project/llvm/lib/Analysis/LazyValueInfo.cpp b/contrib/llvm-project/llvm/lib/Analysis/LazyValueInfo.cpp index bad2de9e5f5e..7ae7a1fd5493 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/LazyValueInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/LazyValueInfo.cpp @@ -96,9 +96,9 @@ static ValueLatticeElement intersect(const ValueLatticeElement &A, const ValueLatticeElement &B) { // Undefined is the strongest state. It means the value is known to be along // an unreachable path. 
- if (A.isUndefined()) + if (A.isUnknown()) return A; - if (B.isUndefined()) + if (B.isUnknown()) return B; // If we gave up for one, but got a useable fact from the other, use it. @@ -1203,7 +1203,7 @@ static ValueLatticeElement getValueFromICmpCondition(Value *Val, ICmpInst *ICI, // false SETNE. if (isTrueDest == (Predicate == ICmpInst::ICMP_EQ)) return ValueLatticeElement::get(cast<Constant>(RHS)); - else + else if (!isa<UndefValue>(RHS)) return ValueLatticeElement::getNot(cast<Constant>(RHS)); } } @@ -1722,7 +1722,7 @@ ConstantRange LazyValueInfo::getConstantRange(Value *V, BasicBlock *BB, const DataLayout &DL = BB->getModule()->getDataLayout(); ValueLatticeElement Result = getImpl(PImpl, AC, &DL, DT).getValueInBlock(V, BB, CxtI); - if (Result.isUndefined()) + if (Result.isUnknown()) return ConstantRange::getEmpty(Width); if (Result.isConstantRange()) return Result.getConstantRange(); @@ -1761,7 +1761,7 @@ ConstantRange LazyValueInfo::getConstantRangeOnEdge(Value *V, ValueLatticeElement Result = getImpl(PImpl, AC, &DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI); - if (Result.isUndefined()) + if (Result.isUnknown()) return ConstantRange::getEmpty(Width); if (Result.isConstantRange()) return Result.getConstantRange(); @@ -1991,7 +1991,7 @@ void LazyValueInfoAnnotatedWriter::emitBasicBlockStartAnnot( for (auto &Arg : F->args()) { ValueLatticeElement Result = LVIImpl->getValueInBlock( const_cast<Argument *>(&Arg), const_cast<BasicBlock *>(BB)); - if (Result.isUndefined()) + if (Result.isUnknown()) continue; OS << "; LatticeVal for: '" << Arg << "' is: " << Result << "\n"; } diff --git a/contrib/llvm-project/llvm/lib/Analysis/ValueLattice.cpp b/contrib/llvm-project/llvm/lib/Analysis/ValueLattice.cpp index a0115a0eec36..eaf8885cc14e 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/ValueLattice.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/ValueLattice.cpp @@ -10,8 +10,10 @@ namespace llvm { raw_ostream &operator<<(raw_ostream &OS, const ValueLatticeElement &Val) 
{ - if (Val.isUndefined()) - return OS << "undefined"; + if (Val.isUnknown()) + return OS << "unknown"; + if (Val.isUndef()) + return OS << "undef"; if (Val.isOverdefined()) return OS << "overdefined"; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp index 4b9c50aeb1d3..35964b2cdbda 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp @@ -963,10 +963,10 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB, continue; } - // If one of the blocks is the entire common tail (and not the entry - // block, which we can't jump to), we can treat all blocks with this same - // tail at once. Use PredBB if that is one of the possibilities, as that - // will not introduce any extra branches. + // If one of the blocks is the entire common tail (and is not the entry + // block/an EH pad, which we can't jump to), we can treat all blocks with + // this same tail at once. Use PredBB if that is one of the possibilities, + // as that will not introduce any extra branches. MachineBasicBlock *EntryBB = &MergePotentials.front().getBlock()->getParent()->front(); unsigned commonTailIndex = SameTails.size(); @@ -974,19 +974,21 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB, // into the other. if (SameTails.size() == 2 && SameTails[0].getBlock()->isLayoutSuccessor(SameTails[1].getBlock()) && - SameTails[1].tailIsWholeBlock()) + SameTails[1].tailIsWholeBlock() && !SameTails[1].getBlock()->isEHPad()) commonTailIndex = 1; else if (SameTails.size() == 2 && SameTails[1].getBlock()->isLayoutSuccessor( - SameTails[0].getBlock()) && - SameTails[0].tailIsWholeBlock()) + SameTails[0].getBlock()) && + SameTails[0].tailIsWholeBlock() && + !SameTails[0].getBlock()->isEHPad()) commonTailIndex = 0; else { // Otherwise just pick one, favoring the fall-through predecessor if // there is one. 
for (unsigned i = 0, e = SameTails.size(); i != e; ++i) { MachineBasicBlock *MBB = SameTails[i].getBlock(); - if (MBB == EntryBB && SameTails[i].tailIsWholeBlock()) + if ((MBB == EntryBB || MBB->isEHPad()) && + SameTails[i].tailIsWholeBlock()) continue; if (MBB == PredBB) { commonTailIndex = i; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp index a4ba197b7a1d..239b6fd6fd68 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp @@ -269,30 +269,26 @@ MachineSinking::AllUsesDominatedByBlock(unsigned Reg, // into and they are all PHI nodes. In this case, machine-sink must break // the critical edge first. e.g. // - // %bb.1: derived from LLVM BB %bb4.preheader + // %bb.1: // Predecessors according to CFG: %bb.0 // ... - // %reg16385 = DEC64_32r %reg16437, implicit-def dead %eflags + // %def = DEC64_32r %x, implicit-def dead %eflags // ... // JE_4 <%bb.37>, implicit %eflags // Successors according to CFG: %bb.37 %bb.2 // - // %bb.2: derived from LLVM BB %bb.nph - // Predecessors according to CFG: %bb.0 %bb.1 - // %reg16386 = PHI %reg16434, %bb.0, %reg16385, %bb.1 - BreakPHIEdge = true; - for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) { - MachineInstr *UseInst = MO.getParent(); - unsigned OpNo = &MO - &UseInst->getOperand(0); - MachineBasicBlock *UseBlock = UseInst->getParent(); - if (!(UseBlock == MBB && UseInst->isPHI() && - UseInst->getOperand(OpNo+1).getMBB() == DefMBB)) { - BreakPHIEdge = false; - break; - } - } - if (BreakPHIEdge) + // %bb.2: + // %p = PHI %y, %bb.0, %def, %bb.1 + if (llvm::all_of(MRI->use_nodbg_operands(Reg), [&](MachineOperand &MO) { + MachineInstr *UseInst = MO.getParent(); + unsigned OpNo = UseInst->getOperandNo(&MO); + MachineBasicBlock *UseBlock = UseInst->getParent(); + return UseBlock == MBB && UseInst->isPHI() && + UseInst->getOperand(OpNo + 1).getMBB() == DefMBB; + })) { + 
BreakPHIEdge = true; return true; + } for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) { // Determine the block of the use. diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/RDFGraph.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RDFGraph.cpp index 0cb35dc98819..437a6b030096 100644 --- a/contrib/llvm-project/llvm/lib/Target/Hexagon/RDFGraph.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RDFGraph.cpp @@ -8,8 +8,6 @@ // // Target-independent, SSA-based data flow graph for register data flow (RDF). // -#include "RDFGraph.h" -#include "RDFRegisters.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" @@ -20,6 +18,8 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RDFGraph.h" +#include "llvm/CodeGen/RDFRegisters.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetRegisterInfo.h" @@ -753,8 +753,10 @@ RegisterSet DataFlowGraph::getLandingPadLiveIns() const { const TargetLowering &TLI = *MF.getSubtarget().getTargetLowering(); if (RegisterId R = TLI.getExceptionPointerRegister(PF)) LR.insert(RegisterRef(R)); - if (RegisterId R = TLI.getExceptionSelectorRegister(PF)) - LR.insert(RegisterRef(R)); + if (!isFuncletEHPersonality(classifyEHPersonality(PF))) { + if (RegisterId R = TLI.getExceptionSelectorRegister(PF)) + LR.insert(RegisterRef(R)); + } return LR; } diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/RDFLiveness.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RDFLiveness.cpp index e2c007c9d01a..0bcd27f8ea45 100644 --- a/contrib/llvm-project/llvm/lib/Target/Hexagon/RDFLiveness.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RDFLiveness.cpp @@ -22,9 +22,6 @@ // and Embedded Architectures and Compilers", 8 (4), // <10.1145/2086696.2086706>. 
<hal-00647369> // -#include "RDFLiveness.h" -#include "RDFGraph.h" -#include "RDFRegisters.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" @@ -33,6 +30,9 @@ #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/RDFLiveness.h" +#include "llvm/CodeGen/RDFGraph.h" +#include "llvm/CodeGen/RDFRegisters.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCRegisterInfo.h" diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/RDFRegisters.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RDFRegisters.cpp index b5675784e34b..bd8661816e71 100644 --- a/contrib/llvm-project/llvm/lib/Target/Hexagon/RDFRegisters.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RDFRegisters.cpp @@ -6,11 +6,11 @@ // //===----------------------------------------------------------------------===// -#include "RDFRegisters.h" #include "llvm/ADT/BitVector.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/RDFRegisters.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCRegisterInfo.h" diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 8ff04797c8d8..2476fd26f250 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -886,6 +886,13 @@ static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) { ISD::isBuildVectorOfConstantFPSDNodes(V.getNode()); } +// Determine if this an indexed load with an opaque target constant index. 
+static bool canSplitIdx(LoadSDNode *LD) { + return MaySplitLoadIndex && + (LD->getOperand(2).getOpcode() != ISD::TargetConstant || + !cast<ConstantSDNode>(LD->getOperand(2))->isOpaque()); +} + bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc, const SDLoc &DL, SDValue N0, @@ -14222,11 +14229,11 @@ SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) { auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue { if (LD->isIndexed()) { - bool IsSub = (LD->getAddressingMode() == ISD::PRE_DEC || - LD->getAddressingMode() == ISD::POST_DEC); - unsigned Opc = IsSub ? ISD::SUB : ISD::ADD; - SDValue Idx = DAG.getNode(Opc, SDLoc(LD), LD->getOperand(1).getValueType(), - LD->getOperand(1), LD->getOperand(2)); + // Cannot handle opaque target constants and we must respect the user's + // request not to split indexes from loads. + if (!canSplitIdx(LD)) + return SDValue(); + SDValue Idx = SplitIndexingFromLoad(LD); SDValue Ops[] = {Val, Idx, Chain}; return CombineTo(LD, Ops, 3); } @@ -14322,14 +14329,12 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { // the indexing into an add/sub directly (that TargetConstant may not be // valid for a different type of node, and we cannot convert an opaque // target constant into a regular constant). - bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant && - cast<ConstantSDNode>(LD->getOperand(2))->isOpaque(); + bool CanSplitIdx = canSplitIdx(LD); - if (!N->hasAnyUseOfValue(0) && - ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) { + if (!N->hasAnyUseOfValue(0) && (CanSplitIdx || !N->hasAnyUseOfValue(1))) { SDValue Undef = DAG.getUNDEF(N->getValueType(0)); SDValue Index; - if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) { + if (N->hasAnyUseOfValue(1) && CanSplitIdx) { Index = SplitIndexingFromLoad(LD); // Try to fold the base pointer arithmetic into subsequent loads and // stores. 
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index 8294591b7326..6ecde9b43c07 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -225,6 +225,21 @@ static bool isRegUsedByPhiNodes(unsigned DefReg, return false; } +static bool isTerminatingEHLabel(MachineBasicBlock *MBB, MachineInstr &MI) { + // Ignore non-EH labels. + if (!MI.isEHLabel()) + return false; + + // Any EH label outside a landing pad must be for an invoke. Consider it a + // terminator. + if (!MBB->isEHPad()) + return true; + + // If this is a landingpad, the first non-phi instruction will be an EH_LABEL. + // Don't consider that label to be a terminator. + return MI.getIterator() != MBB->getFirstNonPHI(); +} + /// Build a map of instruction orders. Return the first terminator and its /// order. Consider EH_LABEL instructions to be terminators as well, since local /// values for phis after invokes must be materialized before the call. 
@@ -233,7 +248,7 @@ void FastISel::InstOrderMap::initialize( unsigned Order = 0; for (MachineInstr &I : *MBB) { if (!FirstTerminator && - (I.isTerminator() || (I.isEHLabel() && &I != &MBB->front()))) { + (I.isTerminator() || isTerminatingEHLabel(MBB, I))) { FirstTerminator = &I; FirstTerminatorOrder = Order; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 6aed5796acc6..015b3d99fb0f 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -271,8 +271,20 @@ SDValue DAGTypeLegalizer::PromoteIntRes_AtomicCmpSwap(AtomicSDNode *N, return Res.getValue(1); } - SDValue Op2 = GetPromotedInteger(N->getOperand(2)); + // Op2 is used for the comparison and thus must be extended according to the + // target's atomic operations. Op3 is merely stored and so can be left alone. + SDValue Op2 = N->getOperand(2); SDValue Op3 = GetPromotedInteger(N->getOperand(3)); + if (TLI.getTargetMachine().getTargetTriple().isRISCV()) { + // The comparison argument must be sign-extended for RISC-V. This is + // abstracted using a new TargetLowering hook in the main LLVM development + // branch, but handled here directly in order to fix the codegen bug for + // 10.x without breaking the libLLVM.so ABI. 
+ Op2 = SExtPromotedInteger(Op2); + } else { + Op2 = GetPromotedInteger(Op2); + } + SDVTList VTs = DAG.getVTList(Op2.getValueType(), N->getValueType(1), MVT::Other); SDValue Res = DAG.getAtomicCmpSwap( diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 24ab65171a17..368e2100031f 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -5490,9 +5490,20 @@ char TargetLowering::isNegatibleForFree(SDValue Op, SelectionDAG &DAG, EVT VT = Op.getValueType(); const SDNodeFlags Flags = Op->getFlags(); const TargetOptions &Options = DAG.getTarget().Options; - if (!Op.hasOneUse() && !(Op.getOpcode() == ISD::FP_EXTEND && - isFPExtFree(VT, Op.getOperand(0).getValueType()))) - return 0; + if (!Op.hasOneUse()) { + bool IsFreeExtend = Op.getOpcode() == ISD::FP_EXTEND && + isFPExtFree(VT, Op.getOperand(0).getValueType()); + + // If we already have the use of the negated floating constant, it is free + // to negate it even if it has multiple uses. + bool IsFreeConstant = + Op.getOpcode() == ISD::ConstantFP && + !getNegatedExpression(Op, DAG, LegalOperations, ForCodeSize) + .use_empty(); + + if (!IsFreeExtend && !IsFreeConstant) + return 0; + } // Don't recurse exponentially. if (Depth > SelectionDAG::MaxRecursionDepth) @@ -5687,14 +5698,7 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, ForCodeSize, Depth + 1); char V1 = isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations, ForCodeSize, Depth + 1); - // TODO: This is a hack. It is possible that costs have changed between now - // and the initial calls to isNegatibleForFree(). That is because we - // are rewriting the expression, and that may change the number of - // uses (and therefore the cost) of values. If the negation costs are - // equal, only negate this value if it is a constant. 
Otherwise, try - // operand 1. A better fix would eliminate uses as a cost factor or - // track the change in uses as we rewrite the expression. - if (V0 > V1 || (V0 == V1 && isa<ConstantFPSDNode>(Op.getOperand(0)))) { + if (V0 > V1) { // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z)) SDValue Neg0 = getNegatedExpression( Op.getOperand(0), DAG, LegalOperations, ForCodeSize, Depth + 1); diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/MergingTypeTableBuilder.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/MergingTypeTableBuilder.cpp index 4d7cd468f3ee..6924b0e0ca02 100644 --- a/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/MergingTypeTableBuilder.cpp +++ b/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/MergingTypeTableBuilder.cpp @@ -90,7 +90,9 @@ static inline ArrayRef<uint8_t> stabilize(BumpPtrAllocator &Alloc, TypeIndex MergingTypeTableBuilder::insertRecordAs(hash_code Hash, ArrayRef<uint8_t> &Record) { assert(Record.size() < UINT32_MAX && "Record too big"); - assert(Record.size() % 4 == 0 && "Record is not aligned to 4 bytes!"); + assert(Record.size() % 4 == 0 && + "The type record size is not a multiple of 4 bytes which will cause " + "misalignment in the output TPI stream!"); LocallyHashedType WeakHash{Hash, Record}; auto Result = HashedRecords.try_emplace(WeakHash, nextTypeIndex()); diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp index f9fca74a2199..c233db5c1d06 100644 --- a/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp +++ b/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp @@ -360,16 +360,18 @@ Error TypeStreamMerger::remapType(const CVType &Type) { [this, Type](MutableArrayRef<uint8_t> Storage) -> ArrayRef<uint8_t> { return remapIndices(Type, Storage); }; + unsigned AlignedSize = alignTo(Type.RecordData.size(), 4); + if (LLVM_LIKELY(UseGlobalHashes)) { GlobalTypeTableBuilder 
&Dest = isIdRecord(Type.kind()) ? *DestGlobalIdStream : *DestGlobalTypeStream; GloballyHashedType H = GlobalHashes[CurIndex.toArrayIndex()]; - DestIdx = Dest.insertRecordAs(H, Type.RecordData.size(), DoSerialize); + DestIdx = Dest.insertRecordAs(H, AlignedSize, DoSerialize); } else { MergingTypeTableBuilder &Dest = isIdRecord(Type.kind()) ? *DestIdStream : *DestTypeStream; - RemapStorage.resize(Type.RecordData.size()); + RemapStorage.resize(AlignedSize); ArrayRef<uint8_t> Result = DoSerialize(RemapStorage); if (!Result.empty()) DestIdx = Dest.insertRecordBytes(Result); @@ -386,9 +388,15 @@ Error TypeStreamMerger::remapType(const CVType &Type) { ArrayRef<uint8_t> TypeStreamMerger::remapIndices(const CVType &OriginalType, MutableArrayRef<uint8_t> Storage) { + unsigned Align = OriginalType.RecordData.size() & 3; + unsigned AlignedSize = alignTo(OriginalType.RecordData.size(), 4); + assert(Storage.size() == AlignedSize && + "The storage buffer size is not a multiple of 4 bytes which will " + "cause misalignment in the output TPI stream!"); + SmallVector<TiReference, 4> Refs; discoverTypeIndices(OriginalType.RecordData, Refs); - if (Refs.empty()) + if (Refs.empty() && Align == 0) return OriginalType.RecordData; ::memcpy(Storage.data(), OriginalType.RecordData.data(), @@ -408,6 +416,16 @@ TypeStreamMerger::remapIndices(const CVType &OriginalType, return {}; } } + + if (Align > 0) { + RecordPrefix *StorageHeader = + reinterpret_cast<RecordPrefix *>(Storage.data()); + StorageHeader->RecordLen += 4 - Align; + + DestContent = Storage.data() + OriginalType.RecordData.size(); + for (; Align < 4; ++Align) + *DestContent++ = LF_PAD4 - Align; + } return Storage; } diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp index 4f10f8524a9b..51a1f0a544e3 100644 --- a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp +++ 
b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp @@ -44,6 +44,9 @@ void TpiStreamBuilder::setVersionHeader(PdbRaw_TpiVer Version) { void TpiStreamBuilder::addTypeRecord(ArrayRef<uint8_t> Record, Optional<uint32_t> Hash) { // If we just crossed an 8KB threshold, add a type index offset. + assert(((Record.size() & 3) == 0) && + "The type record's size is not a multiple of 4 bytes which will " + "cause misalignment in the output TPI stream!"); size_t NewSize = TypeRecordBytes + Record.size(); constexpr size_t EightKB = 8 * 1024; if (NewSize / EightKB > TypeRecordBytes / EightKB || TypeRecords.empty()) { @@ -153,8 +156,11 @@ Error TpiStreamBuilder::commit(const msf::MSFLayout &Layout, return EC; for (auto Rec : TypeRecords) { - assert(!Rec.empty()); // An empty record will not write anything, but it - // would shift all offsets from here on. + assert(!Rec.empty() && "Attempting to write an empty type record shifts " + "all offsets in the TPI stream!"); + assert(((Rec.size() & 3) == 0) && + "The type record's size is not a multiple of 4 bytes which will " + "cause misalignment in the output TPI stream!"); if (auto EC = Writer.writeBytes(Rec)) return EC; } diff --git a/contrib/llvm-project/llvm/lib/LTO/LTO.cpp b/contrib/llvm-project/llvm/lib/LTO/LTO.cpp index 297b11de17a9..fa2f0777897b 100644 --- a/contrib/llvm-project/llvm/lib/LTO/LTO.cpp +++ b/contrib/llvm-project/llvm/lib/LTO/LTO.cpp @@ -147,8 +147,17 @@ void llvm::computeLTOCacheKey( // Include the hash for the current module auto ModHash = Index.getModuleHash(ModuleID); Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash))); + + std::vector<uint64_t> ExportsGUID; + ExportsGUID.reserve(ExportList.size()); for (const auto &VI : ExportList) { auto GUID = VI.getGUID(); + ExportsGUID.push_back(GUID); + } + + // Sort the export list elements GUIDs. 
+ llvm::sort(ExportsGUID); + for (uint64_t GUID : ExportsGUID) { // The export list can impact the internalization, be conservative here Hasher.update(ArrayRef<uint8_t>((uint8_t *)&GUID, sizeof(GUID))); } @@ -156,12 +165,23 @@ void llvm::computeLTOCacheKey( // Include the hash for every module we import functions from. The set of // imported symbols for each module may affect code generation and is // sensitive to link order, so include that as well. - for (auto &Entry : ImportList) { - auto ModHash = Index.getModuleHash(Entry.first()); + using ImportMapIteratorTy = FunctionImporter::ImportMapTy::const_iterator; + std::vector<ImportMapIteratorTy> ImportModulesVector; + ImportModulesVector.reserve(ImportList.size()); + + for (ImportMapIteratorTy It = ImportList.begin(); It != ImportList.end(); + ++It) { + ImportModulesVector.push_back(It); + } + llvm::sort(ImportModulesVector, + [](const ImportMapIteratorTy &Lhs, const ImportMapIteratorTy &Rhs) + -> bool { return Lhs->getKey() < Rhs->getKey(); }); + for (const ImportMapIteratorTy &EntryIt : ImportModulesVector) { + auto ModHash = Index.getModuleHash(EntryIt->first()); Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash))); - AddUint64(Entry.second.size()); - for (auto &Fn : Entry.second) + AddUint64(EntryIt->second.size()); + for (auto &Fn : EntryIt->second) AddUint64(Fn); } diff --git a/contrib/llvm-project/llvm/lib/MC/MCObjectFileInfo.cpp b/contrib/llvm-project/llvm/lib/MC/MCObjectFileInfo.cpp index d567cc14a830..8cbe09f040be 100644 --- a/contrib/llvm-project/llvm/lib/MC/MCObjectFileInfo.cpp +++ b/contrib/llvm-project/llvm/lib/MC/MCObjectFileInfo.cpp @@ -761,7 +761,6 @@ void MCObjectFileInfo::initWasmMCObjectFileInfo(const Triple &T) { Ctx->getWasmSection(".debug_ranges", SectionKind::getMetadata()); DwarfMacinfoSection = Ctx->getWasmSection(".debug_macinfo", SectionKind::getMetadata()); - DwarfAddrSection = Ctx->getWasmSection(".debug_addr", SectionKind::getMetadata()); DwarfCUIndexSection = 
Ctx->getWasmSection(".debug_cu_index", SectionKind::getMetadata()); DwarfTUIndexSection = Ctx->getWasmSection(".debug_tu_index", SectionKind::getMetadata()); DwarfInfoSection = @@ -770,6 +769,17 @@ void MCObjectFileInfo::initWasmMCObjectFileInfo(const Triple &T) { DwarfPubNamesSection = Ctx->getWasmSection(".debug_pubnames", SectionKind::getMetadata()); DwarfPubTypesSection = Ctx->getWasmSection(".debug_pubtypes", SectionKind::getMetadata()); + DwarfDebugNamesSection = + Ctx->getWasmSection(".debug_names", SectionKind::getMetadata()); + DwarfStrOffSection = + Ctx->getWasmSection(".debug_str_offsets", SectionKind::getMetadata()); + DwarfAddrSection = + Ctx->getWasmSection(".debug_addr", SectionKind::getMetadata()); + DwarfRnglistsSection = + Ctx->getWasmSection(".debug_rnglists", SectionKind::getMetadata()); + DwarfLoclistsSection = + Ctx->getWasmSection(".debug_loclists", SectionKind::getMetadata()); + // Wasm use data section for LSDA. // TODO Consider putting each function's exception table in a separate // section, as in -function-sections, to facilitate lld's --gc-section. 
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td index 0106355b1a44..6e57543c4c0f 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td @@ -443,6 +443,10 @@ def SVEUnsupported : AArch64Unsupported { HasSVE2BitPerm]; } +def PAUnsupported : AArch64Unsupported { + let F = [HasPA]; +} + include "AArch64SchedA53.td" include "AArch64SchedA57.td" include "AArch64SchedCyclone.td" @@ -453,6 +457,7 @@ include "AArch64SchedExynosM4.td" include "AArch64SchedExynosM5.td" include "AArch64SchedThunderX.td" include "AArch64SchedThunderX2T99.td" +include "AArch64SchedThunderX3T110.td" def ProcA35 : SubtargetFeature<"a35", "ARMProcFamily", "CortexA35", "Cortex-A35 ARM processors", [ @@ -780,6 +785,25 @@ def ProcThunderX2T99 : SubtargetFeature<"thunderx2t99", "ARMProcFamily", FeatureLSE, HasV8_1aOps]>; +def ProcThunderX3T110 : SubtargetFeature<"thunderx3t110", "ARMProcFamily", + "ThunderX3T110", + "Marvell ThunderX3 processors", [ + FeatureAggressiveFMA, + FeatureCRC, + FeatureCrypto, + FeatureFPARMv8, + FeatureArithmeticBccFusion, + FeatureNEON, + FeaturePostRAScheduler, + FeaturePredictableSelectIsExpensive, + FeatureLSE, + FeaturePA, + FeatureUseAA, + FeatureBalanceFPOps, + FeaturePerfMon, + FeatureStrictAlign, + HasV8_3aOps]>; + def ProcThunderX : SubtargetFeature<"thunderx", "ARMProcFamily", "ThunderX", "Cavium ThunderX processors", [ FeatureCRC, @@ -878,6 +902,8 @@ def : ProcessorModel<"thunderxt81", ThunderXT8XModel, [ProcThunderXT81]>; def : ProcessorModel<"thunderxt83", ThunderXT8XModel, [ProcThunderXT83]>; // Cavium ThunderX2T9X Processors. Formerly Broadcom Vulcan. def : ProcessorModel<"thunderx2t99", ThunderX2T99Model, [ProcThunderX2T99]>; +// Marvell ThunderX3T110 Processors. +def : ProcessorModel<"thunderx3t110", ThunderX3T110Model, [ProcThunderX3T110]>; // FIXME: HiSilicon TSV110 is currently modeled as a Cortex-A57. 
def : ProcessorModel<"tsv110", CortexA57Model, [ProcTSV110]>; diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64BranchTargets.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64BranchTargets.cpp index 6fa3a462bc71..1956014b738d 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64BranchTargets.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64BranchTargets.cpp @@ -118,9 +118,15 @@ void AArch64BranchTargets::addBTI(MachineBasicBlock &MBB, bool CouldCall, auto MBBI = MBB.begin(); - // PACI[AB]SP are implicitly BTI JC, so no BTI instruction needed there. - if (MBBI != MBB.end() && (MBBI->getOpcode() == AArch64::PACIASP || - MBBI->getOpcode() == AArch64::PACIBSP)) + // Skip the meta instructions, those will be removed anyway. + for (; MBBI != MBB.end() && MBBI->isMetaInstruction(); ++MBBI) + ; + + // SCTLR_EL1.BT[01] is set to 0 by default which means + // PACI[AB]SP are implicitly BTI C so no BTI C instruction is needed there. + if (MBBI != MBB.end() && HintNum == 34 && + (MBBI->getOpcode() == AArch64::PACIASP || + MBBI->getOpcode() == AArch64::PACIBSP)) return; BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()), diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index ea3e800a1ad2..651ad9ad4c83 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -211,6 +211,24 @@ AArch64FrameLowering::getStackIDForScalableVectors() const { return TargetStackID::SVEVector; } +/// Returns the size of the fixed object area (allocated next to sp on entry) +/// On Win64 this may include a var args area and an UnwindHelp object for EH. 
+static unsigned getFixedObjectSize(const MachineFunction &MF, + const AArch64FunctionInfo *AFI, bool IsWin64, + bool IsFunclet) { + if (!IsWin64 || IsFunclet) { + // Only Win64 uses fixed objects, and then only for the function (not + // funclets) + return 0; + } else { + // Var args are stored here in the primary function. + const unsigned VarArgsArea = AFI->getVarArgsGPRSize(); + // To support EH funclets we allocate an UnwindHelp object + const unsigned UnwindHelpObject = (MF.hasEHFunclets() ? 8 : 0); + return alignTo(VarArgsArea + UnwindHelpObject, 16); + } +} + /// Returns the size of the entire SVE stackframe (calleesaves + spills). static StackOffset getSVEStackSize(const MachineFunction &MF) { const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); @@ -959,10 +977,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, bool IsWin64 = Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv()); - // Var args are accounted for in the containing function, so don't - // include them for funclets. - unsigned FixedObject = (IsWin64 && !IsFunclet) ? - alignTo(AFI->getVarArgsGPRSize(), 16) : 0; + unsigned FixedObject = getFixedObjectSize(MF, AFI, IsWin64, IsFunclet); auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject; // All of the remaining stack allocations are for locals. @@ -993,32 +1008,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, ++MBBI; } - // The code below is not applicable to funclets. We have emitted all the SEH - // opcodes that we needed to emit. The FP and BP belong to the containing - // function. - if (IsFunclet) { - if (NeedsWinCFI) { - HasWinCFI = true; - BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd)) - .setMIFlag(MachineInstr::FrameSetup); - } - - // SEH funclets are passed the frame pointer in X1. If the parent - // function uses the base register, then the base register is used - // directly, and is not retrieved from X1. 
- if (F.hasPersonalityFn()) { - EHPersonality Per = classifyEHPersonality(F.getPersonalityFn()); - if (isAsynchronousEHPersonality(Per)) { - BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), AArch64::FP) - .addReg(AArch64::X1).setMIFlag(MachineInstr::FrameSetup); - MBB.addLiveIn(AArch64::X1); - } - } - - return; - } - - if (HasFP) { + // For funclets the FP belongs to the containing function. + if (!IsFunclet && HasFP) { // Only set up FP if we actually need to. int64_t FPOffset = isTargetDarwin(MF) ? (AFI->getCalleeSavedStackSize() - 16) : 0; @@ -1161,7 +1152,9 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, // Allocate space for the rest of the frame. if (NumBytes) { - const bool NeedsRealignment = RegInfo->needsStackRealignment(MF); + // Alignment is required for the parent frame, not the funclet + const bool NeedsRealignment = + !IsFunclet && RegInfo->needsStackRealignment(MF); unsigned scratchSPReg = AArch64::SP; if (NeedsRealignment) { @@ -1215,7 +1208,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, // FIXME: Clarify FrameSetup flags here. // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is // needed. - if (RegInfo->hasBasePointer(MF)) { + // For funclets the BP belongs to the containing function. + if (!IsFunclet && RegInfo->hasBasePointer(MF)) { TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP, false); if (NeedsWinCFI) { @@ -1232,6 +1226,19 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, .setMIFlag(MachineInstr::FrameSetup); } + // SEH funclets are passed the frame pointer in X1. If the parent + // function uses the base register, then the base register is used + // directly, and is not retrieved from X1. 
+ if (IsFunclet && F.hasPersonalityFn()) { + EHPersonality Per = classifyEHPersonality(F.getPersonalityFn()); + if (isAsynchronousEHPersonality(Per)) { + BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), AArch64::FP) + .addReg(AArch64::X1) + .setMIFlag(MachineInstr::FrameSetup); + MBB.addLiveIn(AArch64::X1); + } + } + if (needsFrameMoves) { const DataLayout &TD = MF.getDataLayout(); const int StackGrowth = isTargetDarwin(MF) @@ -1450,10 +1457,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, bool IsWin64 = Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv()); - // Var args are accounted for in the containing function, so don't - // include them for funclets. - unsigned FixedObject = - (IsWin64 && !IsFunclet) ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0; + unsigned FixedObject = getFixedObjectSize(MF, AFI, IsWin64, IsFunclet); uint64_t AfterCSRPopSize = ArgumentPopSize; auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject; @@ -1679,7 +1683,9 @@ static StackOffset getFPOffset(const MachineFunction &MF, int64_t ObjectOffset) const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>(); bool IsWin64 = Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv()); - unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0; + + unsigned FixedObject = + getFixedObjectSize(MF, AFI, IsWin64, /*IsFunclet=*/false); unsigned FPAdjust = isTargetDarwin(MF) ? 16 : AFI->getCalleeSavedStackSize(MF.getFrameInfo()); return {ObjectOffset + FixedObject + FPAdjust, MVT::i8}; @@ -2632,9 +2638,14 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized( ++MBBI; // Create an UnwindHelp object. 
- int UnwindHelpFI = - MFI.CreateStackObject(/*size*/8, /*alignment*/16, false); + // The UnwindHelp object is allocated at the start of the fixed object area + int64_t FixedObject = + getFixedObjectSize(MF, AFI, /*IsWin64*/ true, /*IsFunclet*/ false); + int UnwindHelpFI = MFI.CreateFixedObject(/*Size*/ 8, + /*SPOffset*/ -FixedObject, + /*IsImmutable=*/false); EHInfo.UnwindHelpFrameIdx = UnwindHelpFI; + // We need to store -2 into the UnwindHelp object at the start of the // function. DebugLoc DL; @@ -2656,10 +2667,14 @@ int AArch64FrameLowering::getFrameIndexReferencePreferSP( const MachineFunction &MF, int FI, unsigned &FrameReg, bool IgnoreSPUpdates) const { const MachineFrameInfo &MFI = MF.getFrameInfo(); - LLVM_DEBUG(dbgs() << "Offset from the SP for " << FI << " is " - << MFI.getObjectOffset(FI) << "\n"); - FrameReg = AArch64::SP; - return MFI.getObjectOffset(FI); + if (IgnoreSPUpdates) { + LLVM_DEBUG(dbgs() << "Offset from the SP for " << FI << " is " + << MFI.getObjectOffset(FI) << "\n"); + FrameReg = AArch64::SP; + return MFI.getObjectOffset(FI); + } + + return getFrameIndexReference(MF, FI, FrameReg); } /// The parent frame offset (aka dispFrame) is only used on X86_64 to retrieve diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA53.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA53.td index a6df0f3f083c..c5ff1fcb274b 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA53.td +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA53.td @@ -26,7 +26,8 @@ def CortexA53Model : SchedMachineModel { // v 1.0 Spreadsheet let CompleteModel = 1; - list<Predicate> UnsupportedFeatures = SVEUnsupported.F; + list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F, + PAUnsupported.F); } diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA57.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA57.td index 9f566d1c7079..a760c4319005 100644 --- 
a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA57.td +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA57.td @@ -31,7 +31,8 @@ def CortexA57Model : SchedMachineModel { let LoopMicroOpBufferSize = 16; let CompleteModel = 1; - list<Predicate> UnsupportedFeatures = SVEUnsupported.F; + list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F, + PAUnsupported.F); } //===----------------------------------------------------------------------===// diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedCyclone.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedCyclone.td index 798ecb7508c0..5ddfe9e0e34c 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedCyclone.td +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedCyclone.td @@ -18,7 +18,8 @@ def CycloneModel : SchedMachineModel { let MispredictPenalty = 16; // 14-19 cycles are typical. let CompleteModel = 1; - list<Predicate> UnsupportedFeatures = SVEUnsupported.F; + list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F, + PAUnsupported.F); } //===----------------------------------------------------------------------===// diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td index d1734c455b2b..3272640cb3f2 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td @@ -24,7 +24,8 @@ def ExynosM3Model : SchedMachineModel { let MispredictPenalty = 16; // Minimum branch misprediction penalty. let CompleteModel = 1; // Use the default model otherwise. 
- list<Predicate> UnsupportedFeatures = SVEUnsupported.F; + list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F, + PAUnsupported.F); } //===----------------------------------------------------------------------===// diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td index d2284f9fa0b5..cac86491f56d 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td @@ -24,7 +24,8 @@ def ExynosM4Model : SchedMachineModel { let MispredictPenalty = 16; // Minimum branch misprediction penalty. let CompleteModel = 1; // Use the default model otherwise. - list<Predicate> UnsupportedFeatures = SVEUnsupported.F; + list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F, + PAUnsupported.F); } //===----------------------------------------------------------------------===// diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td index df7402591e7b..86477b81b8bf 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td @@ -24,7 +24,8 @@ def ExynosM5Model : SchedMachineModel { let MispredictPenalty = 15; // Minimum branch misprediction penalty. let CompleteModel = 1; // Use the default model otherwise. 
- list<Predicate> UnsupportedFeatures = SVEUnsupported.F; + list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F, + PAUnsupported.F); } //===----------------------------------------------------------------------===// diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedFalkor.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedFalkor.td index 92d03963de57..a17ab36d7f9e 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedFalkor.td +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedFalkor.td @@ -23,8 +23,8 @@ def FalkorModel : SchedMachineModel { let MispredictPenalty = 11; // Minimum branch misprediction penalty. let CompleteModel = 1; - list<Predicate> UnsupportedFeatures = SVEUnsupported.F; - + list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F, + PAUnsupported.F); // FIXME: Remove when all errors have been fixed. let FullInstRWOverlapCheck = 0; } diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedKryo.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedKryo.td index 0e1a24103121..ba14bf1f50de 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedKryo.td +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedKryo.td @@ -27,8 +27,8 @@ def KryoModel : SchedMachineModel { let LoopMicroOpBufferSize = 16; let CompleteModel = 1; - list<Predicate> UnsupportedFeatures = SVEUnsupported.F; - + list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F, + PAUnsupported.F); // FIXME: Remove when all errors have been fixed. 
let FullInstRWOverlapCheck = 0; } diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedThunderX.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedThunderX.td index 3b6aecf5c035..9c50f9708583 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedThunderX.td +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedThunderX.td @@ -25,8 +25,8 @@ def ThunderXT8XModel : SchedMachineModel { let PostRAScheduler = 1; // Use PostRA scheduler. let CompleteModel = 1; - list<Predicate> UnsupportedFeatures = SVEUnsupported.F; - + list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F, + PAUnsupported.F); // FIXME: Remove when all errors have been fixed. let FullInstRWOverlapCheck = 0; } diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td index e2a293c06877..233613f7be3a 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td @@ -25,8 +25,8 @@ def ThunderX2T99Model : SchedMachineModel { let PostRAScheduler = 1; // Using PostRA sched. let CompleteModel = 1; - list<Predicate> UnsupportedFeatures = SVEUnsupported.F; - + list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F, + PAUnsupported.F); // FIXME: Remove when all errors have been fixed. let FullInstRWOverlapCheck = 0; } diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedThunderX3T110.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedThunderX3T110.td new file mode 100644 index 000000000000..00838cc4b9bd --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedThunderX3T110.td @@ -0,0 +1,1997 @@ +//=- AArch64SchedThunderX3T110.td - Marvell ThunderX3 T110 ---*- tablegen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the scheduling model for Marvell ThunderX3T110 +// family of processors. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Pipeline Description. + +def ThunderX3T110Model : SchedMachineModel { + let IssueWidth = 4; // 4 micro-ops dispatched at a time. + let MicroOpBufferSize = 70; // 70 entries in micro-op re-order buffer. + let LoadLatency = 4; // Optimistic load latency. + let MispredictPenalty = 12; // Extra cycles for mispredicted branch. + // Determined via a mix of micro-arch details and experimentation. + let LoopMicroOpBufferSize = 128; // FIXME: might be much bigger in TX3. + let PostRAScheduler = 1; // Using PostRA sched. + let CompleteModel = 1; + + list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F, + PAUnsupported.F); + // FIXME: Remove when all errors have been fixed. + let FullInstRWOverlapCheck = 0; +} + +let SchedModel = ThunderX3T110Model in { + +// Issue ports. + +// Port 0: ALU. +def THX3T110P0 : ProcResource<1>; + +// Port 1: ALU. +def THX3T110P1 : ProcResource<1>; + +// Port 2: ALU/Branch. +def THX3T110P2 : ProcResource<1>; + +// Port 3: ALU/Branch. +def THX3T110P3 : ProcResource<1>; + +// Port 4: Load/Store. +def THX3T110P4 : ProcResource<1>; + +// Port 5: Load/store. +def THX3T110P5 : ProcResource<1>; + +// Port 6: FP/Neon/SIMD/Crypto. +def THX3T110P6FP0 : ProcResource<1>; + +// Port 7: FP/Neon/SIMD/Crypto. +def THX3T110P7FP1 : ProcResource<1>; + +// Port 8: FP/Neon/SIMD/Crypto. +def THX3T110P8FP2 : ProcResource<1>; + +// Port 9: FP/Neon/SIMD/Crypto. +def THX3T110P9FP3 : ProcResource<1>; + +// Port 10: Store Data Unit. 
+def THX3T110SD0 : ProcResource<1>; + +// Define groups for the functional units on each issue port. Each group +// created will be used by a WriteRes. + +// Integer divide/mulhi micro-ops only on port I1. +def THX3T110I1 : ProcResGroup<[THX3T110P1]>; + +// Branch micro-ops on ports I2/I3. +def THX3T110I23 : ProcResGroup<[THX3T110P2, THX3T110P3]>; + +// Branch micro-ops on ports I1/I2/I3. +def THX3T110I123 : ProcResGroup<[THX3T110P1, THX3T110P2, THX3T110P3]>; + +// Integer micro-ops on ports I0/I1/I2. +def THX3T110I012 : ProcResGroup<[THX3T110P0, THX3T110P1, THX3T110P2]>; + +// Integer micro-ops on ports I0/I1/I2/I3. +def THX3T110I0123 : ProcResGroup<[THX3T110P0, THX3T110P1, + THX3T110P2, THX3T110P3]>; + +// FP micro-ops on ports FP0/FP1/FP2/FP3. +def THX3T110FP0123 : ProcResGroup<[THX3T110P6FP0, THX3T110P7FP1, + THX3T110P8FP2, THX3T110P9FP3]>; + +// FP micro-ops on ports FP2/FP3. +def THX3T110FP23 : ProcResGroup<[THX3T110P8FP2, THX3T110P9FP3]>; + +// ASIMD micro-ops on ports FP0/FP1/FP2/FP3. +def THX3T110SIMD : ProcResGroup<[THX3T110P6FP0, THX3T110P7FP1, + THX3T110P8FP2, THX3T110P9FP3]>; + +// Store data micro-ops only on port 10. +def THX3T110SD : ProcResGroup<[THX3T110SD0]>; + +// Load/store micro-ops on ports P4/P5. +def THX3T110LS : ProcResGroup<[THX3T110P4, THX3T110P5]>; + +// 70-entry unified scheduler covering the ALU, load/store and FP ports. +// The store data port (THX3T110SD0) is not a member of this group. +def THX3T110ANY: ProcResGroup<[THX3T110P0, THX3T110P1, THX3T110P2, + THX3T110P3, THX3T110P4, THX3T110P5, + THX3T110P6FP0, THX3T110P7FP1, + THX3T110P8FP2, THX3T110P9FP3]> { + let BufferSize = 70; +} + +// Define commonly used write types for InstRW specializations. +// All definitions follow the format: THX3T110Write_<NumCycles>Cyc_<Resources>. + +// 3 cycles on I1. +def THX3T110Write_3Cyc_I1 : SchedWriteRes<[THX3T110I1]> { + let Latency = 3; + let NumMicroOps = 2; +} + +// 4 cycles on I1. +def THX3T110Write_4Cyc_I1 : SchedWriteRes<[THX3T110I1]> { + let Latency = 4; + let NumMicroOps = 2; +} + +// 5 cycles on I1.
+def THX3T110Write_5Cyc_I1 : SchedWriteRes<[THX3T110I1]> { + let Latency = 5; + let NumMicroOps = 2; +} + +// 7 cycles on I1. +def THX3T110Write_7Cyc_I1 : SchedWriteRes<[THX3T110I1]> { + let Latency = 7; + let NumMicroOps = 3; +} + +// 23 cycles on I1. +// NOTE(review): ResourceCycles has two entries but only one resource +// (THX3T110I1) is listed -- confirm the intended value. +def THX3T110Write_23Cyc_I1 : SchedWriteRes<[THX3T110I1]> { + let Latency = 23; + let ResourceCycles = [13, 23]; + let NumMicroOps = 4; +} + +// 39 cycles on I1. +// NOTE(review): ResourceCycles has two entries but only one resource +// (THX3T110I1) is listed -- confirm the intended value. +def THX3T110Write_39Cyc_I1 : SchedWriteRes<[THX3T110I1]> { + let Latency = 39; + let ResourceCycles = [13, 39]; + let NumMicroOps = 4; +} + +// 1 cycle on I2/I3. +def THX3T110Write_1Cyc_I23 : SchedWriteRes<[THX3T110I23]> { + let Latency = 1; + let NumMicroOps = 2; +} + +// 8 cycles on I2/I3. +def THX3T110Write_8Cyc_I23 : SchedWriteRes<[THX3T110I23]> { + let Latency = 8; + let NumMicroOps = 3; +} + +// 1 cycle on I1/I2/I3. +def THX3T110Write_1Cyc_I123 : SchedWriteRes<[THX3T110I123]> { + let Latency = 1; + let NumMicroOps = 2; +} + +// 8 cycles on I1/I2/I3. +def THX3T110Write_8Cyc_I123 : SchedWriteRes<[THX3T110I123]> { + let Latency = 8; + let NumMicroOps = 3; +} + +// 1 cycle on I0/I1/I2/I3. +def THX3T110Write_1Cyc_I0123 : SchedWriteRes<[THX3T110I0123]> { + let Latency = 1; + let NumMicroOps = 2; +} + +// 2 cycles on I0/I1/I2/I3. +def THX3T110Write_2Cyc_I0123 : SchedWriteRes<[THX3T110I0123]> { + let Latency = 2; + let NumMicroOps = 2; +} + +// 3 cycles on I0/I1/I2/I3. +def THX3T110Write_3Cyc_I0123 : SchedWriteRes<[THX3T110I0123]> { + let Latency = 3; + let NumMicroOps = 2; +} + +// 4 cycles on I0/I1/I2/I3. +def THX3T110Write_4Cyc_I0123 : SchedWriteRes<[THX3T110I0123]> { + let Latency = 4; + let NumMicroOps = 3; +} + +// 5 cycles on I0/I1/I2/I3. +def THX3T110Write_5Cyc_I0123 : SchedWriteRes<[THX3T110I0123]> { + let Latency = 5; + let NumMicroOps = 3; +} + +// 6 cycles on I0/I1/I2/I3. +def THX3T110Write_6Cyc_I0123 : SchedWriteRes<[THX3T110I0123]> { + let Latency = 6; + let NumMicroOps = 3; +} + +// 8 cycles on I0/I1/I2/I3.
+def THX3T110Write_8Cyc_I0123 : SchedWriteRes<[THX3T110I0123]> { + let Latency = 8; + let NumMicroOps = 4; +} + +// 13 cycles on I0/I1/I2/I3. +def THX3T110Write_13Cyc_I0123 : SchedWriteRes<[THX3T110I0123]> { + let Latency = 13; + let NumMicroOps = 3; +} + +// 23 cycles on I0/I1/I2/I3. +def THX3T110Write_23Cyc_I0123 : SchedWriteRes<[THX3T110I0123]> { + let Latency = 23; + let NumMicroOps = 3; +} + +// 39 cycles on I0/I1/I2/I3. +def THX3T110Write_39Cyc_I0123 : SchedWriteRes<[THX3T110I0123]> { + let Latency = 39; + let NumMicroOps = 3; +} + +// 4 cycles on F2/F3. +def THX3T110Write_4Cyc_F23 : SchedWriteRes<[THX3T110FP23]> { + let Latency = 4; + let NumMicroOps = 2; +} + +// NOTE: despite the _F01 suffix in their names, the writes below use the +// THX3T110FP0123 group (all four FP ports). + +// 5 cycles on F0/F1/F2/F3. +def THX3T110Write_5Cyc_F01 : SchedWriteRes<[THX3T110FP0123]> { + let Latency = 5; + let NumMicroOps = 2; +} + +// 6 cycles on F0/F1/F2/F3. +def THX3T110Write_6Cyc_F01 : SchedWriteRes<[THX3T110FP0123]> { + let Latency = 6; + let NumMicroOps = 3; +} + +// 7 cycles on F0/F1/F2/F3. +def THX3T110Write_7Cyc_F01 : SchedWriteRes<[THX3T110FP0123]> { + let Latency = 7; + let NumMicroOps = 3; +} + +// 8 cycles on F0/F1/F2/F3. +def THX3T110Write_8Cyc_F01 : SchedWriteRes<[THX3T110FP0123]> { + let Latency = 8; + let NumMicroOps = 3; +} + +// 10 cycles on F0/F1/F2/F3. +def THX3T110Write_10Cyc_F01 : SchedWriteRes<[THX3T110FP0123]> { + let Latency = 10; + let NumMicroOps = 3; +} + +// 16 cycles on F0/F1/F2/F3. +def THX3T110Write_16Cyc_F01 : SchedWriteRes<[THX3T110FP0123]> { + let Latency = 16; + let NumMicroOps = 3; + let ResourceCycles = [8]; +} + +// 23 cycles on F0/F1/F2/F3. +def THX3T110Write_23Cyc_F01 : SchedWriteRes<[THX3T110FP0123]> { + let Latency = 23; + let NumMicroOps = 3; + let ResourceCycles = [11]; +} + +// 1 cycle on LS0/LS1. +def THX3T110Write_1Cyc_LS01 : SchedWriteRes<[THX3T110LS]> { + let Latency = 1; + let NumMicroOps = 1; +} + +// 2 cycles on LS0/LS1.
+def THX3T110Write_2Cyc_LS01 : SchedWriteRes<[THX3T110LS]> { + let Latency = 2; + let NumMicroOps = 2; +} + +// 4 cycles on LS0/LS1. +def THX3T110Write_4Cyc_LS01 : SchedWriteRes<[THX3T110LS]> { + let Latency = 4; + let NumMicroOps = 2; + let ResourceCycles = [2]; +} + +// 5 cycles on LS0/LS1. +def THX3T110Write_5Cyc_LS01 : SchedWriteRes<[THX3T110LS]> { + let Latency = 5; + let NumMicroOps = 3; +} + +// 6 cycles on LS0/LS1. +def THX3T110Write_6Cyc_LS01 : SchedWriteRes<[THX3T110LS]> { + let Latency = 6; + let NumMicroOps = 3; +} + +// 4 + 5 cycles on LS0/LS1. +// First resource is available after 4 cycles. +// Second resource is available after 5 cycles. +// Load vector pair, immed offset, Q-form [LDP/LDNP]. +// NOTE(review): ResourceCycles has two entries but only one resource group +// (THX3T110LS) is listed -- confirm this is intended. +def THX3T110Write_4_5Cyc_LS01 : SchedWriteRes<[THX3T110LS]> { + let Latency = 4; + let NumMicroOps = 2; + let ResourceCycles = [4, 5]; +} + +// 4 + 8 cycles on LS0/LS1. +// First resource is available after 4 cycles. +// Second resource is available after 8 cycles. +// Load vector pair, immed offset, S/D-form [LDP/LDNP]. +// NOTE(review): ResourceCycles has two entries but only one resource group +// (THX3T110LS) is listed -- confirm this is intended. +def THX3T110Write_4_8Cyc_LS01 : SchedWriteRes<[THX3T110LS]> { + let Latency = 4; + let NumMicroOps = 2; + let ResourceCycles = [4, 8]; +} + +// 11 cycles on LS0/LS1 and I1. +def THX3T110Write_11Cyc_LS01_I1 : + SchedWriteRes<[THX3T110LS, THX3T110I1]> { + let Latency = 11; + let NumMicroOps = 4; +} + +// 1 cycle on LS0/LS1 and I0/I1/I2/I3. +def THX3T110Write_1Cyc_LS01_I0123 : + SchedWriteRes<[THX3T110LS, THX3T110I0123]> { + let Latency = 1; + let NumMicroOps = 2; +} + +// 1 cycle on LS0/LS1 and 2 of I0/I1/I2/I3. +def THX3T110Write_1Cyc_LS01_I0123_I0123 : + SchedWriteRes<[THX3T110LS, THX3T110I0123, THX3T110I0123]> { + let Latency = 1; + let NumMicroOps = 3; +} + +// 4 cycles on LS0/LS1 and I0/I1/I2/I3. +def THX3T110Write_4Cyc_LS01_I0123 : + SchedWriteRes<[THX3T110LS, THX3T110I0123]> { + let Latency = 4; + let NumMicroOps = 3; +} + +// 4 cycles on LS0/LS1 and 2 of I0/I1/I2/I3.
+def THX3T110Write_4Cyc_LS01_I0123_I0123 : + SchedWriteRes<[THX3T110LS, THX3T110I0123, THX3T110I0123]> { + let Latency = 4; + let NumMicroOps = 3; +} + +// 5 cycles on LS0/LS1 and I0/I1/I2/I3. +def THX3T110Write_5Cyc_LS01_I0123 : + SchedWriteRes<[THX3T110LS, THX3T110I0123]> { + let Latency = 5; + let NumMicroOps = 3; +} + +// 5 cycles on LS0/LS1 and 2 of I0/I1/I2/I3. +def THX3T110Write_5Cyc_LS01_I0123_I0123 : + SchedWriteRes<[THX3T110LS, THX3T110I0123, THX3T110I0123]> { + let Latency = 5; + let NumMicroOps = 3; +} + +// 6 cycles on LS0/LS1 and I0/I1/I2/I3. +// NOTE: the name ends in _I012, but the resource group used is THX3T110I0123. +def THX3T110Write_6Cyc_LS01_I012 : + SchedWriteRes<[THX3T110LS, THX3T110I0123]> { + let Latency = 6; + let NumMicroOps = 4; +} + +// 6 cycles on LS0/LS1 and 2 of I0/I1/I2/I3. +def THX3T110Write_6Cyc_LS01_I0123_I0123 : + SchedWriteRes<[THX3T110LS, THX3T110I0123, THX3T110I0123]> { + let Latency = 6; + let NumMicroOps = 3; +} + +// 1 cycle on LS0/LS1 and SD. +def THX3T110Write_1Cyc_LS01_SD : + SchedWriteRes<[THX3T110LS, THX3T110SD]> { + let Latency = 1; + let NumMicroOps = 2; +} + +// 2 cycles on LS0/LS1 and SD. +def THX3T110Write_2Cyc_LS01_SD : + SchedWriteRes<[THX3T110LS, THX3T110SD]> { + let Latency = 2; + let NumMicroOps = 2; +} + +// 4 cycles on LS0/LS1 and SD. +def THX3T110Write_4Cyc_LS01_SD : + SchedWriteRes<[THX3T110LS, THX3T110SD]> { + let Latency = 4; + let NumMicroOps = 3; +} + +// 5 cycles on LS0/LS1 and SD. +def THX3T110Write_5Cyc_LS01_SD : + SchedWriteRes<[THX3T110LS, THX3T110SD]> { + let Latency = 5; + let NumMicroOps = 4; +} + +// 6 cycles on LS0/LS1 and SD. +def THX3T110Write_6Cyc_LS01_SD : + SchedWriteRes<[THX3T110LS, THX3T110SD]> { + let Latency = 6; + let NumMicroOps = 5; +} + +// 1 cycle on LS0/LS1, SD and I0/I1/I2/I3. +def THX3T110Write_1Cyc_LS01_SD_I0123 : + SchedWriteRes<[THX3T110LS, THX3T110SD, THX3T110I0123]> { + let Latency = 1; + let NumMicroOps = 2; +} + +// 2 cycles on LS0/LS1, SD and I0/I1/I2/I3.
+def THX3T110Write_2Cyc_LS01_SD_I0123 : + SchedWriteRes<[THX3T110LS, THX3T110SD, THX3T110I0123]> { + let Latency = 2; + let NumMicroOps = 2; +} + +// 4 cycles on LS0/LS1, SD and I0/I1/I2/I3. +def THX3T110Write_4Cyc_LS01_SD_I0123 : + SchedWriteRes<[THX3T110LS, THX3T110SD, THX3T110I0123]> { + let Latency = 4; + let NumMicroOps = 3; +} + +// 5 cycles on LS0/LS1, SD and I0/I1/I2/I3. +def THX3T110Write_5Cyc_LS01_SD_I0123 : + SchedWriteRes<[THX3T110LS, THX3T110SD, THX3T110I0123]> { + let Latency = 5; + let NumMicroOps = 4; +} + +// 6 cycles on LS0/LS1, SD and I0/I1/I2/I3. +def THX3T110Write_6Cyc_LS01_SD_I0123 : + SchedWriteRes<[THX3T110LS, THX3T110SD, THX3T110I0123]> { + let Latency = 6; + let NumMicroOps = 5; +} + +// 1 cycle on LS0/LS1 and F0/F1/F2/F3. +def THX3T110Write_1Cyc_LS01_F0123 : + SchedWriteRes<[THX3T110LS, THX3T110FP0123]> { + let Latency = 1; + let NumMicroOps = 2; +} + +// 5 cycles on LS0/LS1 and F0/F1/F2/F3. +def THX3T110Write_5Cyc_LS01_F0123 : + SchedWriteRes<[THX3T110LS, THX3T110FP0123]> { + let Latency = 5; + let NumMicroOps = 3; +} + +// 6 cycles on LS0/LS1 and F0/F1/F2/F3. +def THX3T110Write_6Cyc_LS01_F0123 : + SchedWriteRes<[THX3T110LS, THX3T110FP0123]> { + let Latency = 6; + let NumMicroOps = 3; +} + +// 7 cycles on LS0/LS1 and F0/F1/F2/F3. +def THX3T110Write_7Cyc_LS01_F0123 : + SchedWriteRes<[THX3T110LS, THX3T110FP0123]> { + let Latency = 7; + let NumMicroOps = 3; +} + +// 8 cycles on LS0/LS1 and F0/F1/F2/F3. +def THX3T110Write_8Cyc_LS01_F0123 : + SchedWriteRes<[THX3T110LS, THX3T110FP0123]> { + let Latency = 8; + let NumMicroOps = 3; +} + +// 8 cycles on LS0/LS1 and I0/I1/I2/I3. +def THX3T110Write_8Cyc_LS01_I0123 : + SchedWriteRes<[THX3T110LS, THX3T110I0123]> { + let Latency = 8; + let NumMicroOps = 3; +} + +// 12 cycles on LS0/LS1 and I0/I1/I2/I3. +def THX3T110Write_12Cyc_LS01_I0123 : + SchedWriteRes<[THX3T110LS, THX3T110I0123]> { + let Latency = 12; + let NumMicroOps = 4; +} + +// 16 cycles on LS0/LS1 and I0/I1/I2/I3.
+def THX3T110Write_16Cyc_LS01_I0123 : + SchedWriteRes<[THX3T110LS, THX3T110I0123]> { + let Latency = 16; + let NumMicroOps = 5; +} + +// 24 cycles on LS0/LS1 and I0/I1/I2/I3. +def THX3T110Write_24Cyc_LS01_I0123 : + SchedWriteRes<[THX3T110LS, THX3T110I0123]> { + let Latency = 24; + let NumMicroOps = 10; +} + +// 32 cycles on LS0/LS1 and I0/I1/I2/I3. +def THX3T110Write_32Cyc_LS01_I0123 : + SchedWriteRes<[THX3T110LS, THX3T110I0123]> { + let Latency = 32; + let NumMicroOps = 14; +} + +// The writes below each use only the THX3T110FP0123 group (F0/F1/F2/F3). + +// 3 cycles on F0/F1/F2/F3. +def THX3T110Write_3Cyc_F0123 : SchedWriteRes<[THX3T110FP0123]> { + let Latency = 3; + let NumMicroOps = 2; +} + +// 4 cycles on F0/F1/F2/F3. +def THX3T110Write_4Cyc_F0123 : SchedWriteRes<[THX3T110FP0123]> { + let Latency = 4; + let NumMicroOps = 2; +} + +// 5 cycles on F0/F1/F2/F3. +def THX3T110Write_5Cyc_F0123 : SchedWriteRes<[THX3T110FP0123]> { + let Latency = 5; + let NumMicroOps = 2; +} + +// 10 cycles on F0/F1/F2/F3. +def THX3T110Write_10Cyc_F0123 : SchedWriteRes<[THX3T110FP0123]> { + let Latency = 10; + let NumMicroOps = 4; +} + +// 15 cycles on F0/F1/F2/F3. +def THX3T110Write_15Cyc_F0123 : SchedWriteRes<[THX3T110FP0123]> { + let Latency = 15; + let NumMicroOps = 7; +} + +// 16 cycles on F0/F1/F2/F3. +def THX3T110Write_16Cyc_F0123 : SchedWriteRes<[THX3T110FP0123]> { + let Latency = 16; + let NumMicroOps = 3; +} + +// 18 cycles on F0/F1/F2/F3. +def THX3T110Write_18Cyc_F0123 : SchedWriteRes<[THX3T110FP0123]> { + let Latency = 18; + let NumMicroOps = 3; +} + +// 19 cycles on F0/F1/F2/F3. +def THX3T110Write_19Cyc_F0123 : SchedWriteRes<[THX3T110FP0123]> { + let Latency = 19; + let NumMicroOps = 4; +} + +// 20 cycles on F0/F1/F2/F3. +def THX3T110Write_20Cyc_F0123 : SchedWriteRes<[THX3T110FP0123]> { + let Latency = 20; + let NumMicroOps = 4; +} + +// 23 cycles on F0/F1/F2/F3. +def THX3T110Write_23Cyc_F0123 : SchedWriteRes<[THX3T110FP0123]> { + let Latency = 23; + let NumMicroOps = 4; +} + +// 3 cycles on F2/F3 and 4 cycles on F0/F1/F2/F3.
+def THX3T110Write_3_4Cyc_F23_F0123 : + SchedWriteRes<[THX3T110FP23, THX3T110FP0123]> { + let Latency = 3; + let NumMicroOps = 2; + let ResourceCycles = [3, 4]; +} + + +// Define commonly used read types. + +// No forwarding is provided for these types. +def : ReadAdvance<ReadI, 0>; +def : ReadAdvance<ReadISReg, 0>; +def : ReadAdvance<ReadIEReg, 0>; +def : ReadAdvance<ReadIM, 0>; +def : ReadAdvance<ReadIMA, 0>; +def : ReadAdvance<ReadID, 0>; +def : ReadAdvance<ReadExtrHi, 0>; +def : ReadAdvance<ReadAdrBase, 0>; +def : ReadAdvance<ReadVLD, 0>; + +//===----------------------------------------------------------------------===// +// 3. Instruction Tables. + +//--- +// 3.1 Branch Instructions +//--- + +// Branch, immed +// Branch and link, immed +// Compare and branch +def : WriteRes<WriteBr, [THX3T110I23]> { + let Latency = 1; + let NumMicroOps = 2; +} + +// Branch, register +// Branch and link, register != LR +// Branch and link, register = LR +def : WriteRes<WriteBrReg, [THX3T110I23]> { + let Latency = 1; + let NumMicroOps = 2; +} + +def : WriteRes<WriteSys, []> { let Latency = 1; } +def : WriteRes<WriteBarrier, []> { let Latency = 1; } +def : WriteRes<WriteHint, []> { let Latency = 1; } + +def : WriteRes<WriteAtomic, []> { + let Latency = 4; + let NumMicroOps = 2; +} + +//--- +// Branch +//--- +def : InstRW<[THX3T110Write_1Cyc_I23], (instrs B, BL, BR, BLR)>; +def : InstRW<[THX3T110Write_1Cyc_I23], (instrs Bcc)>; +def : InstRW<[THX3T110Write_1Cyc_I23], (instrs RET)>; +def : InstRW<[THX3T110Write_1Cyc_I23], + (instrs CBZW, CBZX, CBNZW, CBNZX, TBZW, TBZX, TBNZW, TBNZX)>; + +//--- +// 3.2 Arithmetic and Logical Instructions +// 3.3 Move and Shift Instructions +//--- + + +// ALU, basic +// Conditional compare +// Conditional select +// Address generation +def : WriteRes<WriteI, [THX3T110I0123]> { + let Latency = 1; + let ResourceCycles = [1]; + let NumMicroOps = 2; +} + +def : InstRW<[WriteI], + (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?", + 
"AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)", + "ADC(W|X)r", + "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)", + "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)", + "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)", + "SUBS?(W|X)r(i|r|s|x)", "SBC(W|X)r", + "SBCS(W|X)r", "CCMN(W|X)(i|r)", + "CCMP(W|X)(i|r)", "CSEL(W|X)r", + "CSINC(W|X)r", "CSINV(W|X)r", + "CSNEG(W|X)r")>; + +def : InstRW<[WriteI], (instrs COPY)>; + +// ALU, extend and/or shift +def : WriteRes<WriteISReg, [THX3T110I0123]> { + let Latency = 2; + let ResourceCycles = [2]; + let NumMicroOps = 2; +} + +def : InstRW<[WriteISReg], + (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?", + "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)", + "ADC(W|X)r", + "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)", + "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)", + "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)", + "SUBS?(W|X)r(i|r|s|x)", "SBC(W|X)r", + "SBCS(W|X)r", "CCMN(W|X)(i|r)", + "CCMP(W|X)(i|r)", "CSEL(W|X)r", + "CSINC(W|X)r", "CSINV(W|X)r", + "CSNEG(W|X)r")>; + +def : WriteRes<WriteIEReg, [THX3T110I0123]> { + let Latency = 1; + let ResourceCycles = [1]; + let NumMicroOps = 2; +} + +def : InstRW<[WriteIEReg], + (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?", + "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)", + "ADC(W|X)r", + "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)", + "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)", + "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)", + "SUBS?(W|X)r(i|r|s|x)", "SBC(W|X)r", + "SBCS(W|X)r", "CCMN(W|X)(i|r)", + "CCMP(W|X)(i|r)", "CSEL(W|X)r", + "CSINC(W|X)r", "CSINV(W|X)r", + "CSNEG(W|X)r")>; + +// Move immed +def : WriteRes<WriteImm, [THX3T110I0123]> { + let Latency = 1; + let NumMicroOps = 2; +} + +def : InstRW<[THX3T110Write_1Cyc_I0123], + (instrs MOVKWi, MOVKXi, MOVNWi, MOVNXi, MOVZWi, MOVZXi)>; + +def : InstRW<[THX3T110Write_1Cyc_I0123], + (instrs ASRVWr, ASRVXr, LSLVWr, LSLVXr, RORVWr, RORVXr)>; + +// Variable shift +def : WriteRes<WriteIS, [THX3T110I0123]> { + let Latency = 
1; + let NumMicroOps = 2; +} + +//--- +// 3.4 Divide and Multiply Instructions +//--- + +// Divide, W-form +// Latency range of 13-23/13-39. +def : WriteRes<WriteID32, [THX3T110I1]> { + let Latency = 39; + let ResourceCycles = [39]; + let NumMicroOps = 4; +} + +// Divide, X-form +def : WriteRes<WriteID64, [THX3T110I1]> { + let Latency = 23; + let ResourceCycles = [23]; + let NumMicroOps = 4; +} + +// Multiply accumulate, W-form +def : WriteRes<WriteIM32, [THX3T110I0123]> { + let Latency = 5; + let NumMicroOps = 3; +} + +// Multiply accumulate, X-form +def : WriteRes<WriteIM64, [THX3T110I0123]> { + let Latency = 5; + let NumMicroOps = 3; +} + +//def : InstRW<[WriteIM32, ReadIM, ReadIM, ReadIMA, THX3T110Write_5Cyc_I012], +// (instrs MADDWrrr, MSUBWrrr)>; +def : InstRW<[WriteIM32], (instrs MADDWrrr, MSUBWrrr)>; +def : InstRW<[WriteIM32], (instrs MADDXrrr, MSUBXrrr)>; +def : InstRW<[THX3T110Write_5Cyc_I0123], + (instregex "(S|U)(MADDL|MSUBL)rrr")>; + +def : InstRW<[WriteID32], (instrs SDIVWr, UDIVWr)>; +def : InstRW<[WriteID64], (instrs SDIVXr, UDIVXr)>; + +// Bitfield extract, two reg +def : WriteRes<WriteExtr, [THX3T110I0123]> { + let Latency = 1; + let NumMicroOps = 2; +} + +// Multiply high +def : InstRW<[THX3T110Write_4Cyc_I1], (instrs SMULHrr, UMULHrr)>; + +// Miscellaneous Data-Processing Instructions +// Bitfield extract +def : InstRW<[THX3T110Write_1Cyc_I0123], (instrs EXTRWrri, EXTRXrri)>; + +// Bitfield move - basic +def : InstRW<[THX3T110Write_1Cyc_I0123], + (instrs SBFMWri, SBFMXri, UBFMWri, UBFMXri)>; + +// Bitfield move, insert +def : InstRW<[THX3T110Write_1Cyc_I0123], (instregex "^BFM")>; +def : InstRW<[THX3T110Write_1Cyc_I0123], (instregex "(S|U)?BFM.*")>; + +// Count leading +def : InstRW<[THX3T110Write_3_4Cyc_F23_F0123], + (instregex "^CLS(W|X)r$", "^CLZ(W|X)r$")>; + +// Reverse bits +def : InstRW<[THX3T110Write_3_4Cyc_F23_F0123], (instrs RBITWr, RBITXr)>; + +// Cryptography Extensions +def : InstRW<[THX3T110Write_4Cyc_F0123], (instregex
"^AES[DE]")>; +def : InstRW<[THX3T110Write_4Cyc_F0123], (instregex "^AESI?MC")>; +def : InstRW<[THX3T110Write_4Cyc_F0123], (instregex "^PMULL")>; +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^SHA1SU0")>; +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^SHA1(H|SU1)")>; +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^SHA1[CMP]")>; +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^SHA256SU0")>; +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^SHA256(H|H2|SU1)")>; + +// CRC Instructions +// def : InstRW<[THX3T110Write_4Cyc_I1], (instregex "^CRC32", "^CRC32C")>; +def : InstRW<[THX3T110Write_4Cyc_I1], + (instrs CRC32Brr, CRC32Hrr, CRC32Wrr, CRC32Xrr)>; + +def : InstRW<[THX3T110Write_4Cyc_I1], + (instrs CRC32CBrr, CRC32CHrr, CRC32CWrr, CRC32CXrr)>; + +// Reverse bits/bytes +// NOTE: Handled by WriteI. + +//--- +// 3.6 Load Instructions +// 3.10 FP Load Instructions +//--- + +// Load register, literal +// Load register, unscaled immed +// Load register, immed unprivileged +// Load register, unsigned immed +def : WriteRes<WriteLD, [THX3T110LS]> { + let Latency = 4; + let NumMicroOps = 4; +} + +// Load register, immed post-index +// NOTE: Handled by WriteLD, WriteI. +// Load register, immed pre-index +// NOTE: Handled by WriteLD, WriteAdr. +def : WriteRes<WriteAdr, [THX3T110I0123]> { + let Latency = 1; + let NumMicroOps = 2; +} + +// Load pair, immed offset, normal +// Load pair, immed offset, signed words, base != SP +// Load pair, immed offset signed words, base = SP +// LDP only breaks into *one* LS micro-op. Thus +// the resources are handled by WriteLD. 
+def : WriteRes<WriteLDHi, []> { + let Latency = 4; + let NumMicroOps = 4; +} + +// Load register offset, basic +// Load register, register offset, scale by 4/8 +// Load register, register offset, scale by 2 +// Load register offset, extend +// Load register, register offset, extend, scale by 4/8 +// Load register, register offset, extend, scale by 2 +def THX3T110WriteLDIdx : SchedWriteVariant<[ + SchedVar<ScaledIdxPred, [THX3T110Write_4Cyc_LS01_I0123_I0123]>, + SchedVar<NoSchedPred, [THX3T110Write_4Cyc_LS01_I0123]>]>; +def : SchedAlias<WriteLDIdx, THX3T110WriteLDIdx>; + +def THX3T110ReadAdrBase : SchedReadVariant<[ + SchedVar<ScaledIdxPred, [ReadDefault]>, + SchedVar<NoSchedPred, [ReadDefault]>]>; +def : SchedAlias<ReadAdrBase, THX3T110ReadAdrBase>; + +// Load pair, immed pre-index, normal +// Load pair, immed pre-index, signed words +// Load pair, immed post-index, normal +// Load pair, immed post-index, signed words +def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, WriteLDHi], (instrs LDNPDi)>; +def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, WriteLDHi], (instrs LDNPQi)>; +def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, WriteLDHi], (instrs LDNPSi)>; +def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, WriteLDHi], (instrs LDNPWi)>; +def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, WriteLDHi], (instrs LDNPXi)>; + +def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, WriteLDHi], (instrs LDPDi)>; +def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, WriteLDHi], (instrs LDPQi)>; +def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, WriteLDHi], (instrs LDPSi)>; +def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, WriteLDHi], (instrs LDPSWi)>; +def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, WriteLDHi], (instrs LDPWi)>; +def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, WriteLDHi], (instrs LDPXi)>; + +def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDRBui)>; +def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDRDui)>; +def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDRHui)>; +def : InstRW<[THX3T110Write_5Cyc_LS01], (instrs 
LDRQui)>; +def : InstRW<[THX3T110Write_5Cyc_LS01], (instrs LDRSui)>; + +def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDRDl)>; +def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDRQl)>; +def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDRWl)>; +def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDRXl)>; + +def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDTRBi)>; +def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDTRHi)>; +def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDTRWi)>; +def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDTRXi)>; + +def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDTRSBWi)>; +def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDTRSBXi)>; +def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDTRSHWi)>; +def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDTRSHXi)>; +def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDTRSWi)>; + +def : InstRW<[THX3T110Write_5Cyc_LS01_I0123, WriteLDHi, WriteAdr], + (instrs LDPDpre)>; +def : InstRW<[THX3T110Write_5Cyc_LS01_I0123, WriteLDHi, WriteAdr], + (instrs LDPQpre)>; +def : InstRW<[THX3T110Write_5Cyc_LS01_I0123, WriteLDHi, WriteAdr], + (instrs LDPSpre)>; +def : InstRW<[THX3T110Write_5Cyc_LS01_I0123, WriteLDHi, WriteAdr], + (instrs LDPWpre)>; +def : InstRW<[THX3T110Write_5Cyc_LS01_I0123, WriteLDHi, WriteAdr], + (instrs LDPWpre)>; + +def : InstRW<[THX3T110Write_4Cyc_LS01, WriteAdr], + (instrs LDRBpre, LDRDpre, LDRHpre, LDRQpre, + LDRSpre, LDRWpre, LDRXpre, + LDRSBWpre, LDRSBXpre, LDRSBWpost, LDRSBXpost, + LDRSHWpre, LDRSHXpre, LDRSHWpost, LDRSHXpost, + LDRBBpre, LDRBBpost, LDRHHpre, LDRHHpost)>; + +def : InstRW<[THX3T110Write_5Cyc_LS01_I0123, WriteLDHi, WriteAdr], + (instrs LDPDpost, LDPQpost, LDPSpost, LDPWpost, LDPXpost)>; + +def : InstRW<[THX3T110Write_5Cyc_LS01_I0123, WriteI], + (instrs LDRBpost, LDRDpost, LDRHpost, + LDRQpost, LDRSpost, LDRWpost, LDRXpost)>; + +def : InstRW<[THX3T110Write_4Cyc_LS01_I0123_I0123, WriteLDHi, WriteAdr], + (instrs LDPDpre, LDPQpre, LDPSpre, LDPWpre, LDPXpre)>; + +def : 
InstRW<[THX3T110Write_4Cyc_LS01_I0123_I0123, WriteAdr], + (instrs LDRBpre, LDRDpre, LDRHpre, LDRQpre, + LDRSpre, LDRWpre, LDRXpre)>; + +def : InstRW<[THX3T110Write_4Cyc_LS01_I0123_I0123, WriteLDHi, WriteAdr], + (instrs LDPDpost, LDPQpost, LDPSpost, LDPWpost, LDPXpost)>; + +def : InstRW<[THX3T110Write_4Cyc_LS01_I0123_I0123, WriteI], + (instrs LDRBpost, LDRDpost, LDRHpost, LDRQpost, + LDRSpost, LDRWpost, LDRXpost)>; + +def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRBroW)>; +def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRDroW)>; +def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRHroW)>; +def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRHHroW)>; +def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRQroW)>; +def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRSroW)>; +def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRSHWroW)>; +def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRSHXroW)>; +def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRWroW)>; +def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRXroW)>; + +def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRBroX)>; +def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRDroX)>; +def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRHHroX)>; +def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRHroX)>; +def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRQroX)>; +def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRSroX)>; +def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRSHWroX)>; +def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRSHXroX)>; +def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRWroX)>; +def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], 
(instrs LDRXroX)>; + +def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDURBi)>; +def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDURBBi)>; +def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDURDi)>; +def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDURHi)>; +def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDURHHi)>; +def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDURQi)>; +def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDURSi)>; +def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDURXi)>; +def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDURSBWi)>; +def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDURSBXi)>; +def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDURSHWi)>; +def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDURSHXi)>; +def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDURSWi)>; + +// Load exclusive +def : InstRW<[THX3T110Write_4Cyc_LS01], (instregex "^LDAR(B|H|W|X)$")>; +def : InstRW<[THX3T110Write_4Cyc_LS01], (instregex "^LDAXR(B|H|W|X)$")>; +def : InstRW<[THX3T110Write_4Cyc_LS01], (instregex "^LDXR(B|H|W|X)$")>; +def : InstRW<[THX3T110Write_4Cyc_LS01], (instregex "^LDAXP(W|X)$")>; +def : InstRW<[THX3T110Write_4Cyc_LS01], (instregex "^LDXP(W|X)$")>; + +//--- +// Prefetch +//--- +def : InstRW<[THX3T110Write_6Cyc_LS01_I012], (instrs PRFMl)>; +def : InstRW<[THX3T110Write_6Cyc_LS01_I012], (instrs PRFUMi)>; +def : InstRW<[THX3T110Write_6Cyc_LS01_I012], (instrs PRFMui)>; +def : InstRW<[THX3T110Write_6Cyc_LS01_I012], (instrs PRFMroW)>; +def : InstRW<[THX3T110Write_6Cyc_LS01_I012], (instrs PRFMroX)>; + +//-- +// 3.7 Store Instructions +// 3.11 FP Store Instructions +//-- + +// Store register, unscaled immed +// Store register, immed unprivileged +// Store register, unsigned immed +def : WriteRes<WriteST, [THX3T110LS, THX3T110SD]> { + let Latency = 1; + let NumMicroOps = 2; +} + +// Store register, immed post-index +// NOTE: Handled by WriteAdr, WriteST, ReadAdrBase + +// Store register, immed pre-index +// NOTE: Handled by WriteAdr, WriteST + +// Store register, 
register offset, basic +// Store register, register offset, scaled by 4/8 +// Store register, register offset, scaled by 2 +// Store register, register offset, extend +// Store register, register offset, extend, scale by 4/8 +// Store register, register offset, extend, scale by 1 +def : WriteRes<WriteSTIdx, [THX3T110LS, THX3T110SD, THX3T110I0123]> { + let Latency = 1; + let NumMicroOps = 2; +} + +// Store pair, immed offset, W-form +// Store pair, immed offset, X-form +def : WriteRes<WriteSTP, [THX3T110LS, THX3T110SD]> { + let Latency = 1; + let NumMicroOps = 2; +} + +// Store pair, immed post-index, W-form +// Store pair, immed post-index, X-form +// Store pair, immed pre-index, W-form +// Store pair, immed pre-index, X-form +// NOTE: Handled by WriteAdr, WriteSTP. +def : InstRW<[THX3T110Write_1Cyc_LS01_SD], (instrs STURBi)>; +def : InstRW<[THX3T110Write_1Cyc_LS01_SD], (instrs STURBBi)>; +def : InstRW<[THX3T110Write_1Cyc_LS01_SD], (instrs STURDi)>; +def : InstRW<[THX3T110Write_1Cyc_LS01_SD], (instrs STURHi)>; +def : InstRW<[THX3T110Write_1Cyc_LS01_SD], (instrs STURHHi)>; +def : InstRW<[THX3T110Write_1Cyc_LS01_SD], (instrs STURQi)>; +def : InstRW<[THX3T110Write_1Cyc_LS01_SD], (instrs STURSi)>; +def : InstRW<[THX3T110Write_1Cyc_LS01_SD], (instrs STURWi)>; +def : InstRW<[THX3T110Write_1Cyc_LS01_SD], (instrs STURXi)>; + +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_SD], (instrs STTRBi)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_SD], (instrs STTRHi)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_SD], (instrs STTRWi)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_SD], (instrs STTRXi)>; + +def : InstRW<[THX3T110Write_1Cyc_LS01_SD], (instrs STNPDi)>; +def : InstRW<[THX3T110Write_1Cyc_LS01_SD], (instrs STNPQi)>; +def : InstRW<[THX3T110Write_1Cyc_LS01_SD], (instrs STNPXi)>; +def : InstRW<[THX3T110Write_1Cyc_LS01_SD], (instrs STNPWi)>; + +def : InstRW<[THX3T110Write_1Cyc_LS01_SD], (instrs STPDi)>; +def : InstRW<[THX3T110Write_1Cyc_LS01_SD], (instrs 
STPQi)>; +def : InstRW<[THX3T110Write_1Cyc_LS01_SD], (instrs STPXi)>; +def : InstRW<[THX3T110Write_1Cyc_LS01_SD], (instrs STPWi)>; + +def : InstRW<[THX3T110Write_1Cyc_LS01_I0123], (instrs STRBui)>; +def : InstRW<[THX3T110Write_1Cyc_LS01_I0123], (instrs STRDui)>; +def : InstRW<[THX3T110Write_1Cyc_LS01_I0123], (instrs STRHui)>; +def : InstRW<[THX3T110Write_1Cyc_LS01_I0123], (instrs STRQui)>; +def : InstRW<[THX3T110Write_1Cyc_LS01_I0123], (instrs STRXui)>; +def : InstRW<[THX3T110Write_1Cyc_LS01_I0123], (instrs STRWui)>; + +def : InstRW<[WriteSTP, THX3T110Write_1Cyc_LS01_SD], (instrs STRBui)>; +def : InstRW<[WriteSTP, THX3T110Write_1Cyc_LS01_SD], (instrs STRDui)>; +def : InstRW<[WriteSTP, THX3T110Write_1Cyc_LS01_SD], (instrs STRHui)>; +def : InstRW<[WriteSTP, THX3T110Write_1Cyc_LS01_SD], (instrs STRQui)>; +def : InstRW<[WriteSTP, THX3T110Write_1Cyc_LS01_SD], (instrs STRXui)>; +def : InstRW<[WriteSTP, THX3T110Write_1Cyc_LS01_SD], (instrs STRWui)>; + +def : InstRW<[WriteSTIdx, THX3T110Write_1Cyc_LS01_SD_I0123], (instrs STRBui)>; +def : InstRW<[WriteSTIdx, THX3T110Write_1Cyc_LS01_SD_I0123], (instrs STRDui)>; +def : InstRW<[WriteSTIdx, THX3T110Write_1Cyc_LS01_SD_I0123], (instrs STRHui)>; +def : InstRW<[WriteSTIdx, THX3T110Write_1Cyc_LS01_SD_I0123], (instrs STRQui)>; +def : InstRW<[WriteSTIdx, THX3T110Write_1Cyc_LS01_SD_I0123], (instrs STRXui)>; +def : InstRW<[WriteSTIdx, THX3T110Write_1Cyc_LS01_SD_I0123], (instrs STRWui)>; + +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123], + (instrs STPDpre, STPDpost)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase], + (instrs STPDpre, STPDpost)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123], + (instrs STPQpre, STPQpost)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase], + (instrs STPQpre, STPQpost)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123], + (instrs STPSpre, STPSpost)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase], + (instrs STPSpre, 
STPSpost)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123], + (instrs STPWpre, STPWpost)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase], + (instrs STPWpre, STPWpost)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123], + (instrs STPXpre, STPXpost)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase], + (instrs STPXpre, STPXpost)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123], + (instrs STRBpre, STRBpost)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase], + (instrs STRBpre, STRBpost)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123], + (instrs STRBBpre, STRBBpost)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase], + (instrs STRBBpre, STRBBpost)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123], + (instrs STRDpre, STRDpost)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase], + (instrs STRDpre, STRDpost)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123], + (instrs STRHpre, STRHpost)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase], + (instrs STRHpre, STRHpost)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123], + (instrs STRHHpre, STRHHpost)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase], + (instrs STRHHpre, STRHHpost)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123], + (instrs STRQpre, STRQpost)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase], + (instrs STRQpre, STRQpost)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123], + (instrs STRSpre, STRSpost)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase], + (instrs STRSpre, STRSpost)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123], + (instrs STRWpre, STRWpost)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase], + (instrs STRWpre, STRWpost)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123], + (instrs STRXpre, STRXpost)>; 
+def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase], + (instrs STRXpre, STRXpost)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase], + (instrs STRBroW, STRBroX)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase], + (instrs STRBBroW, STRBBroX)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase], + (instrs STRDroW, STRDroX)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase], + (instrs STRHroW, STRHroX)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase], + (instrs STRHHroW, STRHHroX)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase], + (instrs STRQroW, STRQroX)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase], + (instrs STRSroW, STRSroX)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase], + (instrs STRWroW, STRWroX)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase], + (instrs STRXroW, STRXroX)>; + +// Store exclusive, store-release, and non-temporal pair (STNP W/X-form) +def : InstRW<[THX3T110Write_4Cyc_LS01_SD], (instrs STNPWi, STNPXi)>; +def : InstRW<[THX3T110Write_4Cyc_LS01_SD], (instregex "^STLR(B|H|W|X)$")>; +def : InstRW<[THX3T110Write_4Cyc_LS01_SD], (instregex "^STXP(W|X)$")>; +def : InstRW<[THX3T110Write_4Cyc_LS01_SD], (instregex "^STXR(B|H|W|X)$")>; +def : InstRW<[THX3T110Write_4Cyc_LS01_SD], (instregex "^STLXP(W|X)$")>; +def : InstRW<[THX3T110Write_4Cyc_LS01_SD], (instregex "^STLXR(B|H|W|X)$")>; + +//--- +// 3.8 FP Data Processing Instructions +//--- + +// FP absolute value +// FP min/max +// FP negate +def : WriteRes<WriteF, [THX3T110FP0123]> { + let Latency = 5; + let NumMicroOps = 2; +} + +// FP arithmetic +def : InstRW<[THX3T110Write_6Cyc_F01], (instregex "^FADD", "^FSUB")>; + +// FP compare +def : WriteRes<WriteFCmp, [THX3T110FP0123]> { + let Latency = 5; + let NumMicroOps = 2; +} + +// FP Mul, Div, Sqrt +def : WriteRes<WriteFDiv, [THX3T110FP0123]> { + let Latency = 22; + let ResourceCycles = [19]; +} + +def
THX3T110XWriteFDiv : SchedWriteRes<[THX3T110FP0123]> { + let Latency = 16; + let ResourceCycles = [8]; + let NumMicroOps = 4; +} + +def THX3T110XWriteFDivSP : SchedWriteRes<[THX3T110FP0123]> { + let Latency = 16; + let ResourceCycles = [8]; + let NumMicroOps = 4; +} + +def THX3T110XWriteFDivDP : SchedWriteRes<[THX3T110FP0123]> { + let Latency = 23; + let ResourceCycles = [12]; + let NumMicroOps = 4; +} + +def THX3T110XWriteFSqrtSP : SchedWriteRes<[THX3T110FP0123]> { + let Latency = 16; + let ResourceCycles = [8]; + let NumMicroOps = 4; +} + +def THX3T110XWriteFSqrtDP : SchedWriteRes<[THX3T110FP0123]> { + let Latency = 23; + let ResourceCycles = [12]; + let NumMicroOps = 4; +} + +// FP divide, S-form +// FP square root, S-form +def : InstRW<[THX3T110XWriteFDivSP], (instrs FDIVSrr)>; +def : InstRW<[THX3T110XWriteFSqrtSP], (instrs FSQRTSr)>; +def : InstRW<[THX3T110XWriteFDivSP], (instregex "^FDIVv.*32$")>; +def : InstRW<[THX3T110XWriteFSqrtSP], (instregex "^.*SQRT.*32$")>; +def : InstRW<[THX3T110Write_16Cyc_F01], (instregex "^FDIVSrr", "^FSQRTSr")>; + +// FP divide, D-form +// FP square root, D-form +def : InstRW<[THX3T110XWriteFDivDP], (instrs FDIVDrr)>; +def : InstRW<[THX3T110XWriteFSqrtDP], (instrs FSQRTDr)>; +def : InstRW<[THX3T110XWriteFDivDP], (instregex "^FDIVv.*64$")>; +def : InstRW<[THX3T110XWriteFSqrtDP], (instregex "^.*SQRT.*64$")>; +def : InstRW<[THX3T110Write_23Cyc_F01], (instregex "^FDIVDrr", "^FSQRTDr")>; + +// FP multiply +// FP multiply accumulate +def : WriteRes<WriteFMul, [THX3T110FP0123]> { + let Latency = 6; + let ResourceCycles = [2]; + let NumMicroOps = 3; +} + +def THX3T110XWriteFMul : SchedWriteRes<[THX3T110FP0123]> { + let Latency = 6; + let ResourceCycles = [2]; + let NumMicroOps = 3; +} + +def THX3T110XWriteFMulAcc : SchedWriteRes<[THX3T110FP0123]> { + let Latency = 6; + let ResourceCycles = [2]; + let NumMicroOps = 3; +} + +def : InstRW<[THX3T110XWriteFMul], (instregex "^FMUL", "^FNMUL")>; +def : InstRW<[THX3T110XWriteFMulAcc], + 
(instregex "^FMADD", "^FMSUB", "^FNMADD", "^FNMSUB")>; + +// FP round to integral +def : InstRW<[THX3T110Write_7Cyc_F01], + (instregex "^FRINT(A|I|M|N|P|X|Z)(Sr|Dr)")>; + +// FP select +def : InstRW<[THX3T110Write_3_4Cyc_F23_F0123], (instregex "^FCSEL")>; + +//--- +// 3.9 FP Miscellaneous Instructions +//--- + +// FP convert, from vec to vec reg +// FP convert, from gen to vec reg +// FP convert, from vec to gen reg +def : WriteRes<WriteFCvt, [THX3T110FP0123]> { + let Latency = 7; + let NumMicroOps = 3; +} + +// FP move, immed +// FP move, register +def : WriteRes<WriteFImm, [THX3T110FP0123]> { + let Latency = 4; + let NumMicroOps = 2; +} + +// FP transfer, from gen to vec reg +// FP transfer, from vec to gen reg +def : WriteRes<WriteFCopy, [THX3T110FP0123]> { + let Latency = 4; + let NumMicroOps = 2; +} + +def : InstRW<[THX3T110Write_5Cyc_F01], (instrs FMOVXDHighr, FMOVDXHighr)>; + +//--- +// 3.12 ASIMD Integer Instructions +//--- + +// ASIMD absolute diff, D-form +// ASIMD absolute diff, Q-form +// ASIMD absolute diff accum, D-form +// ASIMD absolute diff accum, Q-form +// ASIMD absolute diff accum long +// ASIMD absolute diff long +// ASIMD arith, basic +// ASIMD arith, complex +// ASIMD compare +// ASIMD logical (AND, BIC, EOR) +// ASIMD max/min, basic +// ASIMD max/min, reduce, 4H/4S +// ASIMD max/min, reduce, 8B/8H +// ASIMD max/min, reduce, 16B +// ASIMD multiply, D-form +// ASIMD multiply, Q-form +// ASIMD multiply accumulate long +// ASIMD multiply accumulate saturating long +// ASIMD multiply long +// ASIMD pairwise add and accumulate +// ASIMD shift accumulate +// ASIMD shift by immed, basic +// ASIMD shift by immed and insert, basic, D-form +// ASIMD shift by immed and insert, basic, Q-form +// ASIMD shift by immed, complex +// ASIMD shift by register, basic, D-form +// ASIMD shift by register, basic, Q-form +// ASIMD shift by register, complex, D-form +// ASIMD shift by register, complex, Q-form +def : WriteRes<WriteV, [THX3T110FP0123]> { + let Latency 
= 5; + let NumMicroOps = 4; + let ResourceCycles = [4]; +} + +// ASIMD arith, reduce, 4H/4S +// ASIMD arith, reduce, 8B/8H +// ASIMD arith, reduce, 16B + +// ASIMD logical (MVN (alias for NOT), ORN, ORR) +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^ANDv", "^BICv", "^EORv", "^ORRv", "^ORNv", "^NOTv")>; + +// ASIMD arith, reduce +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^ADDVv", "^SADDLVv", "^UADDLVv")>; + +// ASIMD polynomial (8x8) multiply long +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^(S|U|SQD)MULL")>; +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "(S|U|SQD)(MLAL|MLSL|MULL)v.*")>; +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^PMULL(v8i8|v16i8)")>; +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^PMULL(v1i64|v2i64)")>; + +// ASIMD absolute diff accum, D-form +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^[SU]ABA(v8i8|v4i16|v2i32)$")>; +// ASIMD absolute diff accum, Q-form +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^[SU]ABA(v16i8|v8i16|v4i32)$")>; +// ASIMD absolute diff accum long +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^[SU]ABAL")>; +// ASIMD arith, reduce, 4H/4S +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^[SU]?ADDL?V(v8i8|v4i16|v2i32)v$")>; +// ASIMD arith, reduce, 8B +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^[SU]?ADDL?V(v8i16|v4i32)v$")>; +// ASIMD arith, reduce, 16B/16H +def : InstRW<[THX3T110Write_10Cyc_F0123], + (instregex "^[SU]?ADDL?Vv16i8v$")>; +// ASIMD max/min, reduce, 4H/4S +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^[SU](MIN|MAX)V(v4i16|v4i32)v$")>; +// ASIMD max/min, reduce, 8B/8H +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^[SU](MIN|MAX)V(v8i8|v8i16)v$")>; +// ASIMD max/min, reduce, 16B/16H +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^[SU](MIN|MAX)Vv16i8v$")>; +// ASIMD multiply, D-form +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^(P?MUL|SQR?DMULH)" # + 
"(v8i8|v4i16|v2i32|v1i8|v1i16|v1i32|v1i64)" # + "(_indexed)?$")>; +// ASIMD multiply, Q-form +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^(P?MUL|SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>; +// ASIMD multiply accumulate, D-form +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^ML[AS](v8i8|v4i16|v2i32)(_indexed)?$")>; +// ASIMD multiply accumulate, Q-form +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^ML[AS](v16i8|v8i16|v4i32)(_indexed)?$")>; +// ASIMD shift accumulate +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "SRSRAv","SSRAv","URSRAv","USRAv")>; + +// ASIMD shift by immed, basic +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "RSHRNv","SHRNv", "SQRSHRNv","SQRSHRUNv", + "SQSHRNv","SQSHRUNv", "UQRSHRNv", + "UQSHRNv","SQXTNv","SQXTUNv","UQXTNv")>; +// ASIMD shift by immed, complex +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^[SU]?(Q|R){1,2}SHR")>; +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^SQSHLU")>; +// ASIMD shift by register, basic, Q-form +def : InstRW<[THX3T110Write_5Cyc_F01], + (instregex "^[SU]SHL(v16i8|v8i16|v4i32|v2i64)")>; +// ASIMD shift by register, complex, D-form +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^[SU][QR]{1,2}SHL" # + "(v1i8|v1i16|v1i32|v1i64|v8i8|v4i16|v2i32|b|d|h|s)")>; +// ASIMD shift by register, complex, Q-form +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^[SU][QR]{1,2}SHL(v16i8|v8i16|v4i32|v2i64)")>; + +// ASIMD Arithmetic +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "(ADD|SUB)(v8i8|v4i16|v2i32|v1i64)")>; +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "(ADD|SUB)(v16i8|v8i16|v4i32|v2i64)")>; +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "(ADD|SUB)HNv.*")>; +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "(RADD|RSUB)HNv.*")>; +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^SQADD", "^SQNEG", "^SQSUB", "^SRHADD", + "^SUQADD", "^UQADD", "^UQSUB", "^URHADD", "^USQADD")>; +def : InstRW<[THX3T110Write_5Cyc_F0123], 
+ (instregex "ADDP(v16i8|v8i16|v4i32|v2i64)")>; +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "((AND|ORN|EOR|EON)S?(Xr[rsi]|v16i8|v8i16|v4i32)|" # + "(ORR|BIC)S?(Xr[rs]|v16i8|v8i16|v4i32))")>; +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "(CLS|CLZ|CNT)(v4i32|v8i16|v16i8)")>; +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^SADALP","^UADALP")>; +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^SADDLPv","^UADDLPv")>; +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^SADDLV","^UADDLV")>; +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^ADDVv","^SMAXVv","^UMAXVv","^SMINVv","^UMINVv")>; +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^SABAv","^UABAv","^SABALv","^UABALv")>; +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^SQADDv","^SQSUBv","^UQADDv","^UQSUBv")>; +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^SUQADDv","^USQADDv")>; +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^ADDHNv","^RADDHNv", "^RSUBHNv", + "^SQABS", "^SQADD", "^SQNEG", "^SQSUB", + "^SRHADD", "^SUBHNv", "^SUQADD", + "^UQADD", "^UQSUB", "^URHADD", "^USQADD")>; +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^CMEQv","^CMGEv","^CMGTv", + "^CMLEv","^CMLTv", "^CMHIv","^CMHSv")>; +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^SMAXv","^SMINv","^UMAXv","^UMINv", + "^SMAXPv","^SMINPv","^UMAXPv","^UMINPv")>; +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^SABDv","^UABDv", "^SABDLv","^UABDLv")>; + +//--- +// 3.13 ASIMD Floating-point Instructions +//--- + +// ASIMD FP absolute value +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^FABSv")>; + +// ASIMD FP arith, normal, D-form +// ASIMD FP arith, normal, Q-form +def : InstRW<[THX3T110Write_3_4Cyc_F23_F0123], + (instregex "^FABDv", "^FADDv", "^FSUBv")>; + +// ASIMD FP arith, pairwise, D-form +// ASIMD FP arith, pairwise, Q-form +def : InstRW<[THX3T110Write_3_4Cyc_F23_F0123], (instregex "^FADDPv")>; + +// ASIMD FP compare, D-form +// ASIMD FP compare,
Q-form +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^FACGEv", "^FACGTv")>; +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^FCMEQv", "^FCMGEv", + "^FCMGTv", "^FCMLEv", + "^FCMLTv")>; + +// ASIMD FP round, D-form +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^FRINT[AIMNPXZ](v2f32)")>; +// ASIMD FP round, Q-form +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^FRINT[AIMNPXZ](v4f32|v2f64)")>; + +// ASIMD FP convert, long +// ASIMD FP convert, narrow +// ASIMD FP convert, other, D-form +// ASIMD FP convert, other, Q-form +// NOTE: Handled by WriteV. + +// ASIMD FP convert, long and narrow +def : InstRW<[THX3T110Write_5Cyc_F01], (instregex "^FCVT(L|N|XN)v")>; +// ASIMD FP convert, other, D-form +def : InstRW<[THX3T110Write_5Cyc_F01], + (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v2f32|v1i32|v2i32|v1i64)")>; +// ASIMD FP convert, other, Q-form +def : InstRW<[THX3T110Write_5Cyc_F01], + (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v4f32|v2f64|v4i32|v2i64)")>; + +// ASIMD FP divide, D-form, F32 +def : InstRW<[THX3T110Write_16Cyc_F0123], (instrs FDIVv2f32)>; +def : InstRW<[THX3T110Write_16Cyc_F0123], (instregex "FDIVv2f32")>; + +// ASIMD FP divide, Q-form, F32 +def : InstRW<[THX3T110Write_16Cyc_F0123], (instrs FDIVv4f32)>; +def : InstRW<[THX3T110Write_16Cyc_F0123], (instregex "FDIVv4f32")>; + +// ASIMD FP divide, Q-form, F64 +def : InstRW<[THX3T110Write_23Cyc_F0123], (instrs FDIVv2f64)>; +def : InstRW<[THX3T110Write_23Cyc_F0123], (instregex "FDIVv2f64")>; + +// ASIMD FP max/min, normal, D-form +// ASIMD FP max/min, normal, Q-form +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^FMAXv", "^FMAXNMv", + "^FMINv", "^FMINNMv")>; + +// ASIMD FP max/min, pairwise, D-form +// ASIMD FP max/min, pairwise, Q-form +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^FMAXPv", "^FMAXNMPv", + "^FMINPv", "^FMINNMPv")>; + +// ASIMD FP max/min, reduce +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^FMAXVv", "^FMAXNMVv", + "^FMINVv", "^FMINNMVv")>; 
+ +// ASIMD FP multiply, D-form, FZ +// ASIMD FP multiply, D-form, no FZ +// ASIMD FP multiply, Q-form, FZ +// ASIMD FP multiply, Q-form, no FZ +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^FMULv", "^FMULXv")>; +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^FMULX?(v2f32|v1i32|v2i32|v1i64|32|64)")>; +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^FMULX?(v4f32|v2f64|v4i32|v2i64)")>; + +// ASIMD FP multiply accumulate, Dform, FZ +// ASIMD FP multiply accumulate, Dform, no FZ +// ASIMD FP multiply accumulate, Qform, FZ +// ASIMD FP multiply accumulate, Qform, no FZ +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^FMLAv", "^FMLSv")>; +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^FML[AS](v2f32|v1i32|v2i32|v1i64)")>; +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^FML[AS](v4f32|v2f64|v4i32|v2i64)")>; + +// ASIMD FP negate +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^FNEGv")>; + +//-- +// 3.14 ASIMD Miscellaneous Instructions +//-- + +// ASIMD bit reverse +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^RBITv")>; + +// ASIMD bitwise insert, D-form +// ASIMD bitwise insert, Q-form +def : InstRW<[THX3T110Write_3_4Cyc_F23_F0123], + (instregex "^BIFv", "^BITv", "^BSLv")>; + +// ASIMD count, D-form +// ASIMD count, Q-form +def : InstRW<[THX3T110Write_3_4Cyc_F23_F0123], + (instregex "^CLSv", "^CLZv", "^CNTv")>; + +// ASIMD duplicate, gen reg +// ASIMD duplicate, element +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^DUPv")>; +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^CPY")>; +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^DUPv.+gpr")>; + +// ASIMD extract +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^EXTv")>; + +// ASIMD extract narrow +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^XTNv")>; + +// ASIMD extract narrow, saturating +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^SQXTNv", "^SQXTUNv", "^UQXTNv")>; + +// ASIMD insert, element to element +def : 
InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^INSv")>; + +// ASIMD transfer, element to gen reg +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^[SU]MOVv")>; + +// ASIMD move, integer immed +def : InstRW<[THX3T110Write_3_4Cyc_F23_F0123], (instregex "^MOVIv")>; + +// ASIMD move, FP immed +def : InstRW<[THX3T110Write_3_4Cyc_F23_F0123], (instregex "^FMOVv")>; + +// ASIMD transpose +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^TRN1", "^TRN2")>; + +// ASIMD unzip/zip +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^UZP1", "^UZP2", "^ZIP1", "^ZIP2")>; + +// ASIMD reciprocal estimate, D-form +// ASIMD reciprocal estimate, Q-form +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^FRECPEv", "^FRECPXv", "^URECPEv", + "^FRSQRTEv", "^URSQRTEv")>; + +// ASIMD reciprocal step, D-form, FZ +// ASIMD reciprocal step, D-form, no FZ +// ASIMD reciprocal step, Q-form, FZ +// ASIMD reciprocal step, Q-form, no FZ +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^FRECPSv", "^FRSQRTSv")>; + +// ASIMD reverse +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^REV16v", "^REV32v", "^REV64v")>; + +// ASIMD table lookup, D-form +// ASIMD table lookup, Q-form +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instrs TBLv8i8One, TBLv16i8One, TBXv8i8One, TBXv16i8One)>; +def : InstRW<[THX3T110Write_10Cyc_F0123], + (instrs TBLv8i8Two, TBLv16i8Two, TBXv8i8Two, TBXv16i8Two)>; +def : InstRW<[THX3T110Write_15Cyc_F0123], + (instrs TBLv8i8Three, TBLv16i8Three, TBXv8i8Three, TBXv16i8Three)>; +def : InstRW<[THX3T110Write_20Cyc_F0123], + (instrs TBLv8i8Four, TBLv16i8Four, TBXv8i8Four, TBXv16i8Four)>; + +// ASIMD transfer, element to word or word +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^[SU]MOVv")>; + +// ASIMD transfer, element to gen reg +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "(S|U)MOVv.*")>; + +// ASIMD transfer gen reg to element +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^INSv")>; + +// ASIMD transpose +def : 
InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^TRN1v", "^TRN2v", "^UZP1v", "^UZP2v")>; + +// ASIMD unzip/zip +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^ZIP1v", "^ZIP2v")>; + +//-- +// 3.15 ASIMD Load Instructions +//-- + +// ASIMD load, 1 element, multiple, 1 reg, D-form +// ASIMD load, 1 element, multiple, 1 reg, Q-form +def : InstRW<[THX3T110Write_4Cyc_LS01], + (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THX3T110Write_4Cyc_LS01, WriteAdr], + (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// ASIMD load, 1 element, multiple, 2 reg, D-form +// ASIMD load, 1 element, multiple, 2 reg, Q-form +def : InstRW<[THX3T110Write_4Cyc_LS01], + (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THX3T110Write_4Cyc_LS01, WriteAdr], + (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// ASIMD load, 1 element, multiple, 3 reg, D-form +// ASIMD load, 1 element, multiple, 3 reg, Q-form +def : InstRW<[THX3T110Write_5Cyc_LS01], + (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THX3T110Write_5Cyc_LS01, WriteAdr], + (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// ASIMD load, 1 element, multiple, 4 reg, D-form +// ASIMD load, 1 element, multiple, 4 reg, Q-form +def : InstRW<[THX3T110Write_6Cyc_LS01], + (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THX3T110Write_6Cyc_LS01, WriteAdr], + (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// ASIMD load, 1 element, one lane, B/H/S +// ASIMD load, 1 element, one lane, D +def : InstRW<[THX3T110Write_5Cyc_LS01_F0123], + (instregex "^LD1i(8|16|32|64)$")>; +def : InstRW<[THX3T110Write_5Cyc_LS01_F0123, WriteAdr], + (instregex "^LD1i(8|16|32|64)_POST$")>; + +// ASIMD load, 1 element, all lanes, D-form, B/H/S +// ASIMD load, 1 element, all lanes, D-form, D +// ASIMD load, 1 element, all lanes, Q-form +def : InstRW<[THX3T110Write_5Cyc_LS01_F0123], + (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : 
InstRW<[THX3T110Write_5Cyc_LS01_F0123, WriteAdr], + (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// ASIMD load, 2 element, multiple, D-form, B/H/S +// ASIMD load, 2 element, multiple, Q-form, D +def : InstRW<[THX3T110Write_5Cyc_LS01_F0123], + (instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)$")>; +def : InstRW<[THX3T110Write_5Cyc_LS01_F0123, WriteAdr], + (instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>; + +// ASIMD load, 2 element, one lane, B/H +// ASIMD load, 2 element, one lane, S +// ASIMD load, 2 element, one lane, D +def : InstRW<[THX3T110Write_5Cyc_LS01_F0123], + (instregex "^LD2i(8|16|32|64)$")>; +def : InstRW<[THX3T110Write_5Cyc_LS01_F0123, WriteAdr], + (instregex "^LD2i(8|16|32|64)_POST$")>; + +// ASIMD load, 2 element, all lanes, D-form, B/H/S +// ASIMD load, 2 element, all lanes, D-form, D +// ASIMD load, 2 element, all lanes, Q-form +def : InstRW<[THX3T110Write_5Cyc_LS01_F0123], + (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THX3T110Write_5Cyc_LS01_F0123, WriteAdr], + (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// ASIMD load, 3 element, multiple, D-form, B/H/S +// ASIMD load, 3 element, multiple, Q-form, B/H/S +// ASIMD load, 3 element, multiple, Q-form, D +def : InstRW<[THX3T110Write_8Cyc_LS01_F0123], + (instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)$")>; +def : InstRW<[THX3T110Write_8Cyc_LS01_F0123, WriteAdr], + (instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>; + +// ASIMD load, 3 element, one lane, B/H +// ASIMD load, 3 element, one lane, S +// ASIMD load, 3 element, one lane, D +def : InstRW<[THX3T110Write_7Cyc_LS01_F0123], + (instregex "^LD3i(8|16|32|64)$")>; +def : InstRW<[THX3T110Write_7Cyc_LS01_F0123, WriteAdr], + (instregex "^LD3i(8|16|32|64)_POST$")>; + +// ASIMD load, 3 element, all lanes, D-form, B/H/S +// ASIMD load, 3 element, all lanes, D-form, D +// ASIMD load, 3 element, all lanes, Q-form, B/H/S +// ASIMD load, 3 element, all lanes, Q-form, D +def :
InstRW<[THX3T110Write_7Cyc_LS01_F0123], + (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THX3T110Write_7Cyc_LS01_F0123, WriteAdr], + (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// ASIMD load, 4 element, multiple, D-form, B/H/S +// ASIMD load, 4 element, multiple, Q-form, B/H/S +// ASIMD load, 4 element, multiple, Q-form, D +def : InstRW<[THX3T110Write_8Cyc_LS01_F0123], + (instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>; +def : InstRW<[THX3T110Write_8Cyc_LS01_F0123, WriteAdr], + (instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>; + +// ASIMD load, 4 element, one lane, B/H +// ASIMD load, 4 element, one lane, S +// ASIMD load, 4 element, one lane, D +def : InstRW<[THX3T110Write_6Cyc_LS01_F0123], + (instregex "^LD4i(8|16|32|64)$")>; +def : InstRW<[THX3T110Write_6Cyc_LS01_F0123, WriteAdr], + (instregex "^LD4i(8|16|32|64)_POST$")>; + +// ASIMD load, 4 element, all lanes, D-form, B/H/S +// ASIMD load, 4 element, all lanes, D-form, D +// ASIMD load, 4 element, all lanes, Q-form, B/H/S +// ASIMD load, 4 element, all lanes, Q-form, D +def : InstRW<[THX3T110Write_6Cyc_LS01_F0123], + (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THX3T110Write_6Cyc_LS01_F0123, WriteAdr], + (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +//-- +// 3.16 ASIMD Store Instructions +//-- + +// ASIMD store, 1 element, multiple, 1 reg, D-form +// ASIMD store, 1 element, multiple, 1 reg, Q-form +def : InstRW<[THX3T110Write_1Cyc_LS01], + (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THX3T110Write_1Cyc_LS01, WriteAdr], + (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// ASIMD store, 1 element, multiple, 2 reg, D-form +// ASIMD store, 1 element, multiple, 2 reg, Q-form +def : InstRW<[THX3T110Write_1Cyc_LS01], + (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THX3T110Write_1Cyc_LS01, WriteAdr], + (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// ASIMD store, 1 element, multiple, 
3 reg, D-form +// ASIMD store, 1 element, multiple, 3 reg, Q-form +def : InstRW<[THX3T110Write_1Cyc_LS01], + (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THX3T110Write_1Cyc_LS01, WriteAdr], + (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// ASIMD store, 1 element, multiple, 4 reg, D-form +// ASIMD store, 1 element, multiple, 4 reg, Q-form +def : InstRW<[THX3T110Write_1Cyc_LS01], + (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THX3T110Write_1Cyc_LS01, WriteAdr], + (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// ASIMD store, 1 element, one lane, B/H/S +// ASIMD store, 1 element, one lane, D +def : InstRW<[THX3T110Write_1Cyc_LS01_F0123], + (instregex "^ST1i(8|16|32|64)$")>; +def : InstRW<[THX3T110Write_1Cyc_LS01_F0123, WriteAdr], + (instregex "^ST1i(8|16|32|64)_POST$")>; + +// ASIMD store, 2 element, multiple, D-form, B/H/S +// ASIMD store, 2 element, multiple, Q-form, B/H/S +// ASIMD store, 2 element, multiple, Q-form, D +def : InstRW<[THX3T110Write_1Cyc_LS01_F0123], + (instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)$")>; +def : InstRW<[THX3T110Write_1Cyc_LS01_F0123, WriteAdr], + (instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>; + +// ASIMD store, 2 element, one lane, B/H/S +// ASIMD store, 2 element, one lane, D +def : InstRW<[THX3T110Write_1Cyc_LS01_F0123], + (instregex "^ST2i(8|16|32|64)$")>; +def : InstRW<[THX3T110Write_1Cyc_LS01_F0123, WriteAdr], + (instregex "^ST2i(8|16|32|64)_POST$")>; + +// ASIMD store, 3 element, multiple, D-form, B/H/S +// ASIMD store, 3 element, multiple, Q-form, B/H/S +// ASIMD store, 3 element, multiple, Q-form, D +def : InstRW<[THX3T110Write_1Cyc_LS01_F0123], + (instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)$")>; +def : InstRW<[THX3T110Write_1Cyc_LS01_F0123, WriteAdr], + (instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>; + +// ASIMD store, 3 element, one lane, B/H +// ASIMD store, 3 element, one lane, S +// ASIMD store, 3 element, one lane, D +def : 
InstRW<[THX3T110Write_1Cyc_LS01_F0123], + (instregex "^ST3i(8|16|32|64)$")>; +def : InstRW<[THX3T110Write_1Cyc_LS01_F0123, WriteAdr], + (instregex "^ST3i(8|16|32|64)_POST$")>; + +// ASIMD store, 4 element, multiple, D-form, B/H/S +// ASIMD store, 4 element, multiple, Q-form, B/H/S +// ASIMD store, 4 element, multiple, Q-form, D +def : InstRW<[THX3T110Write_1Cyc_LS01_F0123], + (instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>; +def : InstRW<[THX3T110Write_1Cyc_LS01_F0123, WriteAdr], + (instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>; + +// ASIMD store, 4 element, one lane, B/H +// ASIMD store, 4 element, one lane, S +// ASIMD store, 4 element, one lane, D +def : InstRW<[THX3T110Write_1Cyc_LS01_F0123], + (instregex "^ST4i(8|16|32|64)$")>; +def : InstRW<[THX3T110Write_1Cyc_LS01_F0123, WriteAdr], + (instregex "^ST4i(8|16|32|64)_POST$")>; + +// V8.1a Atomics (LSE) +def : InstRW<[THX3T110Write_4Cyc_I0123, WriteAtomic], + (instrs CASB, CASH, CASW, CASX)>; + +def : InstRW<[THX3T110Write_6Cyc_I0123, WriteAtomic], + (instrs CASAB, CASAH, CASAW, CASAX)>; + +def : InstRW<[THX3T110Write_6Cyc_I0123, WriteAtomic], + (instrs CASLB, CASLH, CASLW, CASLX)>; + +def : InstRW<[THX3T110Write_8Cyc_I0123, WriteAtomic], + (instrs CASALB, CASALH, CASALW, CASALX)>; + +def : InstRW<[THX3T110Write_6Cyc_I0123, WriteAtomic], + (instrs LDLARB, LDLARH, LDLARW, LDLARX)>; + +def : InstRW<[THX3T110Write_4Cyc_I0123, WriteAtomic], + (instrs LDADDB, LDADDH, LDADDW, LDADDX)>; + +def : InstRW<[THX3T110Write_6Cyc_I0123, WriteAtomic], + (instrs LDADDAB, LDADDAH, LDADDAW, LDADDAX)>; + +def : InstRW<[THX3T110Write_6Cyc_I0123, WriteAtomic], + (instrs LDADDLB, LDADDLH, LDADDLW, LDADDLX)>; + +def : InstRW<[THX3T110Write_8Cyc_I0123, WriteAtomic], + (instrs LDADDALB, LDADDALH, LDADDALW, LDADDALX)>; + +def : InstRW<[THX3T110Write_4Cyc_I0123, WriteAtomic], + (instrs LDCLRB, LDCLRH, LDCLRW, LDCLRX)>; + +def : InstRW<[THX3T110Write_6Cyc_I0123, WriteAtomic], + (instrs LDCLRAB, LDCLRAH, LDCLRAW, LDCLRAX)>; + +def : 
InstRW<[THX3T110Write_6Cyc_I0123, WriteAtomic], + (instrs LDCLRLB, LDCLRLH, LDCLRLW, LDCLRLX)>; + +def : InstRW<[THX3T110Write_8Cyc_I0123, WriteAtomic], + (instrs LDCLRALB, LDCLRALH, LDCLRALW, LDCLRALX)>; + +def : InstRW<[THX3T110Write_4Cyc_I0123, WriteAtomic], + (instrs LDEORB, LDEORH, LDEORW, LDEORX)>; + +def : InstRW<[THX3T110Write_6Cyc_I0123, WriteAtomic], + (instrs LDEORAB, LDEORAH, LDEORAW, LDEORAX)>; + +def : InstRW<[THX3T110Write_6Cyc_I0123, WriteAtomic], + (instrs LDEORLB, LDEORLH, LDEORLW, LDEORLX)>; + +def : InstRW<[THX3T110Write_8Cyc_I0123, WriteAtomic], + (instrs LDEORALB, LDEORALH, LDEORALW, LDEORALX)>; + +def : InstRW<[THX3T110Write_4Cyc_I0123, WriteAtomic], + (instrs LDSETB, LDSETH, LDSETW, LDSETX)>; + +def : InstRW<[THX3T110Write_6Cyc_I0123, WriteAtomic], + (instrs LDSETAB, LDSETAH, LDSETAW, LDSETAX)>; + +def : InstRW<[THX3T110Write_6Cyc_I0123, WriteAtomic], + (instrs LDSETLB, LDSETLH, LDSETLW, LDSETLX)>; + +def : InstRW<[THX3T110Write_8Cyc_I0123, WriteAtomic], + (instrs LDSETALB, LDSETALH, LDSETALW, LDSETALX)>; + +def : InstRW<[THX3T110Write_4Cyc_I0123, WriteAtomic], + (instrs LDSMAXB, LDSMAXH, LDSMAXW, LDSMAXX, + LDSMAXAB, LDSMAXAH, LDSMAXAW, LDSMAXAX, + LDSMAXLB, LDSMAXLH, LDSMAXLW, LDSMAXLX, + LDSMAXALB, LDSMAXALH, LDSMAXALW, LDSMAXALX)>; + +def : InstRW<[THX3T110Write_4Cyc_I0123, WriteAtomic], + (instrs LDSMINB, LDSMINH, LDSMINW, LDSMINX, + LDSMINAB, LDSMINAH, LDSMINAW, LDSMINAX, + LDSMINLB, LDSMINLH, LDSMINLW, LDSMINLX, + LDSMINALB, LDSMINALH, LDSMINALW, LDSMINALX)>; + +def : InstRW<[THX3T110Write_4Cyc_I0123, WriteAtomic], + (instrs LDUMAXB, LDUMAXH, LDUMAXW, LDUMAXX, + LDUMAXAB, LDUMAXAH, LDUMAXAW, LDUMAXAX, + LDUMAXLB, LDUMAXLH, LDUMAXLW, LDUMAXLX, + LDUMAXALB, LDUMAXALH, LDUMAXALW, LDUMAXALX)>; + +def : InstRW<[THX3T110Write_4Cyc_I0123, WriteAtomic], + (instrs LDUMINB, LDUMINH, LDUMINW, LDUMINX, + LDUMINAB, LDUMINAH, LDUMINAW, LDUMINAX, + LDUMINLB, LDUMINLH, LDUMINLW, LDUMINLX, + LDUMINALB, LDUMINALH, LDUMINALW, LDUMINALX)>; + +def : 
InstRW<[THX3T110Write_4Cyc_I0123, WriteAtomic], + (instrs SWPB, SWPH, SWPW, SWPX)>; + +def : InstRW<[THX3T110Write_6Cyc_I0123, WriteAtomic], + (instrs SWPAB, SWPAH, SWPAW, SWPAX)>; + +def : InstRW<[THX3T110Write_6Cyc_I0123, WriteAtomic], + (instrs SWPLB, SWPLH, SWPLW, SWPLX)>; + +def : InstRW<[THX3T110Write_8Cyc_I0123, WriteAtomic], + (instrs SWPALB, SWPALH, SWPALW, SWPALX)>; + +def : InstRW<[THX3T110Write_4Cyc_I0123, WriteAtomic], + (instrs STLLRB, STLLRH, STLLRW, STLLRX)>; + +// V8.3a PAC +def : InstRW<[THX3T110Write_11Cyc_LS01_I1], (instregex "^LDRAA", "^LDRAB")>; +def : InstRW<[THX3T110Write_8Cyc_I123], + (instrs BLRAA, BLRAAZ, BLRAB, BLRABZ, + BRAA, BRAAZ, BRAB, BRABZ)>; +def : InstRW<[THX3T110Write_8Cyc_I123], (instrs RETAA, RETAB)>; + +} // SchedModel = ThunderX3T110Model diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.cpp index 3636d8d2b628..079e8f1764dc 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.cpp @@ -160,6 +160,17 @@ void AArch64Subtarget::initializeProperties() { PrefFunctionLogAlignment = 4; PrefLoopLogAlignment = 2; break; + case ThunderX3T110: + CacheLineSize = 64; + PrefFunctionLogAlignment = 4; + PrefLoopLogAlignment = 2; + MaxInterleaveFactor = 4; + PrefetchDistance = 128; + MinPrefetchStride = 1024; + MaxPrefetchIterationsAhead = 4; + // FIXME: remove this to enable 64-bit SLP if performance looks good. 
+ MinVectorRegisterBitWidth = 128; + break; } } diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.h b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.h index 79c2c161d3cb..40eb67a153e4 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.h +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -63,7 +63,8 @@ public: ThunderXT81, ThunderXT83, ThunderXT88, - TSV110 + TSV110, + ThunderX3T110 }; protected: diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp index 6f5f58554d09..d407edfbd966 100644 --- a/contrib/llvm-project/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp +++ b/contrib/llvm-project/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp @@ -304,7 +304,7 @@ void BPFDAGToDAGISel::PreprocessLoad(SDNode *Node, LLVM_DEBUG(dbgs() << "Replacing load of size " << size << " with constant " << val << '\n'); - SDValue NVal = CurDAG->getConstant(val, DL, MVT::i64); + SDValue NVal = CurDAG->getConstant(val, DL, LD->getValueType(0)); // After replacement, the current node is dead, we need to // go backward one step to make iterator still work diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp b/contrib/llvm-project/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp index 29abc9303a62..b2ecb531db9d 100644 --- a/contrib/llvm-project/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp +++ b/contrib/llvm-project/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp @@ -116,11 +116,22 @@ void BPFMISimplifyPatchable::checkADDrr(MachineRegisterInfo *MRI, else continue; - // It must be a form of %1 = *(type *)(%2 + 0) or *(type *)(%2 + 0) = %1. + // It must be a form of %2 = *(type *)(%1 + 0) or *(type *)(%1 + 0) = %2. 
const MachineOperand &ImmOp = DefInst->getOperand(2); if (!ImmOp.isImm() || ImmOp.getImm() != 0) continue; + // Reject the form: + // %1 = ADD_rr %2, %3 + // *(type *)(%2 + 0) = %1 + if (Opcode == BPF::STB || Opcode == BPF::STH || Opcode == BPF::STW || + Opcode == BPF::STD || Opcode == BPF::STB32 || Opcode == BPF::STH32 || + Opcode == BPF::STW32) { + const MachineOperand &Opnd = DefInst->getOperand(0); + if (Opnd.isReg() && Opnd.getReg() == I->getReg()) + continue; + } + BuildMI(*DefInst->getParent(), *DefInst, DefInst->getDebugLoc(), TII->get(COREOp)) .add(DefInst->getOperand(0)).addImm(Opcode).add(*BaseOp) .addGlobalAddress(GVal); diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BTFDebug.cpp b/contrib/llvm-project/llvm/lib/Target/BPF/BTFDebug.cpp index a9fb04f20d1c..6daeb3b4b63b 100644 --- a/contrib/llvm-project/llvm/lib/Target/BPF/BTFDebug.cpp +++ b/contrib/llvm-project/llvm/lib/Target/BPF/BTFDebug.cpp @@ -600,6 +600,38 @@ void BTFDebug::visitTypeEntry(const DIType *Ty, uint32_t &TypeId, bool CheckPointer, bool SeenPointer) { if (!Ty || DIToIdMap.find(Ty) != DIToIdMap.end()) { TypeId = DIToIdMap[Ty]; + + // To handle the case like the following: + // struct t; + // typedef struct t _t; + // struct s1 { _t *c; }; + // int test1(struct s1 *arg) { ... } + // + // struct t { int a; int b; }; + // struct s2 { _t c; } + // int test2(struct s2 *arg) { ... } + // + // During traversing test1() argument, "_t" is recorded + // in DIToIdMap and a forward declaration fixup is created + // for "struct t" to avoid pointee type traversal. + // + // During traversing test2() argument, even if we see "_t" is + // already defined, we should keep moving to eventually + // bring in types for "struct t". Otherwise, the "struct s2" + // definition won't be correct. 
+ if (Ty && (!CheckPointer || !SeenPointer)) { + if (const auto *DTy = dyn_cast<DIDerivedType>(Ty)) { + unsigned Tag = DTy->getTag(); + if (Tag == dwarf::DW_TAG_typedef || Tag == dwarf::DW_TAG_const_type || + Tag == dwarf::DW_TAG_volatile_type || + Tag == dwarf::DW_TAG_restrict_type) { + uint32_t TmpTypeId; + visitTypeEntry(DTy->getBaseType(), TmpTypeId, CheckPointer, + SeenPointer); + } + } + } + return; } diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp index 886034d9601a..f1fe51f5e54f 100644 --- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp +++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp @@ -12,9 +12,6 @@ #include "HexagonInstrInfo.h" #include "HexagonSubtarget.h" #include "MCTargetDesc/HexagonBaseInfo.h" -#include "RDFGraph.h" -#include "RDFLiveness.h" -#include "RDFRegisters.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringRef.h" @@ -27,6 +24,9 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RDFGraph.h" +#include "llvm/CodeGen/RDFLiveness.h" +#include "llvm/CodeGen/RDFRegisters.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/InitializePasses.h" #include "llvm/MC/MCInstrDesc.h" diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp index 517ad1c6ee7b..f26e23befde2 100644 --- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp +++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp @@ -11,9 +11,6 @@ #include "MCTargetDesc/HexagonBaseInfo.h" #include "RDFCopy.h" #include "RDFDeadCode.h" -#include "RDFGraph.h" -#include "RDFLiveness.h" -#include "RDFRegisters.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include 
"llvm/ADT/SetVector.h" @@ -24,6 +21,9 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RDFGraph.h" +#include "llvm/CodeGen/RDFLiveness.h" +#include "llvm/CodeGen/RDFRegisters.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/RDFCopy.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/RDFCopy.cpp index a9d39fd4b2dc..34d58f0a7a23 100644 --- a/contrib/llvm-project/llvm/lib/Target/Hexagon/RDFCopy.cpp +++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/RDFCopy.cpp @@ -11,13 +11,13 @@ //===----------------------------------------------------------------------===// #include "RDFCopy.h" -#include "RDFGraph.h" -#include "RDFLiveness.h" -#include "RDFRegisters.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RDFGraph.h" +#include "llvm/CodeGen/RDFLiveness.h" +#include "llvm/CodeGen/RDFRegisters.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/MC/MCRegisterInfo.h" diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/RDFCopy.h b/contrib/llvm-project/llvm/lib/Target/Hexagon/RDFCopy.h index 1450ab884849..99b18a75d8c2 100644 --- a/contrib/llvm-project/llvm/lib/Target/Hexagon/RDFCopy.h +++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/RDFCopy.h @@ -9,9 +9,9 @@ #ifndef LLVM_LIB_TARGET_HEXAGON_RDFCOPY_H #define LLVM_LIB_TARGET_HEXAGON_RDFCOPY_H -#include "RDFGraph.h" -#include "RDFLiveness.h" -#include "RDFRegisters.h" +#include "llvm/CodeGen/RDFGraph.h" +#include "llvm/CodeGen/RDFLiveness.h" +#include "llvm/CodeGen/RDFRegisters.h" #include "llvm/CodeGen/MachineFunction.h" #include <map> #include <vector> diff --git 
a/contrib/llvm-project/llvm/lib/Target/Hexagon/RDFDeadCode.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/RDFDeadCode.cpp index af86c7b1956b..5a98debd3c00 100644 --- a/contrib/llvm-project/llvm/lib/Target/Hexagon/RDFDeadCode.cpp +++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/RDFDeadCode.cpp @@ -9,13 +9,13 @@ // RDF-based generic dead code elimination. #include "RDFDeadCode.h" -#include "RDFGraph.h" -#include "RDFLiveness.h" #include "llvm/ADT/SetVector.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RDFGraph.h" +#include "llvm/CodeGen/RDFLiveness.h" #include "llvm/Support/Debug.h" #include <queue> diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/RDFDeadCode.h b/contrib/llvm-project/llvm/lib/Target/Hexagon/RDFDeadCode.h index 7f91977e1d6c..859c8161d355 100644 --- a/contrib/llvm-project/llvm/lib/Target/Hexagon/RDFDeadCode.h +++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/RDFDeadCode.h @@ -23,8 +23,8 @@ #ifndef RDF_DEADCODE_H #define RDF_DEADCODE_H -#include "RDFGraph.h" -#include "RDFLiveness.h" +#include "llvm/CodeGen/RDFGraph.h" +#include "llvm/CodeGen/RDFLiveness.h" #include "llvm/ADT/SetVector.h" namespace llvm { diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/P9InstrResources.td b/contrib/llvm-project/llvm/lib/Target/PowerPC/P9InstrResources.td index 9b3d13989ee2..d7e3519d5539 100644 --- a/contrib/llvm-project/llvm/lib/Target/PowerPC/P9InstrResources.td +++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/P9InstrResources.td @@ -373,6 +373,7 @@ def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C], VMSUMSHS, VMSUMUBM, VMSUMUHM, + VMSUMUDM, VMSUMUHS, VMULESB, VMULESH, diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPC.td b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPC.td index 683cfeb9f6dd..b805b9a93443 100644 --- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPC.td +++ 
b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPC.td @@ -166,6 +166,9 @@ def FeatureHTM : SubtargetFeature<"htm", "HasHTM", "true", "Enable Hardware Transactional Memory instructions">; def FeatureMFTB : SubtargetFeature<"", "FeatureMFTB", "true", "Implement mftb using the mfspr instruction">; +def FeatureUnalignedFloats : + SubtargetFeature<"allow-unaligned-fp-access", "AllowsUnalignedFPAccess", + "true", "CPU does not trap on unaligned FP access">; def FeaturePPCPreRASched: SubtargetFeature<"ppc-prera-sched", "UsePPCPreRASchedStrategy", "true", "Use PowerPC pre-RA scheduling strategy">; @@ -252,7 +255,8 @@ def ProcessorFeatures { FeatureExtDiv, FeatureMFTB, DeprecatedDST, - FeatureTwoConstNR]; + FeatureTwoConstNR, + FeatureUnalignedFloats]; list<SubtargetFeature> P7SpecificFeatures = []; list<SubtargetFeature> P7Features = !listconcat(P7InheritableFeatures, P7SpecificFeatures); diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp index 4311df5dbeb8..20c5ac7b378a 100644 --- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -298,6 +298,11 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, switch (ExtraCode[0]) { default: return true; // Unknown modifier. + case 'L': // A memory reference to the upper word of a double word op. + O << getDataLayout().getPointerSize() << "("; + printOperand(MI, OpNo, O); + O << ")"; + return false; case 'y': // A memory reference for an X-form instruction { const char *RegName = "r0"; @@ -309,7 +314,6 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, } case 'U': // Print 'u' for update form. case 'X': // Print 'x' for indexed form. - { // FIXME: Currently for PowerPC memory operands are always loaded // into a register, so we never get an update or indexed form. 
// This is bad even for offset forms, since even if we know we @@ -319,7 +323,6 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, assert(MI->getOperand(OpNo).isReg()); return false; } - } } assert(MI->getOperand(OpNo).isReg()); diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 73f4017bde1d..ca1649fae258 100644 --- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -167,6 +167,23 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand); } + if (Subtarget.isISA3_0()) { + setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal); + setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal); + setTruncStoreAction(MVT::f64, MVT::f16, Legal); + setTruncStoreAction(MVT::f32, MVT::f16, Legal); + } else { + // No extending loads from f16 or HW conversions back and forth. + setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand); + setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand); + setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand); + setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand); + setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand); + setTruncStoreAction(MVT::f64, MVT::f16, Expand); + setTruncStoreAction(MVT::f32, MVT::f16, Expand); + } + setTruncStoreAction(MVT::f64, MVT::f32, Expand); // PowerPC has pre-inc load and store's. 
@@ -677,6 +694,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand); } } + setOperationAction(ISD::SELECT_CC, MVT::v4i32, Expand); if (!Subtarget.hasP8Vector()) { setOperationAction(ISD::SMAX, MVT::v2i64, Expand); setOperationAction(ISD::SMIN, MVT::v2i64, Expand); @@ -7720,15 +7738,17 @@ void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI, // Emit a store to the stack slot. SDValue Chain; + unsigned Alignment = DAG.getEVTAlignment(Tmp.getValueType()); if (i32Stack) { MachineFunction &MF = DAG.getMachineFunction(); + Alignment = 4; MachineMemOperand *MMO = - MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4); + MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Alignment); SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr }; Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl, DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO); } else - Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, MPI); + Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, MPI, Alignment); // Result is a load from the stack slot. If loading 4 bytes, make sure to // add in a bias on big endian. @@ -7741,6 +7761,7 @@ void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI, RLI.Chain = Chain; RLI.Ptr = FIPtr; RLI.MPI = MPI; + RLI.Alignment = Alignment; } /// Custom lowers floating point to integer conversions to use @@ -10358,6 +10379,7 @@ SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { assert(Op.getOpcode() == ISD::FP_EXTEND && "Should only be called for ISD::FP_EXTEND"); + // FIXME: handle extends from half precision float vectors on P9. // We only want to custom lower an extend from v2f32 to v2f64. if (Op.getValueType() != MVT::v2f64 || Op.getOperand(0).getValueType() != MVT::v2f32) @@ -10571,6 +10593,11 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N, case ISD::BITCAST: // Don't handle bitcast here. 
return; + case ISD::FP_EXTEND: + SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG); + if (Lowered) + Results.push_back(Lowered); + return; } } @@ -13589,7 +13616,7 @@ SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N, (Op1VT == MVT::i32 || Op1VT == MVT::i64 || (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8))); - if (ResVT == MVT::ppcf128 || !Subtarget.hasP8Altivec() || + if (ResVT == MVT::ppcf128 || !Subtarget.hasP8Vector() || cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt) return SDValue(); @@ -15252,6 +15279,10 @@ bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, if (!VT.isSimple()) return false; + if (VT.isFloatingPoint() && !VT.isVector() && + !Subtarget.allowsUnalignedFPAccess()) + return false; + if (VT.getSimpleVT().isVector()) { if (Subtarget.hasVSX()) { if (VT != MVT::v2f64 && VT != MVT::v2i64 && diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.h b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.h index e0c381827b87..2e1485373d19 100644 --- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -637,7 +637,7 @@ namespace llvm { /// then the VPERM for the shuffle. All in all a very slow sequence. 
TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const override { - if (VT.getScalarSizeInBits() % 8 == 0) + if (VT.getVectorNumElements() != 1 && VT.getScalarSizeInBits() % 8 == 0) return TypeWidenVector; return TargetLoweringBase::getPreferredVectorAction(VT); } diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrAltivec.td index f94816a35f79..6e8635f2413c 100644 --- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrAltivec.td @@ -1342,6 +1342,10 @@ def VSBOX : VXBX_Int_Ty<1480, "vsbox", int_ppc_altivec_crypto_vsbox, v2i64>; def HasP9Altivec : Predicate<"PPCSubTarget->hasP9Altivec()">; let Predicates = [HasP9Altivec] in { +// Vector Multiply-Sum +def VMSUMUDM : VA1a_Int_Ty3<35, "vmsumudm", int_ppc_altivec_vmsumudm, + v1i128, v2i64, v1i128>; + // i8 element comparisons. def VCMPNEB : VCMP < 7, "vcmpneb $vD, $vA, $vB" , v16i8>; def VCMPNEB_rec : VCMPo < 7, "vcmpneb. $vD, $vA, $vB" , v16i8>; diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index 30906a32b00c..d7925befcd37 100644 --- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -2631,6 +2631,10 @@ bool PPCInstrInfo::isADDIInstrEligibleForFolding(MachineInstr &ADDIMI, if (Opc != PPC::ADDI && Opc != PPC::ADDI8) return false; + // The operand may not necessarily be an immediate - it could be a relocation. 
+ if (!ADDIMI.getOperand(2).isImm()) + return false; + Imm = ADDIMI.getOperand(2).getImm(); return true; diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrVSX.td index be6b30ffa08b..95e5ff6b130d 100644 --- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -3343,6 +3343,23 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { def : Pat<(v2i64 (scalar_to_vector ScalarLoads.SELi16i64)), (v2i64 (XXPERMDIs (VEXTSH2Ds (LXSIHZX xoaddr:$src)), 0))>; + // Load/convert and convert/store patterns for f16. + def : Pat<(f64 (extloadf16 xoaddr:$src)), + (f64 (XSCVHPDP (LXSIHZX xoaddr:$src)))>; + def : Pat<(truncstoref16 f64:$src, xoaddr:$dst), + (STXSIHX (XSCVDPHP $src), xoaddr:$dst)>; + def : Pat<(f32 (extloadf16 xoaddr:$src)), + (f32 (COPY_TO_REGCLASS (XSCVHPDP (LXSIHZX xoaddr:$src)), VSSRC))>; + def : Pat<(truncstoref16 f32:$src, xoaddr:$dst), + (STXSIHX (XSCVDPHP (COPY_TO_REGCLASS $src, VSFRC)), xoaddr:$dst)>; + def : Pat<(f64 (f16_to_fp i32:$A)), + (f64 (XSCVHPDP (MTVSRWZ $A)))>; + def : Pat<(f32 (f16_to_fp i32:$A)), + (f32 (COPY_TO_REGCLASS (XSCVHPDP (MTVSRWZ $A)), VSSRC))>; + def : Pat<(i32 (fp_to_f16 f32:$A)), + (i32 (MFVSRWZ (XSCVDPHP (COPY_TO_REGCLASS $A, VSFRC))))>; + def : Pat<(i32 (fp_to_f16 f64:$A)), (i32 (MFVSRWZ (XSCVDPHP $A)))>; + let Predicates = [IsBigEndian, HasP9Vector] in { // Scalar stores of i8 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 0)), xoaddr:$dst), diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCSubtarget.h b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCSubtarget.h index 6dff0c126ab5..044e982740e9 100644 --- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCSubtarget.h +++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCSubtarget.h @@ -124,6 +124,7 @@ protected: bool IsPPC4xx; bool IsPPC6xx; bool FeatureMFTB; + bool AllowsUnalignedFPAccess; bool 
DeprecatedDST; bool HasLazyResolverStubs; bool IsLittleEndian; @@ -274,6 +275,7 @@ public: bool vectorsUseTwoUnits() const {return VectorsUseTwoUnits; } bool isE500() const { return IsE500; } bool isFeatureMFTB() const { return FeatureMFTB; } + bool allowsUnalignedFPAccess() const { return AllowsUnalignedFPAccess; } bool isDeprecatedDST() const { return DeprecatedDST; } bool hasICBT() const { return HasICBT; } bool hasInvariantFunctionDescriptors() const { diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp index c60fc3fc6b42..f7cd19cbb8e7 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -181,9 +181,9 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF, adjustReg(MBB, MBBI, DL, FPReg, SPReg, StackSize - RVFI->getVarArgsSaveSize(), MachineInstr::FrameSetup); - // Emit ".cfi_def_cfa $fp, 0" + // Emit ".cfi_def_cfa $fp, -RVFI->getVarArgsSaveSize()" unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa( - nullptr, RI->getDwarfRegNum(FPReg, true), 0)); + nullptr, RI->getDwarfRegNum(FPReg, true), -RVFI->getVarArgsSaveSize())); BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); } diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 5a2cffbc824c..a1e3e326a97a 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -197,6 +197,14 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setTruncStoreAction(MVT::f64, MVT::f16, Expand); } + if (Subtarget.is64Bit() && + !(Subtarget.hasStdExtD() || Subtarget.hasStdExtF())) { + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); + setOperationAction(ISD::FP_TO_SINT, 
MVT::i32, Custom); + setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom); + setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom); + } + setOperationAction(ISD::GlobalAddress, XLenVT, Custom); setOperationAction(ISD::BlockAddress, XLenVT, Custom); setOperationAction(ISD::ConstantPool, XLenVT, Custom); @@ -876,6 +884,32 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, switch (N->getOpcode()) { default: llvm_unreachable("Don't know how to custom type legalize this operation!"); + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: { + bool IsStrict = N->isStrictFPOpcode(); + assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && + "Unexpected custom legalisation"); + SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0); + RTLIB::Libcall LC; + if (N->getOpcode() == ISD::FP_TO_SINT || + N->getOpcode() == ISD::STRICT_FP_TO_SINT) + LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0)); + else + LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0)); + MakeLibCallOptions CallOptions; + EVT OpVT = Op0.getValueType(); + CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true); + SDValue Chain = IsStrict ? 
N->getOperand(0) : SDValue(); + SDValue Result; + std::tie(Result, Chain) = + makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain); + Results.push_back(Result); + if (IsStrict) + Results.push_back(Chain); + break; + } case ISD::READCYCLECOUNTER: { assert(!Subtarget.is64Bit() && "READCYCLECOUNTER only has custom type legalization on riscv32"); diff --git a/contrib/llvm-project/llvm/lib/Target/X86/ImmutableGraph.h b/contrib/llvm-project/llvm/lib/Target/X86/ImmutableGraph.h new file mode 100644 index 000000000000..5833017037a5 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/X86/ImmutableGraph.h @@ -0,0 +1,446 @@ +//==========-- ImmutableGraph.h - A fast DAG implementation ---------=========// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// Description: ImmutableGraph is a fast DAG implementation that cannot be +/// modified, except by creating a new ImmutableGraph. ImmutableGraph is +/// implemented as two arrays: one containing nodes, and one containing edges. +/// The advantages to this implementation are two-fold: +/// 1. Iteration and traversal operations benefit from cache locality. +/// 2. Operations on sets of nodes/edges are efficient, and representations of +/// those sets in memory are compact. For instance, a set of edges is +/// implemented as a bit vector, wherein each bit corresponds to one edge in +/// the edge array. This implies a lower bound of 64x spatial improvement +/// over, e.g., an llvm::DenseSet or llvm::SmallSet. It also means that +/// insert/erase/contains operations complete in negligible constant time: +/// insert and erase require one load and one store, and contains requires +/// just one load. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_X86_IMMUTABLEGRAPH_H +#define LLVM_LIB_TARGET_X86_IMMUTABLEGRAPH_H + +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/GraphTraits.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <iterator> +#include <utility> +#include <vector> + +namespace llvm { + +template <typename NodeValueT, typename EdgeValueT> class ImmutableGraph { + using Traits = GraphTraits<ImmutableGraph<NodeValueT, EdgeValueT> *>; + template <typename> friend class ImmutableGraphBuilder; + +public: + using node_value_type = NodeValueT; + using edge_value_type = EdgeValueT; + using size_type = int; + class Node; + class Edge { + friend class ImmutableGraph; + template <typename> friend class ImmutableGraphBuilder; + + const Node *Dest; + edge_value_type Value; + + public: + const Node *getDest() const { return Dest; }; + const edge_value_type &getValue() const { return Value; } + }; + class Node { + friend class ImmutableGraph; + template <typename> friend class ImmutableGraphBuilder; + + const Edge *Edges; + node_value_type Value; + + public: + const node_value_type &getValue() const { return Value; } + + const Edge *edges_begin() const { return Edges; } + // Nodes are allocated sequentially. Edges for a node are stored together. + // The end of this Node's edges is the beginning of the next node's edges. + // An extra node was allocated to hold the end pointer for the last real + // node. 
+ const Edge *edges_end() const { return (this + 1)->Edges; } + ArrayRef<Edge> edges() const { + return makeArrayRef(edges_begin(), edges_end()); + } + }; + +protected: + ImmutableGraph(std::unique_ptr<Node[]> Nodes, std::unique_ptr<Edge[]> Edges, + size_type NodesSize, size_type EdgesSize) + : Nodes(std::move(Nodes)), Edges(std::move(Edges)), NodesSize(NodesSize), + EdgesSize(EdgesSize) {} + ImmutableGraph(const ImmutableGraph &) = delete; + ImmutableGraph(ImmutableGraph &&) = delete; + ImmutableGraph &operator=(const ImmutableGraph &) = delete; + ImmutableGraph &operator=(ImmutableGraph &&) = delete; + +public: + ArrayRef<Node> nodes() const { return makeArrayRef(Nodes.get(), NodesSize); } + const Node *nodes_begin() const { return nodes().begin(); } + const Node *nodes_end() const { return nodes().end(); } + + ArrayRef<Edge> edges() const { return makeArrayRef(Edges.get(), EdgesSize); } + const Edge *edges_begin() const { return edges().begin(); } + const Edge *edges_end() const { return edges().end(); } + + size_type nodes_size() const { return NodesSize; } + size_type edges_size() const { return EdgesSize; } + + // Node N must belong to this ImmutableGraph. + size_type getNodeIndex(const Node &N) const { + return std::distance(nodes_begin(), &N); + } + // Edge E must belong to this ImmutableGraph. + size_type getEdgeIndex(const Edge &E) const { + return std::distance(edges_begin(), &E); + } + + // FIXME: Could NodeSet and EdgeSet be templated to share code? 
+ class NodeSet { + const ImmutableGraph &G; + BitVector V; + + public: + NodeSet(const ImmutableGraph &G, bool ContainsAll = false) + : G{G}, V{static_cast<unsigned>(G.nodes_size()), ContainsAll} {} + bool insert(const Node &N) { + size_type Idx = G.getNodeIndex(N); + bool AlreadyExists = V.test(Idx); + V.set(Idx); + return !AlreadyExists; + } + void erase(const Node &N) { + size_type Idx = G.getNodeIndex(N); + V.reset(Idx); + } + bool contains(const Node &N) const { + size_type Idx = G.getNodeIndex(N); + return V.test(Idx); + } + void clear() { V.reset(); } + size_type empty() const { return V.none(); } + /// Return the number of elements in the set + size_type count() const { return V.count(); } + /// Return the size of the set's domain + size_type size() const { return V.size(); } + /// Set union + NodeSet &operator|=(const NodeSet &RHS) { + assert(&this->G == &RHS.G); + V |= RHS.V; + return *this; + } + /// Set intersection + NodeSet &operator&=(const NodeSet &RHS) { + assert(&this->G == &RHS.G); + V &= RHS.V; + return *this; + } + /// Set disjoint union + NodeSet &operator^=(const NodeSet &RHS) { + assert(&this->G == &RHS.G); + V ^= RHS.V; + return *this; + } + + using index_iterator = typename BitVector::const_set_bits_iterator; + index_iterator index_begin() const { return V.set_bits_begin(); } + index_iterator index_end() const { return V.set_bits_end(); } + void set(size_type Idx) { V.set(Idx); } + void reset(size_type Idx) { V.reset(Idx); } + + class iterator { + const NodeSet &Set; + size_type Current; + + void advance() { + assert(Current != -1); + Current = Set.V.find_next(Current); + } + + public: + iterator(const NodeSet &Set, size_type Begin) + : Set{Set}, Current{Begin} {} + iterator operator++(int) { + iterator Tmp = *this; + advance(); + return Tmp; + } + iterator &operator++() { + advance(); + return *this; + } + Node *operator*() const { + assert(Current != -1); + return Set.G.nodes_begin() + Current; + } + bool operator==(const iterator 
&other) const { + assert(&this->Set == &other.Set); + return this->Current == other.Current; + } + bool operator!=(const iterator &other) const { return !(*this == other); } + }; + + iterator begin() const { return iterator{*this, V.find_first()}; } + iterator end() const { return iterator{*this, -1}; } + }; + + class EdgeSet { + const ImmutableGraph &G; + BitVector V; + + public: + EdgeSet(const ImmutableGraph &G, bool ContainsAll = false) + : G{G}, V{static_cast<unsigned>(G.edges_size()), ContainsAll} {} + bool insert(const Edge &E) { + size_type Idx = G.getEdgeIndex(E); + bool AlreadyExists = V.test(Idx); + V.set(Idx); + return !AlreadyExists; + } + void erase(const Edge &E) { + size_type Idx = G.getEdgeIndex(E); + V.reset(Idx); + } + bool contains(const Edge &E) const { + size_type Idx = G.getEdgeIndex(E); + return V.test(Idx); + } + void clear() { V.reset(); } + bool empty() const { return V.none(); } + /// Return the number of elements in the set + size_type count() const { return V.count(); } + /// Return the size of the set's domain + size_type size() const { return V.size(); } + /// Set union + EdgeSet &operator|=(const EdgeSet &RHS) { + assert(&this->G == &RHS.G); + V |= RHS.V; + return *this; + } + /// Set intersection + EdgeSet &operator&=(const EdgeSet &RHS) { + assert(&this->G == &RHS.G); + V &= RHS.V; + return *this; + } + /// Set disjoint union + EdgeSet &operator^=(const EdgeSet &RHS) { + assert(&this->G == &RHS.G); + V ^= RHS.V; + return *this; + } + + using index_iterator = typename BitVector::const_set_bits_iterator; + index_iterator index_begin() const { return V.set_bits_begin(); } + index_iterator index_end() const { return V.set_bits_end(); } + void set(size_type Idx) { V.set(Idx); } + void reset(size_type Idx) { V.reset(Idx); } + + class iterator { + const EdgeSet &Set; + size_type Current; + + void advance() { + assert(Current != -1); + Current = Set.V.find_next(Current); + } + + public: + iterator(const EdgeSet &Set, size_type Begin) + : 
Set{Set}, Current{Begin} {} + iterator operator++(int) { + iterator Tmp = *this; + advance(); + return Tmp; + } + iterator &operator++() { + advance(); + return *this; + } + Edge *operator*() const { + assert(Current != -1); + return Set.G.edges_begin() + Current; + } + bool operator==(const iterator &other) const { + assert(&this->Set == &other.Set); + return this->Current == other.Current; + } + bool operator!=(const iterator &other) const { return !(*this == other); } + }; + + iterator begin() const { return iterator{*this, V.find_first()}; } + iterator end() const { return iterator{*this, -1}; } + }; + +private: + std::unique_ptr<Node[]> Nodes; + std::unique_ptr<Edge[]> Edges; + size_type NodesSize; + size_type EdgesSize; +}; + +template <typename GraphT> class ImmutableGraphBuilder { + using node_value_type = typename GraphT::node_value_type; + using edge_value_type = typename GraphT::edge_value_type; + static_assert( + std::is_base_of<ImmutableGraph<node_value_type, edge_value_type>, + GraphT>::value, + "Template argument to ImmutableGraphBuilder must derive from " + "ImmutableGraph<>"); + using size_type = typename GraphT::size_type; + using NodeSet = typename GraphT::NodeSet; + using Node = typename GraphT::Node; + using EdgeSet = typename GraphT::EdgeSet; + using Edge = typename GraphT::Edge; + using BuilderEdge = std::pair<edge_value_type, size_type>; + using EdgeList = std::vector<BuilderEdge>; + using BuilderVertex = std::pair<node_value_type, EdgeList>; + using VertexVec = std::vector<BuilderVertex>; + +public: + using BuilderNodeRef = size_type; + + BuilderNodeRef addVertex(const node_value_type &V) { + auto I = AdjList.emplace(AdjList.end(), V, EdgeList{}); + return std::distance(AdjList.begin(), I); + } + + void addEdge(const edge_value_type &E, BuilderNodeRef From, + BuilderNodeRef To) { + AdjList[From].second.emplace_back(E, To); + } + + bool empty() const { return AdjList.empty(); } + + template <typename... 
ArgT> std::unique_ptr<GraphT> get(ArgT &&... Args) { + size_type VertexSize = AdjList.size(), EdgeSize = 0; + for (const auto &V : AdjList) { + EdgeSize += V.second.size(); + } + auto VertexArray = + std::make_unique<Node[]>(VertexSize + 1 /* terminator node */); + auto EdgeArray = std::make_unique<Edge[]>(EdgeSize); + size_type VI = 0, EI = 0; + for (; VI < VertexSize; ++VI) { + VertexArray[VI].Value = std::move(AdjList[VI].first); + VertexArray[VI].Edges = &EdgeArray[EI]; + auto NumEdges = static_cast<size_type>(AdjList[VI].second.size()); + for (size_type VEI = 0; VEI < NumEdges; ++VEI, ++EI) { + auto &E = AdjList[VI].second[VEI]; + EdgeArray[EI].Value = std::move(E.first); + EdgeArray[EI].Dest = &VertexArray[E.second]; + } + } + assert(VI == VertexSize && EI == EdgeSize && "ImmutableGraph malformed"); + VertexArray[VI].Edges = &EdgeArray[EdgeSize]; // terminator node + return std::make_unique<GraphT>(std::move(VertexArray), + std::move(EdgeArray), VertexSize, EdgeSize, + std::forward<ArgT>(Args)...); + } + + template <typename... ArgT> + static std::unique_ptr<GraphT> trim(const GraphT &G, const NodeSet &TrimNodes, + const EdgeSet &TrimEdges, + ArgT &&... Args) { + size_type NewVertexSize = G.nodes_size() - TrimNodes.count(); + size_type NewEdgeSize = G.edges_size() - TrimEdges.count(); + auto NewVertexArray = + std::make_unique<Node[]>(NewVertexSize + 1 /* terminator node */); + auto NewEdgeArray = std::make_unique<Edge[]>(NewEdgeSize); + + // Walk the nodes and determine the new index for each node. 
+ size_type NewNodeIndex = 0; + std::vector<size_type> RemappedNodeIndex(G.nodes_size()); + for (const Node &N : G.nodes()) { + if (TrimNodes.contains(N)) + continue; + RemappedNodeIndex[G.getNodeIndex(N)] = NewNodeIndex++; + } + assert(NewNodeIndex == NewVertexSize && + "Should have assigned NewVertexSize indices"); + + size_type VertexI = 0, EdgeI = 0; + for (const Node &N : G.nodes()) { + if (TrimNodes.contains(N)) + continue; + NewVertexArray[VertexI].Value = N.getValue(); + NewVertexArray[VertexI].Edges = &NewEdgeArray[EdgeI]; + for (const Edge &E : N.edges()) { + if (TrimEdges.contains(E)) + continue; + NewEdgeArray[EdgeI].Value = E.getValue(); + size_type DestIdx = G.getNodeIndex(*E.getDest()); + size_type NewIdx = RemappedNodeIndex[DestIdx]; + assert(NewIdx < NewVertexSize); + NewEdgeArray[EdgeI].Dest = &NewVertexArray[NewIdx]; + ++EdgeI; + } + ++VertexI; + } + assert(VertexI == NewVertexSize && EdgeI == NewEdgeSize && + "Gadget graph malformed"); + NewVertexArray[VertexI].Edges = &NewEdgeArray[NewEdgeSize]; // terminator + return std::make_unique<GraphT>(std::move(NewVertexArray), + std::move(NewEdgeArray), NewVertexSize, + NewEdgeSize, std::forward<ArgT>(Args)...); + } + +private: + VertexVec AdjList; +}; + +template <typename NodeValueT, typename EdgeValueT> +struct GraphTraits<ImmutableGraph<NodeValueT, EdgeValueT> *> { + using GraphT = ImmutableGraph<NodeValueT, EdgeValueT>; + using NodeRef = typename GraphT::Node const *; + using EdgeRef = typename GraphT::Edge const &; + + static NodeRef edge_dest(EdgeRef E) { return E.getDest(); } + using ChildIteratorType = + mapped_iterator<typename GraphT::Edge const *, decltype(&edge_dest)>; + + static NodeRef getEntryNode(GraphT *G) { return G->nodes_begin(); } + static ChildIteratorType child_begin(NodeRef N) { + return {N->edges_begin(), &edge_dest}; + } + static ChildIteratorType child_end(NodeRef N) { + return {N->edges_end(), &edge_dest}; + } + + static NodeRef getNode(typename GraphT::Node const &N) { 
return NodeRef{&N}; } + using nodes_iterator = + mapped_iterator<typename GraphT::Node const *, decltype(&getNode)>; + static nodes_iterator nodes_begin(GraphT *G) { + return {G->nodes_begin(), &getNode}; + } + static nodes_iterator nodes_end(GraphT *G) { + return {G->nodes_end(), &getNode}; + } + + using ChildEdgeIteratorType = typename GraphT::Edge const *; + + static ChildEdgeIteratorType child_edge_begin(NodeRef N) { + return N->edges_begin(); + } + static ChildEdgeIteratorType child_edge_end(NodeRef N) { + return N->edges_end(); + } + static typename GraphT::size_type size(GraphT *G) { return G->nodes_size(); } +}; + +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_X86_IMMUTABLEGRAPH_H diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86.h b/contrib/llvm-project/llvm/lib/Target/X86/X86.h index 0481a40d462a..a0ab5c3a5b3c 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86.h +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86.h @@ -120,7 +120,7 @@ FunctionPass *createX86DomainReassignmentPass(); FunctionPass *createX86EvexToVexInsts(); /// This pass creates the thunks for the retpoline feature. 
-FunctionPass *createX86RetpolineThunksPass(); +FunctionPass *createX86IndirectThunksPass(); /// This pass ensures instructions featuring a memory operand /// have distinctive <LineNumber, Discriminator> (with respect to eachother) @@ -133,6 +133,9 @@ InstructionSelector *createX86InstructionSelector(const X86TargetMachine &TM, X86Subtarget &, X86RegisterBankInfo &); +FunctionPass *createX86LoadValueInjectionLoadHardeningPass(); +FunctionPass *createX86LoadValueInjectionLoadHardeningUnoptimizedPass(); +FunctionPass *createX86LoadValueInjectionRetHardeningPass(); FunctionPass *createX86SpeculativeLoadHardeningPass(); void initializeEvexToVexInstPassPass(PassRegistry &); @@ -148,6 +151,9 @@ void initializeX86DomainReassignmentPass(PassRegistry &); void initializeX86ExecutionDomainFixPass(PassRegistry &); void initializeX86ExpandPseudoPass(PassRegistry &); void initializeX86FlagsCopyLoweringPassPass(PassRegistry &); +void initializeX86LoadValueInjectionLoadHardeningUnoptimizedPassPass(PassRegistry &); +void initializeX86LoadValueInjectionLoadHardeningPassPass(PassRegistry &); +void initializeX86LoadValueInjectionRetHardeningPassPass(PassRegistry &); void initializeX86OptimizeLEAPassPass(PassRegistry &); void initializeX86SpeculativeLoadHardeningPassPass(PassRegistry &); diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86.td b/contrib/llvm-project/llvm/lib/Target/X86/X86.td index a2b11d55f650..bb8952f54e3a 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86.td +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86.td @@ -426,6 +426,22 @@ def FeatureRetpolineExternalThunk "ourselves. 
Only has effect when combined with some other retpoline " "feature", [FeatureRetpolineIndirectCalls]>; +// Mitigate LVI attacks against indirect calls/branches and call returns +def FeatureLVIControlFlowIntegrity + : SubtargetFeature< + "lvi-cfi", "UseLVIControlFlowIntegrity", "true", + "Prevent indirect calls/branches from using a memory operand, and " + "precede all indirect calls/branches from a register with an " + "LFENCE instruction to serialize control flow. Also decompose RET " + "instructions into a POP+LFENCE+JMP sequence.">; + +// Mitigate LVI attacks against data loads +def FeatureLVILoadHardening + : SubtargetFeature< + "lvi-load-hardening", "UseLVILoadHardening", "true", + "Insert LFENCE instructions to prevent data speculatively injected " + "into loads from being used maliciously.">; + // Direct Move instructions. def FeatureMOVDIRI : SubtargetFeature<"movdiri", "HasMOVDIRI", "true", "Support movdiri instruction">; diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86AsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86AsmPrinter.cpp index 39d16e7999cd..1ac291fcb887 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86AsmPrinter.cpp +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86AsmPrinter.cpp @@ -404,7 +404,7 @@ void X86AsmPrinter::PrintIntelMemReference(const MachineInstr *MI, static bool printAsmMRegister(X86AsmPrinter &P, const MachineOperand &MO, char Mode, raw_ostream &O) { Register Reg = MO.getReg(); - bool EmitPercent = true; + bool EmitPercent = MO.getParent()->getInlineAsmDialect() == InlineAsm::AD_ATT; if (!X86::GR8RegClass.contains(Reg) && !X86::GR16RegClass.contains(Reg) && @@ -443,6 +443,42 @@ static bool printAsmMRegister(X86AsmPrinter &P, const MachineOperand &MO, return false; } +static bool printAsmVRegister(X86AsmPrinter &P, const MachineOperand &MO, + char Mode, raw_ostream &O) { + unsigned Reg = MO.getReg(); + bool EmitPercent = MO.getParent()->getInlineAsmDialect() == InlineAsm::AD_ATT; + + unsigned Index; + 
if (X86::VR128XRegClass.contains(Reg)) + Index = Reg - X86::XMM0; + else if (X86::VR256XRegClass.contains(Reg)) + Index = Reg - X86::YMM0; + else if (X86::VR512RegClass.contains(Reg)) + Index = Reg - X86::ZMM0; + else + return true; + + switch (Mode) { + default: // Unknown mode. + return true; + case 'x': // Print V4SFmode register + Reg = X86::XMM0 + Index; + break; + case 't': // Print V8SFmode register + Reg = X86::YMM0 + Index; + break; + case 'g': // Print V16SFmode register + Reg = X86::ZMM0 + Index; + break; + } + + if (EmitPercent) + O << '%'; + + O << X86ATTInstPrinter::getRegisterName(Reg); + return false; +} + /// PrintAsmOperand - Print out an operand for an inline asm expression. /// bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, @@ -517,6 +553,14 @@ bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, PrintOperand(MI, OpNo, O); return false; + case 'x': // Print V4SFmode register + case 't': // Print V8SFmode register + case 'g': // Print V16SFmode register + if (MO.isReg()) + return printAsmVRegister(*this, MO, ExtraCode[0], O); + PrintOperand(MI, OpNo, O); + return false; + case 'P': // This is the operand of a call, treat specially. PrintPCRelImm(MI, OpNo, O); return false; diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86FastISel.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86FastISel.cpp index 1dbf40683564..a1d256ea872d 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86FastISel.cpp +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86FastISel.cpp @@ -3202,8 +3202,8 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) { (CalledFn && CalledFn->hasFnAttribute("no_caller_saved_registers"))) return false; - // Functions using retpoline for indirect calls need to use SDISel. - if (Subtarget->useRetpolineIndirectCalls()) + // Functions using thunks for indirect calls need to use SDISel. 
+ if (Subtarget->useIndirectThunkCalls()) return false; // Handle only C, fastcc, and webkit_js calling conventions for now. diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86FrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86FrameLowering.cpp index 799c1f5d1285..1da20371caf5 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -765,10 +765,10 @@ void X86FrameLowering::emitStackProbeCall(MachineFunction &MF, bool InProlog) const { bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large; - // FIXME: Add retpoline support and remove this. - if (Is64Bit && IsLargeCodeModel && STI.useRetpolineIndirectCalls()) + // FIXME: Add indirect thunk support and remove this. + if (Is64Bit && IsLargeCodeModel && STI.useIndirectThunkCalls()) report_fatal_error("Emitting stack probe calls on 64-bit with the large " - "code model and retpoline not yet implemented."); + "code model and indirect thunks not yet implemented."); unsigned CallOp; if (Is64Bit) @@ -2493,9 +2493,9 @@ void X86FrameLowering::adjustForSegmentedStacks( // is laid out within 2^31 bytes of each function body, but this seems // to be sufficient for JIT. // FIXME: Add retpoline support and remove the error here.. 
- if (STI.useRetpolineIndirectCalls()) + if (STI.useIndirectThunkCalls()) report_fatal_error("Emitting morestack calls on 64-bit with the large " - "code model and retpoline not yet implemented."); + "code model and thunks not yet implemented."); BuildMI(allocMBB, DL, TII.get(X86::CALL64m)) .addReg(X86::RIP) .addImm(0) diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index bf33f399db28..88af0ebcfd0e 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -987,7 +987,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() { if (OptLevel != CodeGenOpt::None && // Only do this when the target can fold the load into the call or // jmp. - !Subtarget->useRetpolineIndirectCalls() && + !Subtarget->useIndirectThunkCalls() && ((N->getOpcode() == X86ISD::CALL && !Subtarget->slowTwoMemOps()) || (N->getOpcode() == X86ISD::TC_RETURN && (Subtarget->is64Bit() || diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp index cbdd7135de43..c8720d9ae3a6 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -23319,7 +23319,8 @@ static SDValue getTargetVShiftByConstNode(unsigned Opc, const SDLoc &dl, MVT VT, for (unsigned i = 0; i != NumElts; ++i) { SDValue CurrentOp = SrcOp->getOperand(i); if (CurrentOp->isUndef()) { - Elts.push_back(CurrentOp); + // Must produce 0s in the correct bits. 
+ Elts.push_back(DAG.getConstant(0, dl, ElementType)); continue; } auto *ND = cast<ConstantSDNode>(CurrentOp); @@ -23331,7 +23332,8 @@ static SDValue getTargetVShiftByConstNode(unsigned Opc, const SDLoc &dl, MVT VT, for (unsigned i = 0; i != NumElts; ++i) { SDValue CurrentOp = SrcOp->getOperand(i); if (CurrentOp->isUndef()) { - Elts.push_back(CurrentOp); + // Must produce 0s in the correct bits. + Elts.push_back(DAG.getConstant(0, dl, ElementType)); continue; } auto *ND = cast<ConstantSDNode>(CurrentOp); @@ -23343,7 +23345,8 @@ static SDValue getTargetVShiftByConstNode(unsigned Opc, const SDLoc &dl, MVT VT, for (unsigned i = 0; i != NumElts; ++i) { SDValue CurrentOp = SrcOp->getOperand(i); if (CurrentOp->isUndef()) { - Elts.push_back(CurrentOp); + // All shifted in bits must be the same so use 0. + Elts.push_back(DAG.getConstant(0, dl, ElementType)); continue; } auto *ND = cast<ConstantSDNode>(CurrentOp); @@ -30218,8 +30221,8 @@ bool X86TargetLowering::isVectorClearMaskLegal(ArrayRef<int> Mask, } bool X86TargetLowering::areJTsAllowed(const Function *Fn) const { - // If the subtarget is using retpolines, we need to not generate jump tables. - if (Subtarget.useRetpolineIndirectBranches()) + // If the subtarget is using thunks, we need to not generate jump tables. + if (Subtarget.useIndirectThunkBranches()) return false; // Otherwise, fallback on the generic logic. 
@@ -31342,22 +31345,22 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr &MI, return BB; } -static unsigned getOpcodeForRetpoline(unsigned RPOpc) { +static unsigned getOpcodeForIndirectThunk(unsigned RPOpc) { switch (RPOpc) { - case X86::RETPOLINE_CALL32: + case X86::INDIRECT_THUNK_CALL32: return X86::CALLpcrel32; - case X86::RETPOLINE_CALL64: + case X86::INDIRECT_THUNK_CALL64: return X86::CALL64pcrel32; - case X86::RETPOLINE_TCRETURN32: + case X86::INDIRECT_THUNK_TCRETURN32: return X86::TCRETURNdi; - case X86::RETPOLINE_TCRETURN64: + case X86::INDIRECT_THUNK_TCRETURN64: return X86::TCRETURNdi64; } - llvm_unreachable("not retpoline opcode"); + llvm_unreachable("not indirect thunk opcode"); } -static const char *getRetpolineSymbol(const X86Subtarget &Subtarget, - unsigned Reg) { +static const char *getIndirectThunkSymbol(const X86Subtarget &Subtarget, + unsigned Reg) { if (Subtarget.useRetpolineExternalThunk()) { // When using an external thunk for retpolines, we pick names that match the // names GCC happens to use as well. This helps simplify the implementation @@ -31389,39 +31392,48 @@ static const char *getRetpolineSymbol(const X86Subtarget &Subtarget, assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!"); return "__x86_indirect_thunk_r11"; } + llvm_unreachable("unexpected reg for external indirect thunk"); + } + + if (Subtarget.useRetpolineIndirectCalls() || + Subtarget.useRetpolineIndirectBranches()) { + // When targeting an internal COMDAT thunk use an LLVM-specific name. 
+ switch (Reg) { + case X86::EAX: + assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); + return "__llvm_retpoline_eax"; + case X86::ECX: + assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); + return "__llvm_retpoline_ecx"; + case X86::EDX: + assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); + return "__llvm_retpoline_edx"; + case X86::EDI: + assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); + return "__llvm_retpoline_edi"; + case X86::R11: + assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!"); + return "__llvm_retpoline_r11"; + } llvm_unreachable("unexpected reg for retpoline"); } - // When targeting an internal COMDAT thunk use an LLVM-specific name. - switch (Reg) { - case X86::EAX: - assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); - return "__llvm_retpoline_eax"; - case X86::ECX: - assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); - return "__llvm_retpoline_ecx"; - case X86::EDX: - assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); - return "__llvm_retpoline_edx"; - case X86::EDI: - assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); - return "__llvm_retpoline_edi"; - case X86::R11: + if (Subtarget.useLVIControlFlowIntegrity()) { assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!"); - return "__llvm_retpoline_r11"; + return "__llvm_lvi_thunk_r11"; } - llvm_unreachable("unexpected reg for retpoline"); + llvm_unreachable("getIndirectThunkSymbol() invoked without thunk feature"); } MachineBasicBlock * -X86TargetLowering::EmitLoweredRetpoline(MachineInstr &MI, - MachineBasicBlock *BB) const { +X86TargetLowering::EmitLoweredIndirectThunk(MachineInstr &MI, + MachineBasicBlock *BB) const { // Copy the virtual register into the R11 physical register and // call the retpoline thunk. 
DebugLoc DL = MI.getDebugLoc(); const X86InstrInfo *TII = Subtarget.getInstrInfo(); Register CalleeVReg = MI.getOperand(0).getReg(); - unsigned Opc = getOpcodeForRetpoline(MI.getOpcode()); + unsigned Opc = getOpcodeForIndirectThunk(MI.getOpcode()); // Find an available scratch register to hold the callee. On 64-bit, we can // just use R11, but we scan for uses anyway to ensure we don't generate @@ -31455,7 +31467,7 @@ X86TargetLowering::EmitLoweredRetpoline(MachineInstr &MI, report_fatal_error("calling convention incompatible with retpoline, no " "available registers"); - const char *Symbol = getRetpolineSymbol(Subtarget, AvailableReg); + const char *Symbol = getIndirectThunkSymbol(Subtarget, AvailableReg); BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), AvailableReg) .addReg(CalleeVReg); @@ -32231,11 +32243,11 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, case X86::TLS_base_addr32: case X86::TLS_base_addr64: return EmitLoweredTLSAddr(MI, BB); - case X86::RETPOLINE_CALL32: - case X86::RETPOLINE_CALL64: - case X86::RETPOLINE_TCRETURN32: - case X86::RETPOLINE_TCRETURN64: - return EmitLoweredRetpoline(MI, BB); + case X86::INDIRECT_THUNK_CALL32: + case X86::INDIRECT_THUNK_CALL64: + case X86::INDIRECT_THUNK_TCRETURN32: + case X86::INDIRECT_THUNK_TCRETURN64: + return EmitLoweredIndirectThunk(MI, BB); case X86::CATCHRET: return EmitLoweredCatchRet(MI, BB); case X86::CATCHPAD: @@ -33998,6 +34010,7 @@ static SDValue combineX86ShufflesConstants(ArrayRef<SDValue> Ops, return SDValue(); // Shuffle the constant bits according to the mask. + SDLoc DL(Root); APInt UndefElts(NumMaskElts, 0); APInt ZeroElts(NumMaskElts, 0); APInt ConstantElts(NumMaskElts, 0); @@ -34035,6 +34048,10 @@ static SDValue combineX86ShufflesConstants(ArrayRef<SDValue> Ops, } assert((UndefElts | ZeroElts | ConstantElts).isAllOnesValue()); + // Attempt to create a zero vector. 
+ if ((UndefElts | ZeroElts).isAllOnesValue()) + return getZeroVector(Root.getSimpleValueType(), Subtarget, DAG, DL); + // Create the constant data. MVT MaskSVT; if (VT.isFloatingPoint() && (MaskSizeInBits == 32 || MaskSizeInBits == 64)) @@ -34043,8 +34060,9 @@ static SDValue combineX86ShufflesConstants(ArrayRef<SDValue> Ops, MaskSVT = MVT::getIntegerVT(MaskSizeInBits); MVT MaskVT = MVT::getVectorVT(MaskSVT, NumMaskElts); + if (!DAG.getTargetLoweringInfo().isTypeLegal(MaskVT)) + return SDValue(); - SDLoc DL(Root); SDValue CstOp = getConstVector(ConstantBitData, UndefElts, MaskVT, DAG, DL); return DAG.getBitcast(VT, CstOp); } @@ -39693,14 +39711,22 @@ static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG, getTargetConstantBitsFromNode(N0, NumBitsPerElt, UndefElts, EltBits)) { assert(EltBits.size() == VT.getVectorNumElements() && "Unexpected shift value type"); - for (APInt &Elt : EltBits) { - if (X86ISD::VSHLI == Opcode) + // Undef elements need to fold to 0. It's possible SimplifyDemandedBits + // created an undef input due to no input bits being demanded, but user + // still expects 0 in other bits. + for (unsigned i = 0, e = EltBits.size(); i != e; ++i) { + APInt &Elt = EltBits[i]; + if (UndefElts[i]) + Elt = 0; + else if (X86ISD::VSHLI == Opcode) Elt <<= ShiftVal; else if (X86ISD::VSRAI == Opcode) Elt.ashrInPlace(ShiftVal); else Elt.lshrInPlace(ShiftVal); } + // Reset undef elements since they were zeroed above. 
+ UndefElts = 0; return getConstVector(EltBits, UndefElts, VT.getSimpleVT(), DAG, SDLoc(N)); } diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.h b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.h index 3a17099da38f..830cdfc79c0a 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.h +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.h @@ -1482,8 +1482,8 @@ namespace llvm { MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI, MachineBasicBlock *BB) const; - MachineBasicBlock *EmitLoweredRetpoline(MachineInstr &MI, - MachineBasicBlock *BB) const; + MachineBasicBlock *EmitLoweredIndirectThunk(MachineInstr &MI, + MachineBasicBlock *BB) const; MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const; diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86IndirectBranchTracking.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86IndirectBranchTracking.cpp index 48d0d8a35704..0a79b793a980 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86IndirectBranchTracking.cpp +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86IndirectBranchTracking.cpp @@ -18,6 +18,7 @@ #include "X86.h" #include "X86InstrInfo.h" #include "X86Subtarget.h" +#include "X86TargetMachine.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -102,7 +103,16 @@ bool X86IndirectBranchTrackingPass::runOnMachineFunction(MachineFunction &MF) { // Check that the cf-protection-branch is enabled. Metadata *isCFProtectionSupported = MF.getMMI().getModule()->getModuleFlag("cf-protection-branch"); - if (!isCFProtectionSupported && !IndirectBranchTracking) + // NB: We need to enable IBT in jitted code if JIT compiler is CET + // enabled. 
+ const X86TargetMachine *TM = + static_cast<const X86TargetMachine *>(&MF.getTarget()); +#ifdef __CET__ + bool isJITwithCET = TM->isJIT(); +#else + bool isJITwithCET = false; +#endif + if (!isCFProtectionSupported && !IndirectBranchTracking && !isJITwithCET) return false; // True if the current MF was changed and false otherwise. @@ -111,10 +121,11 @@ bool X86IndirectBranchTrackingPass::runOnMachineFunction(MachineFunction &MF) { TII = SubTarget.getInstrInfo(); EndbrOpcode = SubTarget.is64Bit() ? X86::ENDBR64 : X86::ENDBR32; - // Non-internal function or function whose address was taken, can be - // accessed through indirect calls. Mark the first BB with ENDBR instruction - // unless nocf_check attribute is used. - if ((MF.getFunction().hasAddressTaken() || + // Large code model, non-internal function or function whose address + // was taken, can be accessed through indirect calls. Mark the first + // BB with ENDBR instruction unless nocf_check attribute is used. + if ((TM->getCodeModel() == CodeModel::Large || + MF.getFunction().hasAddressTaken() || !MF.getFunction().hasLocalLinkage()) && !MF.getFunction().doesNoCfCheck()) { auto MBB = MF.begin(); @@ -127,11 +138,18 @@ bool X86IndirectBranchTrackingPass::runOnMachineFunction(MachineFunction &MF) { if (MBB.hasAddressTaken()) Changed |= addENDBR(MBB, MBB.begin()); + // Exception handle may indirectly jump to catch pad, So we should add + // ENDBR before catch pad instructions. 
+ bool EHPadIBTNeeded = MBB.isEHPad(); + for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) { - if (!I->isCall()) - continue; - if (IsCallReturnTwice(I->getOperand(0))) + if (I->isCall() && IsCallReturnTwice(I->getOperand(0))) + Changed |= addENDBR(MBB, std::next(I)); + + if (EHPadIBTNeeded && I->isEHLabel()) { Changed |= addENDBR(MBB, std::next(I)); + EHPadIBTNeeded = false; + } } } return Changed; diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86IndirectThunks.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86IndirectThunks.cpp new file mode 100644 index 000000000000..36b9c3ccc959 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86IndirectThunks.cpp @@ -0,0 +1,364 @@ +//==- X86IndirectThunks.cpp - Construct indirect call/jump thunks for x86 --=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// Pass that injects an MI thunk that is used to lower indirect calls in a way +/// that prevents speculation on some x86 processors and can be used to mitigate +/// security vulnerabilities due to targeted speculative execution and side +/// channels such as CVE-2017-5715. +/// +/// Currently supported thunks include: +/// - Retpoline -- A RET-implemented trampoline that lowers indirect calls +/// - LVI Thunk -- A CALL/JMP-implemented thunk that forces load serialization +/// before making an indirect call/jump +/// +/// Note that the reason that this is implemented as a MachineFunctionPass and +/// not a ModulePass is that ModulePasses at this point in the LLVM X86 pipeline +/// serialize all transformations, which can consume lots of memory. +/// +/// TODO(chandlerc): All of this code could use better comments and +/// documentation. 
+/// +//===----------------------------------------------------------------------===// + +#include "X86.h" +#include "X86InstrBuilder.h" +#include "X86Subtarget.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +#define DEBUG_TYPE "x86-retpoline-thunks" + +static const char RetpolineNamePrefix[] = "__llvm_retpoline_"; +static const char R11RetpolineName[] = "__llvm_retpoline_r11"; +static const char EAXRetpolineName[] = "__llvm_retpoline_eax"; +static const char ECXRetpolineName[] = "__llvm_retpoline_ecx"; +static const char EDXRetpolineName[] = "__llvm_retpoline_edx"; +static const char EDIRetpolineName[] = "__llvm_retpoline_edi"; + +static const char LVIThunkNamePrefix[] = "__llvm_lvi_thunk_"; +static const char R11LVIThunkName[] = "__llvm_lvi_thunk_r11"; + +namespace { +template <typename Derived> class ThunkInserter { + Derived &getDerived() { return *static_cast<Derived *>(this); } + +protected: + bool InsertedThunks; + void doInitialization(Module &M) {} + void createThunkFunction(MachineModuleInfo &MMI, StringRef Name); + +public: + void init(Module &M) { + InsertedThunks = false; + getDerived().doInitialization(M); + } + // return `true` if `MMI` or `MF` was modified + bool run(MachineModuleInfo &MMI, MachineFunction &MF); +}; + +struct RetpolineThunkInserter : ThunkInserter<RetpolineThunkInserter> { + const char *getThunkPrefix() { return RetpolineNamePrefix; } + bool mayUseThunk(const MachineFunction &MF) { + const auto &STI = MF.getSubtarget<X86Subtarget>(); + return (STI.useRetpolineIndirectCalls() || + STI.useRetpolineIndirectBranches()) && + 
!STI.useRetpolineExternalThunk(); + } + void insertThunks(MachineModuleInfo &MMI); + void populateThunk(MachineFunction &MF); +}; + +struct LVIThunkInserter : ThunkInserter<LVIThunkInserter> { + const char *getThunkPrefix() { return LVIThunkNamePrefix; } + bool mayUseThunk(const MachineFunction &MF) { + return MF.getSubtarget<X86Subtarget>().useLVIControlFlowIntegrity(); + } + void insertThunks(MachineModuleInfo &MMI) { + createThunkFunction(MMI, R11LVIThunkName); + } + void populateThunk(MachineFunction &MF) { + // Grab the entry MBB and erase any other blocks. O0 codegen appears to + // generate two basic blocks for the entry block. + MachineBasicBlock *Entry = &MF.front(); + Entry->clear(); + while (MF.size() > 1) + MF.erase(std::next(MF.begin())); + + // This code mitigates LVI by replacing each indirect call/jump with a + // direct call/jump to a thunk that looks like: + // ``` + // lfence + // jmpq *%r11 + // ``` + // This ensures that if the value in register %r11 was loaded from memory, + // then the value in %r11 is (architecturally) correct prior to the jump. 
+ const TargetInstrInfo *TII = MF.getSubtarget<X86Subtarget>().getInstrInfo(); + BuildMI(&MF.front(), DebugLoc(), TII->get(X86::LFENCE)); + BuildMI(&MF.front(), DebugLoc(), TII->get(X86::JMP64r)).addReg(X86::R11); + MF.front().addLiveIn(X86::R11); + return; + } +}; + +class X86IndirectThunks : public MachineFunctionPass { +public: + static char ID; + + X86IndirectThunks() : MachineFunctionPass(ID) {} + + StringRef getPassName() const override { return "X86 Indirect Thunks"; } + + bool doInitialization(Module &M) override; + bool runOnMachineFunction(MachineFunction &MF) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + MachineFunctionPass::getAnalysisUsage(AU); + AU.addRequired<MachineModuleInfoWrapperPass>(); + AU.addPreserved<MachineModuleInfoWrapperPass>(); + } + +private: + std::tuple<RetpolineThunkInserter, LVIThunkInserter> TIs; + + // FIXME: When LLVM moves to C++17, these can become fold expressions + template <typename... ThunkInserterT> + static void initTIs(Module &M, + std::tuple<ThunkInserterT...> &ThunkInserters) { + (void)std::initializer_list<int>{ + (std::get<ThunkInserterT>(ThunkInserters).init(M), 0)...}; + } + template <typename... 
ThunkInserterT> + static bool runTIs(MachineModuleInfo &MMI, MachineFunction &MF, + std::tuple<ThunkInserterT...> &ThunkInserters) { + bool Modified = false; + (void)std::initializer_list<int>{ + Modified |= std::get<ThunkInserterT>(ThunkInserters).run(MMI, MF)...}; + return Modified; + } +}; + +} // end anonymous namespace + +void RetpolineThunkInserter::insertThunks(MachineModuleInfo &MMI) { + if (MMI.getTarget().getTargetTriple().getArch() == Triple::x86_64) + createThunkFunction(MMI, R11RetpolineName); + else + for (StringRef Name : {EAXRetpolineName, ECXRetpolineName, EDXRetpolineName, + EDIRetpolineName}) + createThunkFunction(MMI, Name); +} + +void RetpolineThunkInserter::populateThunk(MachineFunction &MF) { + bool Is64Bit = MF.getTarget().getTargetTriple().getArch() == Triple::x86_64; + Register ThunkReg; + if (Is64Bit) { + assert(MF.getName() == "__llvm_retpoline_r11" && + "Should only have an r11 thunk on 64-bit targets"); + + // __llvm_retpoline_r11: + // callq .Lr11_call_target + // .Lr11_capture_spec: + // pause + // lfence + // jmp .Lr11_capture_spec + // .align 16 + // .Lr11_call_target: + // movq %r11, (%rsp) + // retq + ThunkReg = X86::R11; + } else { + // For 32-bit targets we need to emit a collection of thunks for various + // possible scratch registers as well as a fallback that uses EDI, which is + // normally callee saved. (Listing abridged: an lfence follows the pause.) + // __llvm_retpoline_eax: + // calll .Leax_call_target + // .Leax_capture_spec: + // pause + // jmp .Leax_capture_spec + // .align 16 + // .Leax_call_target: + // movl %eax, (%esp) # Clobber return addr + // retl + // + // __llvm_retpoline_ecx: + // ... # Same setup + // movl %ecx, (%esp) + // retl + // + // __llvm_retpoline_edx: + // ... # Same setup + // movl %edx, (%esp) + // retl + // + // __llvm_retpoline_edi: + // ... 
# Same setup + // movl %edi, (%esp) + // retl + if (MF.getName() == EAXRetpolineName) + ThunkReg = X86::EAX; + else if (MF.getName() == ECXRetpolineName) + ThunkReg = X86::ECX; + else if (MF.getName() == EDXRetpolineName) + ThunkReg = X86::EDX; + else if (MF.getName() == EDIRetpolineName) + ThunkReg = X86::EDI; + else + llvm_unreachable("Invalid thunk name on x86-32!"); + } + + const TargetInstrInfo *TII = MF.getSubtarget<X86Subtarget>().getInstrInfo(); + // Grab the entry MBB and erase any other blocks. O0 codegen appears to + // generate two bbs for the entry block. + MachineBasicBlock *Entry = &MF.front(); + Entry->clear(); + while (MF.size() > 1) + MF.erase(std::next(MF.begin())); + + MachineBasicBlock *CaptureSpec = + MF.CreateMachineBasicBlock(Entry->getBasicBlock()); + MachineBasicBlock *CallTarget = + MF.CreateMachineBasicBlock(Entry->getBasicBlock()); + MCSymbol *TargetSym = MF.getContext().createTempSymbol(); + MF.push_back(CaptureSpec); + MF.push_back(CallTarget); + + const unsigned CallOpc = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32; + const unsigned RetOpc = Is64Bit ? X86::RETQ : X86::RETL; + + Entry->addLiveIn(ThunkReg); + BuildMI(Entry, DebugLoc(), TII->get(CallOpc)).addSym(TargetSym); + + // The MIR verifier thinks that the CALL in the entry block will fall through + // to CaptureSpec, so mark it as the successor. Technically, CallTarget is + // the successor, but the MIR verifier doesn't know how to cope with that. + Entry->addSuccessor(CaptureSpec); + + // In the capture loop for speculation, we want to stop the processor from + // speculating as fast as possible. On Intel processors, the PAUSE instruction + // will block speculation without consuming any execution resources. On AMD + // processors, the PAUSE instruction is (essentially) a nop, so we also use an + // LFENCE instruction which they have advised will stop speculation as well + // with minimal resource utilization. 
We still end the capture with a jump to + // form an infinite loop to fully guarantee that no matter what implementation + // of the x86 ISA, speculating this code path never escapes. + BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::PAUSE)); + BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::LFENCE)); + BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::JMP_1)).addMBB(CaptureSpec); + CaptureSpec->setHasAddressTaken(); + CaptureSpec->addSuccessor(CaptureSpec); + + CallTarget->addLiveIn(ThunkReg); + CallTarget->setHasAddressTaken(); + CallTarget->setAlignment(Align(16)); + + // Insert return address clobber + const unsigned MovOpc = Is64Bit ? X86::MOV64mr : X86::MOV32mr; + const Register SPReg = Is64Bit ? X86::RSP : X86::ESP; + addRegOffset(BuildMI(CallTarget, DebugLoc(), TII->get(MovOpc)), SPReg, false, + 0) + .addReg(ThunkReg); + + CallTarget->back().setPreInstrSymbol(MF, TargetSym); + BuildMI(CallTarget, DebugLoc(), TII->get(RetOpc)); +} + +template <typename Derived> +void ThunkInserter<Derived>::createThunkFunction(MachineModuleInfo &MMI, + StringRef Name) { + assert(Name.startswith(getDerived().getThunkPrefix()) && + "Created a thunk with an unexpected prefix!"); + + Module &M = const_cast<Module &>(*MMI.getModule()); + LLVMContext &Ctx = M.getContext(); + auto Type = FunctionType::get(Type::getVoidTy(Ctx), false); + Function *F = + Function::Create(Type, GlobalValue::LinkOnceODRLinkage, Name, &M); + F->setVisibility(GlobalValue::HiddenVisibility); + F->setComdat(M.getOrInsertComdat(Name)); + + // Add Attributes so that we don't create a frame, unwind information, or + // inline. + AttrBuilder B; + B.addAttribute(llvm::Attribute::NoUnwind); + B.addAttribute(llvm::Attribute::Naked); + F->addAttributes(llvm::AttributeList::FunctionIndex, B); + + // Populate our function a bit so that we can verify. 
+ BasicBlock *Entry = BasicBlock::Create(Ctx, "entry", F); + IRBuilder<> Builder(Entry); + + Builder.CreateRetVoid(); + + // MachineFunctions/MachineBasicBlocks aren't created automatically for the + // IR-level constructs we already made. Create them and insert them into the + // module. + MachineFunction &MF = MMI.getOrCreateMachineFunction(*F); + MachineBasicBlock *EntryMBB = MF.CreateMachineBasicBlock(Entry); + + // Insert EntryMBB into MF. It's not in the module until we do this. + MF.insert(MF.end(), EntryMBB); + // Set MF properties. We never use vregs... + MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs); +} + +template <typename Derived> +bool ThunkInserter<Derived>::run(MachineModuleInfo &MMI, MachineFunction &MF) { + // If MF is not a thunk, check to see if we need to insert a thunk. + if (!MF.getName().startswith(getDerived().getThunkPrefix())) { + // If we've already inserted a thunk, nothing else to do. + if (InsertedThunks) + return false; + + // Only add a thunk if one of the functions has the corresponding feature + // enabled in its subtarget, and doesn't enable external thunks. + // FIXME: Conditionalize on indirect calls so we don't emit a thunk when + // nothing will end up calling it. + // FIXME: It's a little silly to look at every function just to enumerate + // the subtargets, but eventually we'll want to look at them for indirect + // calls, so maybe this is OK. + if (!getDerived().mayUseThunk(MF)) + return false; + + getDerived().insertThunks(MMI); + InsertedThunks = true; + return true; + } + + // If this *is* a thunk function, we need to populate it with the correct MI. 
+ getDerived().populateThunk(MF); + return true; +} + +FunctionPass *llvm::createX86IndirectThunksPass() { + return new X86IndirectThunks(); +} + +char X86IndirectThunks::ID = 0; + +bool X86IndirectThunks::doInitialization(Module &M) { + initTIs(M, TIs); + return false; +} + +bool X86IndirectThunks::runOnMachineFunction(MachineFunction &MF) { + LLVM_DEBUG(dbgs() << getPassName() << '\n'); + auto &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI(); + return runTIs(MMI, MF, TIs); +} diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrCompiler.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrCompiler.td index 78d8dd3c0d03..1fdac104cb73 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrCompiler.td +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrCompiler.td @@ -1213,14 +1213,14 @@ def X86tcret_6regs : PatFrag<(ops node:$ptr, node:$off), def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off), (TCRETURNri ptr_rc_tailcall:$dst, imm:$off)>, - Requires<[Not64BitMode, NotUseRetpolineIndirectCalls]>; + Requires<[Not64BitMode, NotUseIndirectThunkCalls]>; // FIXME: This is disabled for 32-bit PIC mode because the global base // register which is part of the address mode may be assigned a // callee-saved register. def : Pat<(X86tcret (load addr:$dst), imm:$off), (TCRETURNmi addr:$dst, imm:$off)>, - Requires<[Not64BitMode, IsNotPIC, NotUseRetpolineIndirectCalls]>; + Requires<[Not64BitMode, IsNotPIC, NotUseIndirectThunkCalls]>; def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off), (TCRETURNdi tglobaladdr:$dst, imm:$off)>, @@ -1232,21 +1232,21 @@ def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off), def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off), (TCRETURNri64 ptr_rc_tailcall:$dst, imm:$off)>, - Requires<[In64BitMode, NotUseRetpolineIndirectCalls]>; + Requires<[In64BitMode, NotUseIndirectThunkCalls]>; // Don't fold loads into X86tcret requiring more than 6 regs. // There wouldn't be enough scratch registers for base+index. 
def : Pat<(X86tcret_6regs (load addr:$dst), imm:$off), (TCRETURNmi64 addr:$dst, imm:$off)>, - Requires<[In64BitMode, NotUseRetpolineIndirectCalls]>; + Requires<[In64BitMode, NotUseIndirectThunkCalls]>; def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off), - (RETPOLINE_TCRETURN64 ptr_rc_tailcall:$dst, imm:$off)>, - Requires<[In64BitMode, UseRetpolineIndirectCalls]>; + (INDIRECT_THUNK_TCRETURN64 ptr_rc_tailcall:$dst, imm:$off)>, + Requires<[In64BitMode, UseIndirectThunkCalls]>; def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off), - (RETPOLINE_TCRETURN32 ptr_rc_tailcall:$dst, imm:$off)>, - Requires<[Not64BitMode, UseRetpolineIndirectCalls]>; + (INDIRECT_THUNK_TCRETURN32 ptr_rc_tailcall:$dst, imm:$off)>, + Requires<[Not64BitMode, UseIndirectThunkCalls]>; def : Pat<(X86tcret (i64 tglobaladdr:$dst), imm:$off), (TCRETURNdi64 tglobaladdr:$dst, imm:$off)>, diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrControl.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrControl.td index 32faeb1a86f2..1842dc19ec2e 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrControl.td +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrControl.td @@ -237,13 +237,13 @@ let isCall = 1 in Sched<[WriteJumpLd]>; def CALL32r : I<0xFF, MRM2r, (outs), (ins GR32:$dst), "call{l}\t{*}$dst", [(X86call GR32:$dst)]>, OpSize32, - Requires<[Not64BitMode,NotUseRetpolineIndirectCalls]>, + Requires<[Not64BitMode,NotUseIndirectThunkCalls]>, Sched<[WriteJump]>; def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst), "call{l}\t{*}$dst", [(X86call (loadi32 addr:$dst))]>, OpSize32, Requires<[Not64BitMode,FavorMemIndirectCall, - NotUseRetpolineIndirectCalls]>, + NotUseIndirectThunkCalls]>, Sched<[WriteJumpLd]>; // Non-tracking calls for IBT, use with caution. 
@@ -334,11 +334,11 @@ let isCall = 1, Uses = [RSP, SSP], SchedRW = [WriteJump] in { Requires<[In64BitMode]>; def CALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst), "call{q}\t{*}$dst", [(X86call GR64:$dst)]>, - Requires<[In64BitMode,NotUseRetpolineIndirectCalls]>; + Requires<[In64BitMode,NotUseIndirectThunkCalls]>; def CALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst), "call{q}\t{*}$dst", [(X86call (loadi64 addr:$dst))]>, Requires<[In64BitMode,FavorMemIndirectCall, - NotUseRetpolineIndirectCalls]>; + NotUseIndirectThunkCalls]>; // Non-tracking calls for IBT, use with caution. let isCodeGenOnly = 1 in { @@ -393,19 +393,19 @@ let isPseudo = 1, isCall = 1, isCodeGenOnly = 1, Uses = [RSP, SSP], usesCustomInserter = 1, SchedRW = [WriteJump] in { - def RETPOLINE_CALL32 : + def INDIRECT_THUNK_CALL32 : PseudoI<(outs), (ins GR32:$dst), [(X86call GR32:$dst)]>, - Requires<[Not64BitMode,UseRetpolineIndirectCalls]>; + Requires<[Not64BitMode,UseIndirectThunkCalls]>; - def RETPOLINE_CALL64 : + def INDIRECT_THUNK_CALL64 : PseudoI<(outs), (ins GR64:$dst), [(X86call GR64:$dst)]>, - Requires<[In64BitMode,UseRetpolineIndirectCalls]>; + Requires<[In64BitMode,UseIndirectThunkCalls]>; - // Retpoline variant of indirect tail calls. + // Indirect thunk variant of indirect tail calls. 
let isTerminator = 1, isReturn = 1, isBarrier = 1 in { - def RETPOLINE_TCRETURN64 : + def INDIRECT_THUNK_TCRETURN64 : PseudoI<(outs), (ins GR64:$dst, i32imm:$offset), []>; - def RETPOLINE_TCRETURN32 : + def INDIRECT_THUNK_TCRETURN32 : PseudoI<(outs), (ins GR32:$dst, i32imm:$offset), []>; } } diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.cpp index 245346d82731..90484241c28c 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -3956,6 +3956,8 @@ static bool ExpandMOVImmSExti8(MachineInstrBuilder &MIB, BuildMI(MBB, I, DL, TII.get(X86::PUSH32i8)).addImm(Imm); MIB->setDesc(TII.get(X86::POP32r)); } + MIB->RemoveOperand(1); + MIB->addImplicitDefUseOperands(*MBB.getParent()); // Build CFI if necessary. MachineFunction &MF = *MBB.getParent(); diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.td index ca5425e8b89f..93f40c8ec996 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.td +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.td @@ -996,8 +996,8 @@ def HasFastLZCNT : Predicate<"Subtarget->hasFastLZCNT()">; def HasFastSHLDRotate : Predicate<"Subtarget->hasFastSHLDRotate()">; def HasERMSB : Predicate<"Subtarget->hasERMSB()">; def HasMFence : Predicate<"Subtarget->hasMFence()">; -def UseRetpolineIndirectCalls : Predicate<"Subtarget->useRetpolineIndirectCalls()">; -def NotUseRetpolineIndirectCalls : Predicate<"!Subtarget->useRetpolineIndirectCalls()">; +def UseIndirectThunkCalls : Predicate<"Subtarget->useIndirectThunkCalls()">; +def NotUseIndirectThunkCalls : Predicate<"!Subtarget->useIndirectThunkCalls()">; //===----------------------------------------------------------------------===// // X86 Instruction Format Definitions. 
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp new file mode 100644 index 000000000000..35fc439998f9 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp @@ -0,0 +1,900 @@ +//==-- X86LoadValueInjectionLoadHardening.cpp - LVI load hardening for x86 --=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// Description: This pass finds Load Value Injection (LVI) gadgets consisting +/// of a load from memory (i.e., SOURCE), and any operation that may transmit +/// the value loaded from memory over a covert channel, or use the value loaded +/// from memory to determine a branch/call target (i.e., SINK). After finding +/// all such gadgets in a given function, the pass minimally inserts LFENCE +/// instructions in such a manner that the following property is satisfied: for +/// all SOURCE+SINK pairs, all paths in the CFG from SOURCE to SINK contain at +/// least one LFENCE instruction. The algorithm that implements this minimal +/// insertion is influenced by an academic paper that minimally inserts memory +/// fences for high-performance concurrent programs: +/// http://www.cs.ucr.edu/~lesani/companion/oopsla15/OOPSLA15.pdf +/// The algorithm implemented in this pass is as follows: +/// 1. Build a condensed CFG (i.e., a GadgetGraph) consisting only of the +/// following components: +/// - SOURCE instructions (also includes function arguments) +/// - SINK instructions +/// - Basic block entry points +/// - Basic block terminators +/// - LFENCE instructions +/// 2. 
Analyze the GadgetGraph to determine which SOURCE+SINK pairs (i.e., +/// gadgets) are already mitigated by existing LFENCEs. If all gadgets have been +/// mitigated, go to step 6. +/// 3. Use a heuristic or plugin to approximate minimal LFENCE insertion. +/// 4. Insert one LFENCE along each CFG edge that was cut in step 3. +/// 5. Go to step 2. +/// 6. If any LFENCEs were inserted, return `true` from runOnMachineFunction() +/// to tell LLVM that the function was modified. +/// +//===----------------------------------------------------------------------===// + +#include "ImmutableGraph.h" +#include "X86.h" +#include "X86Subtarget.h" +#include "X86TargetMachine.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineDominanceFrontier.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RDFGraph.h" +#include "llvm/CodeGen/RDFLiveness.h" +#include "llvm/InitializePasses.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/DOTGraphTraits.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/DynamicLibrary.h" +#include "llvm/Support/GraphWriter.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +#define PASS_KEY "x86-lvi-load" +#define DEBUG_TYPE PASS_KEY + +STATISTIC(NumFences, "Number of LFENCEs inserted for LVI mitigation"); +STATISTIC(NumFunctionsConsidered, "Number of functions analyzed"); +STATISTIC(NumFunctionsMitigated, "Number of functions for which mitigations " + "were deployed"); +STATISTIC(NumGadgets, "Number of LVI gadgets detected during analysis"); + 
+static cl::opt<std::string> OptimizePluginPath( + PASS_KEY "-opt-plugin", + cl::desc("Specify a plugin to optimize LFENCE insertion"), cl::Hidden); + +static cl::opt<bool> NoConditionalBranches( + PASS_KEY "-no-cbranch", + cl::desc("Don't treat conditional branches as disclosure gadgets. This " + "may improve performance, at the cost of security."), + cl::init(false), cl::Hidden); + +static cl::opt<bool> EmitDot( + PASS_KEY "-dot", + cl::desc( + "For each function, emit a dot graph depicting potential LVI gadgets"), + cl::init(false), cl::Hidden); + +static cl::opt<bool> EmitDotOnly( + PASS_KEY "-dot-only", + cl::desc("For each function, emit a dot graph depicting potential LVI " + "gadgets, and do not insert any fences"), + cl::init(false), cl::Hidden); + +static cl::opt<bool> EmitDotVerify( + PASS_KEY "-dot-verify", + cl::desc("For each function, emit a dot graph to stdout depicting " + "potential LVI gadgets, used for testing purposes only"), + cl::init(false), cl::Hidden); + +static llvm::sys::DynamicLibrary OptimizeDL; +typedef int (*OptimizeCutT)(unsigned int *nodes, unsigned int nodes_size, + unsigned int *edges, int *edge_values, + int *cut_edges /* out */, unsigned int edges_size); +static OptimizeCutT OptimizeCut = nullptr; + +namespace { + +struct MachineGadgetGraph : ImmutableGraph<MachineInstr *, int> { + static constexpr int GadgetEdgeSentinel = -1; + static constexpr MachineInstr *const ArgNodeSentinel = nullptr; + + using GraphT = ImmutableGraph<MachineInstr *, int>; + using Node = typename GraphT::Node; + using Edge = typename GraphT::Edge; + using size_type = typename GraphT::size_type; + MachineGadgetGraph(std::unique_ptr<Node[]> Nodes, + std::unique_ptr<Edge[]> Edges, size_type NodesSize, + size_type EdgesSize, int NumFences = 0, int NumGadgets = 0) + : GraphT(std::move(Nodes), std::move(Edges), NodesSize, EdgesSize), + NumFences(NumFences), NumGadgets(NumGadgets) {} + static inline bool isCFGEdge(const Edge &E) { + return E.getValue() != 
GadgetEdgeSentinel; + } + static inline bool isGadgetEdge(const Edge &E) { + return E.getValue() == GadgetEdgeSentinel; + } + int NumFences; + int NumGadgets; +}; + +class X86LoadValueInjectionLoadHardeningPass : public MachineFunctionPass { +public: + X86LoadValueInjectionLoadHardeningPass() : MachineFunctionPass(ID) {} + + StringRef getPassName() const override { + return "X86 Load Value Injection (LVI) Load Hardening"; + } + void getAnalysisUsage(AnalysisUsage &AU) const override; + bool runOnMachineFunction(MachineFunction &MF) override; + + static char ID; + +private: + using GraphBuilder = ImmutableGraphBuilder<MachineGadgetGraph>; + using EdgeSet = MachineGadgetGraph::EdgeSet; + using NodeSet = MachineGadgetGraph::NodeSet; + using Gadget = std::pair<MachineInstr *, MachineInstr *>; + + const X86Subtarget *STI; + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + + std::unique_ptr<MachineGadgetGraph> + getGadgetGraph(MachineFunction &MF, const MachineLoopInfo &MLI, + const MachineDominatorTree &MDT, + const MachineDominanceFrontier &MDF) const; + int hardenLoadsWithPlugin(MachineFunction &MF, + std::unique_ptr<MachineGadgetGraph> Graph) const; + int hardenLoadsWithGreedyHeuristic( + MachineFunction &MF, std::unique_ptr<MachineGadgetGraph> Graph) const; + int elimMitigatedEdgesAndNodes(MachineGadgetGraph &G, + EdgeSet &ElimEdges /* in, out */, + NodeSet &ElimNodes /* in, out */) const; + std::unique_ptr<MachineGadgetGraph> + trimMitigatedEdges(std::unique_ptr<MachineGadgetGraph> Graph) const; + void findAndCutEdges(MachineGadgetGraph &G, + EdgeSet &CutEdges /* out */) const; + int insertFences(MachineFunction &MF, MachineGadgetGraph &G, + EdgeSet &CutEdges /* in, out */) const; + bool instrUsesRegToAccessMemory(const MachineInstr &I, unsigned Reg) const; + bool instrUsesRegToBranch(const MachineInstr &I, unsigned Reg) const; + inline bool isFence(const MachineInstr *MI) const { + return MI && (MI->getOpcode() == X86::LFENCE || + 
(STI->useLVIControlFlowIntegrity() && MI->isCall())); + } +}; + +} // end anonymous namespace + +namespace llvm { + +template <> +struct GraphTraits<MachineGadgetGraph *> + : GraphTraits<ImmutableGraph<MachineInstr *, int> *> {}; + +template <> +struct DOTGraphTraits<MachineGadgetGraph *> : DefaultDOTGraphTraits { + using GraphType = MachineGadgetGraph; + using Traits = llvm::GraphTraits<GraphType *>; + using NodeRef = typename Traits::NodeRef; + using EdgeRef = typename Traits::EdgeRef; + using ChildIteratorType = typename Traits::ChildIteratorType; + using ChildEdgeIteratorType = typename Traits::ChildEdgeIteratorType; + + DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {} + + std::string getNodeLabel(NodeRef Node, GraphType *) { + if (Node->getValue() == MachineGadgetGraph::ArgNodeSentinel) + return "ARGS"; + + std::string Str; + raw_string_ostream OS(Str); + OS << *Node->getValue(); + return OS.str(); + } + + static std::string getNodeAttributes(NodeRef Node, GraphType *) { + MachineInstr *MI = Node->getValue(); + if (MI == MachineGadgetGraph::ArgNodeSentinel) + return "color = blue"; + if (MI->getOpcode() == X86::LFENCE) + return "color = green"; + return ""; + } + + static std::string getEdgeAttributes(NodeRef, ChildIteratorType E, + GraphType *) { + int EdgeVal = (*E.getCurrent()).getValue(); + return EdgeVal >= 0 ? 
"label = " + std::to_string(EdgeVal) + : "color = red, style = \"dashed\""; + } +}; + +} // end namespace llvm + +constexpr MachineInstr *MachineGadgetGraph::ArgNodeSentinel; +constexpr int MachineGadgetGraph::GadgetEdgeSentinel; + +char X86LoadValueInjectionLoadHardeningPass::ID = 0; + +void X86LoadValueInjectionLoadHardeningPass::getAnalysisUsage( + AnalysisUsage &AU) const { + MachineFunctionPass::getAnalysisUsage(AU); + AU.addRequired<MachineLoopInfo>(); + AU.addRequired<MachineDominatorTree>(); + AU.addRequired<MachineDominanceFrontier>(); + AU.setPreservesCFG(); +} + +static void WriteGadgetGraph(raw_ostream &OS, MachineFunction &MF, + MachineGadgetGraph *G) { + WriteGraph(OS, G, /*ShortNames*/ false, + "Speculative gadgets for \"" + MF.getName() + "\" function"); +} + +bool X86LoadValueInjectionLoadHardeningPass::runOnMachineFunction( + MachineFunction &MF) { + LLVM_DEBUG(dbgs() << "***** " << getPassName() << " : " << MF.getName() + << " *****\n"); + STI = &MF.getSubtarget<X86Subtarget>(); + if (!STI->useLVILoadHardening()) + return false; + + // FIXME: support 32-bit + if (!STI->is64Bit()) + report_fatal_error("LVI load hardening is only supported on 64-bit", false); + + // Don't skip functions with the "optnone" attr but participate in opt-bisect. + const Function &F = MF.getFunction(); + if (!F.hasOptNone() && skipFunction(F)) + return false; + + ++NumFunctionsConsidered; + TII = STI->getInstrInfo(); + TRI = STI->getRegisterInfo(); + LLVM_DEBUG(dbgs() << "Building gadget graph...\n"); + const auto &MLI = getAnalysis<MachineLoopInfo>(); + const auto &MDT = getAnalysis<MachineDominatorTree>(); + const auto &MDF = getAnalysis<MachineDominanceFrontier>(); + std::unique_ptr<MachineGadgetGraph> Graph = getGadgetGraph(MF, MLI, MDT, MDF); + LLVM_DEBUG(dbgs() << "Building gadget graph... 
Done\n"); + if (Graph == nullptr) + return false; // didn't find any gadgets + + if (EmitDotVerify) { + WriteGadgetGraph(outs(), MF, Graph.get()); + return false; + } + + if (EmitDot || EmitDotOnly) { + LLVM_DEBUG(dbgs() << "Emitting gadget graph...\n"); + std::error_code FileError; + std::string FileName = "lvi."; + FileName += MF.getName(); + FileName += ".dot"; + raw_fd_ostream FileOut(FileName, FileError); + if (FileError) + errs() << FileError.message(); + WriteGadgetGraph(FileOut, MF, Graph.get()); + FileOut.close(); + LLVM_DEBUG(dbgs() << "Emitting gadget graph... Done\n"); + if (EmitDotOnly) + return false; + } + + int FencesInserted; + if (!OptimizePluginPath.empty()) { + if (!OptimizeDL.isValid()) { + std::string ErrorMsg; + OptimizeDL = llvm::sys::DynamicLibrary::getPermanentLibrary( + OptimizePluginPath.c_str(), &ErrorMsg); + if (!ErrorMsg.empty()) + report_fatal_error("Failed to load opt plugin: \"" + ErrorMsg + '\"'); + OptimizeCut = (OptimizeCutT)OptimizeDL.getAddressOfSymbol("optimize_cut"); + if (!OptimizeCut) + report_fatal_error("Invalid optimization plugin"); + } + FencesInserted = hardenLoadsWithPlugin(MF, std::move(Graph)); + } else { // Use the default greedy heuristic + FencesInserted = hardenLoadsWithGreedyHeuristic(MF, std::move(Graph)); + } + + if (FencesInserted > 0) + ++NumFunctionsMitigated; + NumFences += FencesInserted; + return (FencesInserted > 0); +} + +std::unique_ptr<MachineGadgetGraph> +X86LoadValueInjectionLoadHardeningPass::getGadgetGraph( + MachineFunction &MF, const MachineLoopInfo &MLI, + const MachineDominatorTree &MDT, + const MachineDominanceFrontier &MDF) const { + using namespace rdf; + + // Build the Register Dataflow Graph using the RDF framework + TargetOperandInfo TOI{*TII}; + DataFlowGraph DFG{MF, *TII, *TRI, MDT, MDF, TOI}; + DFG.build(); + Liveness L{MF.getRegInfo(), DFG}; + L.computePhiInfo(); + + GraphBuilder Builder; + using GraphIter = typename GraphBuilder::BuilderNodeRef; + DenseMap<MachineInstr *, 
GraphIter> NodeMap; + int FenceCount = 0, GadgetCount = 0; + auto MaybeAddNode = [&NodeMap, &Builder](MachineInstr *MI) { + auto Ref = NodeMap.find(MI); + if (Ref == NodeMap.end()) { + auto I = Builder.addVertex(MI); + NodeMap[MI] = I; + return std::pair<GraphIter, bool>{I, true}; + } + return std::pair<GraphIter, bool>{Ref->getSecond(), false}; + }; + + // The `Transmitters` map memoizes transmitters found for each def. If a def + // has not yet been analyzed, then it will not appear in the map. If a def + // has been analyzed and was determined not to have any transmitters, then + // its list of transmitters will be empty. + DenseMap<NodeId, std::vector<NodeId>> Transmitters; + + // Analyze all machine instructions to find gadgets and LFENCEs, adding + // each interesting value to `Nodes` + auto AnalyzeDef = [&](NodeAddr<DefNode *> SourceDef) { + SmallSet<NodeId, 8> UsesVisited, DefsVisited; + std::function<void(NodeAddr<DefNode *>)> AnalyzeDefUseChain = + [&](NodeAddr<DefNode *> Def) { + if (Transmitters.find(Def.Id) != Transmitters.end()) + return; // Already analyzed `Def` + + // Use RDF to find all the uses of `Def` + rdf::NodeSet Uses; + RegisterRef DefReg = DFG.getPRI().normalize(Def.Addr->getRegRef(DFG)); + for (auto UseID : L.getAllReachedUses(DefReg, Def)) { + auto Use = DFG.addr<UseNode *>(UseID); + if (Use.Addr->getFlags() & NodeAttrs::PhiRef) { // phi node + NodeAddr<PhiNode *> Phi = Use.Addr->getOwner(DFG); + for (auto I : L.getRealUses(Phi.Id)) { + if (DFG.getPRI().alias(RegisterRef(I.first), DefReg)) { + for (auto UA : I.second) + Uses.emplace(UA.first); + } + } + } else { // not a phi node + Uses.emplace(UseID); + } + } + + // For each use of `Def`, we want to know whether: + // (1) The use can leak the Def'ed value, + // (2) The use can further propagate the Def'ed value to more defs + for (auto UseID : Uses) { + if (!UsesVisited.insert(UseID).second) + continue; // Already visited this use of `Def` + + auto Use = DFG.addr<UseNode *>(UseID); + 
assert(!(Use.Addr->getFlags() & NodeAttrs::PhiRef)); + MachineOperand &UseMO = Use.Addr->getOp(); + MachineInstr &UseMI = *UseMO.getParent(); + assert(UseMO.isReg()); + + // We naively assume that an instruction propagates any loaded + // uses to all defs unless the instruction is a call, in which + // case all arguments will be treated as gadget sources during + // analysis of the callee function. + if (UseMI.isCall()) + continue; + + // Check whether this use can transmit (leak) its value. + if (instrUsesRegToAccessMemory(UseMI, UseMO.getReg()) || + (!NoConditionalBranches && + instrUsesRegToBranch(UseMI, UseMO.getReg()))) { + Transmitters[Def.Id].push_back(Use.Addr->getOwner(DFG).Id); + if (UseMI.mayLoad()) + continue; // Found a transmitting load -- no need to continue + // traversing its defs (i.e., this load will become + // a new gadget source anyways). + } + + // Check whether the use propagates to more defs. + NodeAddr<InstrNode *> Owner{Use.Addr->getOwner(DFG)}; + rdf::NodeList AnalyzedChildDefs; + for (auto &ChildDef : + Owner.Addr->members_if(DataFlowGraph::IsDef, DFG)) { + if (!DefsVisited.insert(ChildDef.Id).second) + continue; // Already visited this def + if (Def.Addr->getAttrs() & NodeAttrs::Dead) + continue; + if (Def.Id == ChildDef.Id) + continue; // `Def` uses itself (e.g., increment loop counter) + + AnalyzeDefUseChain(ChildDef); + + // `Def` inherits all of its child defs' transmitters. + for (auto TransmitterId : Transmitters[ChildDef.Id]) + Transmitters[Def.Id].push_back(TransmitterId); + } + } + + // Note that this statement adds `Def.Id` to the map if no + // transmitters were found for `Def`. 
+ auto &DefTransmitters = Transmitters[Def.Id]; + + // Remove duplicate transmitters + llvm::sort(DefTransmitters); + DefTransmitters.erase( + std::unique(DefTransmitters.begin(), DefTransmitters.end()), + DefTransmitters.end()); + }; + + // Find all of the transmitters + AnalyzeDefUseChain(SourceDef); + auto &SourceDefTransmitters = Transmitters[SourceDef.Id]; + if (SourceDefTransmitters.empty()) + return; // No transmitters for `SourceDef` + + MachineInstr *Source = SourceDef.Addr->getFlags() & NodeAttrs::PhiRef + ? MachineGadgetGraph::ArgNodeSentinel + : SourceDef.Addr->getOp().getParent(); + auto GadgetSource = MaybeAddNode(Source); + // Each transmitter is a sink for `SourceDef`. + for (auto TransmitterId : SourceDefTransmitters) { + MachineInstr *Sink = DFG.addr<StmtNode *>(TransmitterId).Addr->getCode(); + auto GadgetSink = MaybeAddNode(Sink); + // Add the gadget edge to the graph. + Builder.addEdge(MachineGadgetGraph::GadgetEdgeSentinel, + GadgetSource.first, GadgetSink.first); + ++GadgetCount; + } + }; + + LLVM_DEBUG(dbgs() << "Analyzing def-use chains to find gadgets\n"); + // Analyze function arguments + NodeAddr<BlockNode *> EntryBlock = DFG.getFunc().Addr->getEntryBlock(DFG); + for (NodeAddr<PhiNode *> ArgPhi : + EntryBlock.Addr->members_if(DataFlowGraph::IsPhi, DFG)) { + NodeList Defs = ArgPhi.Addr->members_if(DataFlowGraph::IsDef, DFG); + llvm::for_each(Defs, AnalyzeDef); + } + // Analyze every instruction in MF + for (NodeAddr<BlockNode *> BA : DFG.getFunc().Addr->members(DFG)) { + for (NodeAddr<StmtNode *> SA : + BA.Addr->members_if(DataFlowGraph::IsCode<NodeAttrs::Stmt>, DFG)) { + MachineInstr *MI = SA.Addr->getCode(); + if (isFence(MI)) { + MaybeAddNode(MI); + ++FenceCount; + } else if (MI->mayLoad()) { + NodeList Defs = SA.Addr->members_if(DataFlowGraph::IsDef, DFG); + llvm::for_each(Defs, AnalyzeDef); + } + } + } + LLVM_DEBUG(dbgs() << "Found " << FenceCount << " fences\n"); + LLVM_DEBUG(dbgs() << "Found " << GadgetCount << " gadgets\n"); + if 
(GadgetCount == 0) + return nullptr; + NumGadgets += GadgetCount; + + // Traverse CFG to build the rest of the graph + SmallSet<MachineBasicBlock *, 8> BlocksVisited; + std::function<void(MachineBasicBlock *, GraphIter, unsigned)> TraverseCFG = + [&](MachineBasicBlock *MBB, GraphIter GI, unsigned ParentDepth) { + unsigned LoopDepth = MLI.getLoopDepth(MBB); + if (!MBB->empty()) { + // Always add the first instruction in each block + auto NI = MBB->begin(); + auto BeginBB = MaybeAddNode(&*NI); + Builder.addEdge(ParentDepth, GI, BeginBB.first); + if (!BlocksVisited.insert(MBB).second) + return; + + // Add any instructions within the block that are gadget components + GI = BeginBB.first; + while (++NI != MBB->end()) { + auto Ref = NodeMap.find(&*NI); + if (Ref != NodeMap.end()) { + Builder.addEdge(LoopDepth, GI, Ref->getSecond()); + GI = Ref->getSecond(); + } + } + + // Always add the terminator instruction, if one exists + auto T = MBB->getFirstTerminator(); + if (T != MBB->end()) { + auto EndBB = MaybeAddNode(&*T); + if (EndBB.second) + Builder.addEdge(LoopDepth, GI, EndBB.first); + GI = EndBB.first; + } + } + for (MachineBasicBlock *Succ : MBB->successors()) + TraverseCFG(Succ, GI, LoopDepth); + }; + // ArgNodeSentinel is a pseudo-instruction that represents MF args in the + // GadgetGraph + GraphIter ArgNode = MaybeAddNode(MachineGadgetGraph::ArgNodeSentinel).first; + TraverseCFG(&MF.front(), ArgNode, 0); + std::unique_ptr<MachineGadgetGraph> G{Builder.get(FenceCount, GadgetCount)}; + LLVM_DEBUG(dbgs() << "Found " << G->nodes_size() << " nodes\n"); + return G; +} + +// Returns the number of remaining gadget edges that could not be eliminated +int X86LoadValueInjectionLoadHardeningPass::elimMitigatedEdgesAndNodes( + MachineGadgetGraph &G, MachineGadgetGraph::EdgeSet &ElimEdges /* in, out */, + MachineGadgetGraph::NodeSet &ElimNodes /* in, out */) const { + if (G.NumFences > 0) { + // Eliminate fences and CFG edges that ingress and egress the fence, as + // they are 
trivially mitigated. + for (const auto &E : G.edges()) { + const MachineGadgetGraph::Node *Dest = E.getDest(); + if (isFence(Dest->getValue())) { + ElimNodes.insert(*Dest); + ElimEdges.insert(E); + for (const auto &DE : Dest->edges()) + ElimEdges.insert(DE); + } + } + } + + // Find and eliminate gadget edges that have been mitigated. + int MitigatedGadgets = 0, RemainingGadgets = 0; + MachineGadgetGraph::NodeSet ReachableNodes{G}; + for (const auto &RootN : G.nodes()) { + if (llvm::none_of(RootN.edges(), MachineGadgetGraph::isGadgetEdge)) + continue; // skip this node if it isn't a gadget source + + // Find all of the nodes that are CFG-reachable from RootN using DFS + ReachableNodes.clear(); + std::function<void(const MachineGadgetGraph::Node *, bool)> + FindReachableNodes = + [&](const MachineGadgetGraph::Node *N, bool FirstNode) { + if (!FirstNode) + ReachableNodes.insert(*N); + for (const auto &E : N->edges()) { + const MachineGadgetGraph::Node *Dest = E.getDest(); + if (MachineGadgetGraph::isCFGEdge(E) && + !ElimEdges.contains(E) && !ReachableNodes.contains(*Dest)) + FindReachableNodes(Dest, false); + } + }; + FindReachableNodes(&RootN, true); + + // Any gadget whose sink is unreachable has been mitigated + for (const auto &E : RootN.edges()) { + if (MachineGadgetGraph::isGadgetEdge(E)) { + if (ReachableNodes.contains(*E.getDest())) { + // This gadget's sink is reachable + ++RemainingGadgets; + } else { // This gadget's sink is unreachable, and therefore mitigated + ++MitigatedGadgets; + ElimEdges.insert(E); + } + } + } + } + return RemainingGadgets; +} + +std::unique_ptr<MachineGadgetGraph> +X86LoadValueInjectionLoadHardeningPass::trimMitigatedEdges( + std::unique_ptr<MachineGadgetGraph> Graph) const { + MachineGadgetGraph::NodeSet ElimNodes{*Graph}; + MachineGadgetGraph::EdgeSet ElimEdges{*Graph}; + int RemainingGadgets = + elimMitigatedEdgesAndNodes(*Graph, ElimEdges, ElimNodes); + if (ElimEdges.empty() && ElimNodes.empty()) { + Graph->NumFences = 0; + 
Graph->NumGadgets = RemainingGadgets; + } else { + Graph = GraphBuilder::trim(*Graph, ElimNodes, ElimEdges, 0 /* NumFences */, + RemainingGadgets); + } + return Graph; +} + +int X86LoadValueInjectionLoadHardeningPass::hardenLoadsWithPlugin( + MachineFunction &MF, std::unique_ptr<MachineGadgetGraph> Graph) const { + int FencesInserted = 0; + + do { + LLVM_DEBUG(dbgs() << "Eliminating mitigated paths...\n"); + Graph = trimMitigatedEdges(std::move(Graph)); + LLVM_DEBUG(dbgs() << "Eliminating mitigated paths... Done\n"); + if (Graph->NumGadgets == 0) + break; + + LLVM_DEBUG(dbgs() << "Cutting edges...\n"); + EdgeSet CutEdges{*Graph}; + auto Nodes = std::make_unique<unsigned int[]>(Graph->nodes_size() + + 1 /* terminator node */); + auto Edges = std::make_unique<unsigned int[]>(Graph->edges_size()); + auto EdgeCuts = std::make_unique<int[]>(Graph->edges_size()); + auto EdgeValues = std::make_unique<int[]>(Graph->edges_size()); + for (const auto &N : Graph->nodes()) { + Nodes[Graph->getNodeIndex(N)] = Graph->getEdgeIndex(*N.edges_begin()); + } + Nodes[Graph->nodes_size()] = Graph->edges_size(); // terminator node + for (const auto &E : Graph->edges()) { + Edges[Graph->getEdgeIndex(E)] = Graph->getNodeIndex(*E.getDest()); + EdgeValues[Graph->getEdgeIndex(E)] = E.getValue(); + } + OptimizeCut(Nodes.get(), Graph->nodes_size(), Edges.get(), EdgeValues.get(), + EdgeCuts.get(), Graph->edges_size()); + for (int I = 0; I < Graph->edges_size(); ++I) + if (EdgeCuts[I]) + CutEdges.set(I); + LLVM_DEBUG(dbgs() << "Cutting edges... Done\n"); + LLVM_DEBUG(dbgs() << "Cut " << CutEdges.count() << " edges\n"); + + LLVM_DEBUG(dbgs() << "Inserting LFENCEs...\n"); + FencesInserted += insertFences(MF, *Graph, CutEdges); + LLVM_DEBUG(dbgs() << "Inserting LFENCEs... 
Done\n"); + LLVM_DEBUG(dbgs() << "Inserted " << FencesInserted << " fences\n"); + + Graph = GraphBuilder::trim(*Graph, MachineGadgetGraph::NodeSet{*Graph}, + CutEdges); + } while (true); + + return FencesInserted; +} + +int X86LoadValueInjectionLoadHardeningPass::hardenLoadsWithGreedyHeuristic( + MachineFunction &MF, std::unique_ptr<MachineGadgetGraph> Graph) const { + LLVM_DEBUG(dbgs() << "Eliminating mitigated paths...\n"); + Graph = trimMitigatedEdges(std::move(Graph)); + LLVM_DEBUG(dbgs() << "Eliminating mitigated paths... Done\n"); + if (Graph->NumGadgets == 0) + return 0; + + LLVM_DEBUG(dbgs() << "Cutting edges...\n"); + MachineGadgetGraph::NodeSet ElimNodes{*Graph}, GadgetSinks{*Graph}; + MachineGadgetGraph::EdgeSet ElimEdges{*Graph}, CutEdges{*Graph}; + auto IsCFGEdge = [&ElimEdges, &CutEdges](const MachineGadgetGraph::Edge &E) { + return !ElimEdges.contains(E) && !CutEdges.contains(E) && + MachineGadgetGraph::isCFGEdge(E); + }; + auto IsGadgetEdge = [&ElimEdges, + &CutEdges](const MachineGadgetGraph::Edge &E) { + return !ElimEdges.contains(E) && !CutEdges.contains(E) && + MachineGadgetGraph::isGadgetEdge(E); + }; + + // FIXME: this is O(E^2), we could probably do better. + do { + // Find the cheapest CFG edge that will eliminate a gadget (by being + // egress from a SOURCE node or ingress to a SINK node), and cut it. + const MachineGadgetGraph::Edge *CheapestSoFar = nullptr; + + // First, collect all gadget source and sink nodes. + MachineGadgetGraph::NodeSet GadgetSources{*Graph}, GadgetSinks{*Graph}; + for (const auto &N : Graph->nodes()) { + if (ElimNodes.contains(N)) + continue; + for (const auto &E : N.edges()) { + if (IsGadgetEdge(E)) { + GadgetSources.insert(N); + GadgetSinks.insert(*E.getDest()); + } + } + } + + // Next, look for the cheapest CFG edge which, when cut, is guaranteed to + // mitigate at least one gadget by either: + // (a) being egress from a gadget source, or + // (b) being ingress to a gadget sink. 
+ for (const auto &N : Graph->nodes()) { + if (ElimNodes.contains(N)) + continue; + for (const auto &E : N.edges()) { + if (IsCFGEdge(E)) { + if (GadgetSources.contains(N) || GadgetSinks.contains(*E.getDest())) { + if (!CheapestSoFar || E.getValue() < CheapestSoFar->getValue()) + CheapestSoFar = &E; + } + } + } + } + + assert(CheapestSoFar && "Failed to cut an edge"); + CutEdges.insert(*CheapestSoFar); + ElimEdges.insert(*CheapestSoFar); + } while (elimMitigatedEdgesAndNodes(*Graph, ElimEdges, ElimNodes)); + LLVM_DEBUG(dbgs() << "Cutting edges... Done\n"); + LLVM_DEBUG(dbgs() << "Cut " << CutEdges.count() << " edges\n"); + + LLVM_DEBUG(dbgs() << "Inserting LFENCEs...\n"); + int FencesInserted = insertFences(MF, *Graph, CutEdges); + LLVM_DEBUG(dbgs() << "Inserting LFENCEs... Done\n"); + LLVM_DEBUG(dbgs() << "Inserted " << FencesInserted << " fences\n"); + + return FencesInserted; +} + +int X86LoadValueInjectionLoadHardeningPass::insertFences( + MachineFunction &MF, MachineGadgetGraph &G, + EdgeSet &CutEdges /* in, out */) const { + int FencesInserted = 0; + for (const auto &N : G.nodes()) { + for (const auto &E : N.edges()) { + if (CutEdges.contains(E)) { + MachineInstr *MI = N.getValue(), *Prev; + MachineBasicBlock *MBB; // Insert an LFENCE in this MBB + MachineBasicBlock::iterator InsertionPt; // ...at this point + if (MI == MachineGadgetGraph::ArgNodeSentinel) { + // insert LFENCE at beginning of entry block + MBB = &MF.front(); + InsertionPt = MBB->begin(); + Prev = nullptr; + } else if (MI->isBranch()) { // insert the LFENCE before the branch + MBB = MI->getParent(); + InsertionPt = MI; + Prev = MI->getPrevNode(); + // Remove all egress CFG edges from this branch because the inserted + // LFENCE prevents gadgets from crossing the branch. + for (const auto &E : N.edges()) { + if (MachineGadgetGraph::isCFGEdge(E)) + CutEdges.insert(E); + } + } else { // insert the LFENCE after the instruction + MBB = MI->getParent(); + InsertionPt = MI->getNextNode() ? 
MI->getNextNode() : MBB->end(); + Prev = InsertionPt == MBB->end() + ? (MBB->empty() ? nullptr : &MBB->back()) + : InsertionPt->getPrevNode(); + } + // Ensure this insertion is not redundant (two LFENCEs in sequence). + if ((InsertionPt == MBB->end() || !isFence(&*InsertionPt)) && + (!Prev || !isFence(Prev))) { + BuildMI(*MBB, InsertionPt, DebugLoc(), TII->get(X86::LFENCE)); + ++FencesInserted; + } + } + } + } + return FencesInserted; +} + +bool X86LoadValueInjectionLoadHardeningPass::instrUsesRegToAccessMemory( + const MachineInstr &MI, unsigned Reg) const { + if (!MI.mayLoadOrStore() || MI.getOpcode() == X86::MFENCE || + MI.getOpcode() == X86::SFENCE || MI.getOpcode() == X86::LFENCE) + return false; + + // FIXME: This does not handle pseudo loading instruction like TCRETURN* + const MCInstrDesc &Desc = MI.getDesc(); + int MemRefBeginIdx = X86II::getMemoryOperandNo(Desc.TSFlags); + if (MemRefBeginIdx < 0) { + LLVM_DEBUG(dbgs() << "Warning: unable to obtain memory operand for loading " + "instruction:\n"; + MI.print(dbgs()); dbgs() << '\n';); + return false; + } + MemRefBeginIdx += X86II::getOperandBias(Desc); + + const MachineOperand &BaseMO = + MI.getOperand(MemRefBeginIdx + X86::AddrBaseReg); + const MachineOperand &IndexMO = + MI.getOperand(MemRefBeginIdx + X86::AddrIndexReg); + return (BaseMO.isReg() && BaseMO.getReg() != X86::NoRegister && + TRI->regsOverlap(BaseMO.getReg(), Reg)) || + (IndexMO.isReg() && IndexMO.getReg() != X86::NoRegister && + TRI->regsOverlap(IndexMO.getReg(), Reg)); +} + +bool X86LoadValueInjectionLoadHardeningPass::instrUsesRegToBranch( + const MachineInstr &MI, unsigned Reg) const { + if (!MI.isConditionalBranch()) + return false; + for (const MachineOperand &Use : MI.uses()) + if (Use.isReg() && Use.getReg() == Reg) + return true; + return false; +} + +INITIALIZE_PASS_BEGIN(X86LoadValueInjectionLoadHardeningPass, PASS_KEY, + "X86 LVI load hardening", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) 
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineDominanceFrontier) +INITIALIZE_PASS_END(X86LoadValueInjectionLoadHardeningPass, PASS_KEY, + "X86 LVI load hardening", false, false) + +FunctionPass *llvm::createX86LoadValueInjectionLoadHardeningPass() { + return new X86LoadValueInjectionLoadHardeningPass(); +} + +namespace { + +/// The `X86LoadValueInjectionLoadHardeningPass` above depends on expensive +/// analysis passes that add complexity to the pipeline. This complexity +/// can cause noticeable overhead when no optimizations are enabled, i.e., -O0. +/// The purpose of `X86LoadValueInjectionLoadHardeningUnoptimizedPass` is to +/// provide the same security as the optimized pass, but without adding +/// unnecessary complexity to the LLVM pipeline. +/// +/// The behavior of this pass is simply to insert an LFENCE after every load +/// instruction. +class X86LoadValueInjectionLoadHardeningUnoptimizedPass + : public MachineFunctionPass { +public: + X86LoadValueInjectionLoadHardeningUnoptimizedPass() + : MachineFunctionPass(ID) {} + + StringRef getPassName() const override { + return "X86 Load Value Injection (LVI) Load Hardening (Unoptimized)"; + } + bool runOnMachineFunction(MachineFunction &MF) override; + static char ID; +}; + +} // end anonymous namespace + +char X86LoadValueInjectionLoadHardeningUnoptimizedPass::ID = 0; + +bool X86LoadValueInjectionLoadHardeningUnoptimizedPass::runOnMachineFunction( + MachineFunction &MF) { + LLVM_DEBUG(dbgs() << "***** " << getPassName() << " : " << MF.getName() + << " *****\n"); + const X86Subtarget *STI = &MF.getSubtarget<X86Subtarget>(); + if (!STI->useLVILoadHardening()) + return false; + + // FIXME: support 32-bit + if (!STI->is64Bit()) + report_fatal_error("LVI load hardening is only supported on 64-bit", false); + + // Don't skip functions with the "optnone" attr but participate in opt-bisect. 
+ const Function &F = MF.getFunction(); + if (!F.hasOptNone() && skipFunction(F)) + return false; + + bool Modified = false; + ++NumFunctionsConsidered; + + const TargetInstrInfo *TII = STI->getInstrInfo(); + for (auto &MBB : MF) { + for (auto &MI : MBB) { + if (!MI.mayLoad() || MI.getOpcode() == X86::LFENCE || + MI.getOpcode() == X86::MFENCE) + continue; + + MachineBasicBlock::iterator InsertionPt = + MI.getNextNode() ? MI.getNextNode() : MBB.end(); + BuildMI(MBB, InsertionPt, DebugLoc(), TII->get(X86::LFENCE)); + ++NumFences; + Modified = true; + } + } + + if (Modified) + ++NumFunctionsMitigated; + + return Modified; +} + +INITIALIZE_PASS(X86LoadValueInjectionLoadHardeningUnoptimizedPass, PASS_KEY, + "X86 LVI load hardening", false, false) + +FunctionPass *llvm::createX86LoadValueInjectionLoadHardeningUnoptimizedPass() { + return new X86LoadValueInjectionLoadHardeningUnoptimizedPass(); +} diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86LoadValueInjectionRetHardening.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86LoadValueInjectionRetHardening.cpp new file mode 100644 index 000000000000..6e1134a25950 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86LoadValueInjectionRetHardening.cpp @@ -0,0 +1,143 @@ +//===-- X86LoadValueInjectionRetHardening.cpp - LVI RET hardening for x86 --==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// Description: Replaces every `ret` instruction with the sequence: +/// ``` +/// pop <scratch-reg> +/// lfence +/// jmp *<scratch-reg> +/// ``` +/// where `<scratch-reg>` is some available scratch register, according to the +/// calling convention of the function being mitigated. 
+/// +//===----------------------------------------------------------------------===// + +#include "X86.h" +#include "X86InstrBuilder.h" +#include "X86Subtarget.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/Debug.h" +#include <bitset> + +using namespace llvm; + +#define PASS_KEY "x86-lvi-ret" +#define DEBUG_TYPE PASS_KEY + +STATISTIC(NumFences, "Number of LFENCEs inserted for LVI mitigation"); +STATISTIC(NumFunctionsConsidered, "Number of functions analyzed"); +STATISTIC(NumFunctionsMitigated, "Number of functions for which mitigations " + "were deployed"); + +namespace { + +class X86LoadValueInjectionRetHardeningPass : public MachineFunctionPass { +public: + X86LoadValueInjectionRetHardeningPass() : MachineFunctionPass(ID) {} + StringRef getPassName() const override { + return "X86 Load Value Injection (LVI) Ret-Hardening"; + } + bool runOnMachineFunction(MachineFunction &MF) override; + + static char ID; +}; + +} // end anonymous namespace + +char X86LoadValueInjectionRetHardeningPass::ID = 0; + +bool X86LoadValueInjectionRetHardeningPass::runOnMachineFunction( + MachineFunction &MF) { + LLVM_DEBUG(dbgs() << "***** " << getPassName() << " : " << MF.getName() + << " *****\n"); + const X86Subtarget *Subtarget = &MF.getSubtarget<X86Subtarget>(); + if (!Subtarget->useLVIControlFlowIntegrity() || !Subtarget->is64Bit()) + return false; // FIXME: support 32-bit + + // Don't skip functions with the "optnone" attr but participate in opt-bisect. 
+ const Function &F = MF.getFunction(); + if (!F.hasOptNone() && skipFunction(F)) + return false; + + ++NumFunctionsConsidered; + const X86RegisterInfo *TRI = Subtarget->getRegisterInfo(); + const X86InstrInfo *TII = Subtarget->getInstrInfo(); + unsigned ClobberReg = X86::NoRegister; + std::bitset<X86::NUM_TARGET_REGS> UnclobberableGR64s; + UnclobberableGR64s.set(X86::RSP); // can't clobber stack pointer + UnclobberableGR64s.set(X86::RIP); // can't clobber instruction pointer + UnclobberableGR64s.set(X86::RAX); // used for function return + UnclobberableGR64s.set(X86::RDX); // used for function return + + // We can clobber any register allowed by the function's calling convention. + for (const MCPhysReg *PR = TRI->getCalleeSavedRegs(&MF); auto Reg = *PR; ++PR) + UnclobberableGR64s.set(Reg); + for (auto &Reg : X86::GR64RegClass) { + if (!UnclobberableGR64s.test(Reg)) { + ClobberReg = Reg; + break; + } + } + + if (ClobberReg != X86::NoRegister) { + LLVM_DEBUG(dbgs() << "Selected register " + << Subtarget->getRegisterInfo()->getRegAsmName(ClobberReg) + << " to clobber\n"); + } else { + LLVM_DEBUG(dbgs() << "Could not find a register to clobber\n"); + } + + bool Modified = false; + for (auto &MBB : MF) { + if (MBB.empty()) + continue; + + MachineInstr &MI = MBB.back(); + if (MI.getOpcode() != X86::RETQ) + continue; + + if (ClobberReg != X86::NoRegister) { + MBB.erase_instr(&MI); + BuildMI(MBB, MBB.end(), DebugLoc(), TII->get(X86::POP64r)) + .addReg(ClobberReg, RegState::Define) + .setMIFlag(MachineInstr::FrameDestroy); + BuildMI(MBB, MBB.end(), DebugLoc(), TII->get(X86::LFENCE)); + BuildMI(MBB, MBB.end(), DebugLoc(), TII->get(X86::JMP64r)) + .addReg(ClobberReg); + } else { + // In case there is no available scratch register, we can still read from + // RSP to assert that RSP points to a valid page. The write to RSP is + // also helpful because it verifies that the stack's write permissions + // are intact. 
+ MachineInstr *Fence = BuildMI(MBB, MI, DebugLoc(), TII->get(X86::LFENCE)); + addRegOffset(BuildMI(MBB, Fence, DebugLoc(), TII->get(X86::SHL64mi)), + X86::RSP, false, 0) + .addImm(0) + ->addRegisterDead(X86::EFLAGS, TRI); + } + + ++NumFences; + Modified = true; + } + + if (Modified) + ++NumFunctionsMitigated; + return Modified; +} + +INITIALIZE_PASS(X86LoadValueInjectionRetHardeningPass, PASS_KEY, + "X86 LVI ret hardener", false, false) + +FunctionPass *llvm::createX86LoadValueInjectionRetHardeningPass() { + return new X86LoadValueInjectionRetHardeningPass(); +} diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86MCInstLower.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86MCInstLower.cpp index 7f49c6e861d4..f5caaaae4d84 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86MCInstLower.cpp +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86MCInstLower.cpp @@ -1220,8 +1220,8 @@ void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI, break; case MachineOperand::MO_Register: // FIXME: Add retpoline support and remove this. - if (Subtarget->useRetpolineIndirectCalls()) - report_fatal_error("Lowering register statepoints with retpoline not " + if (Subtarget->useIndirectThunkCalls()) + report_fatal_error("Lowering register statepoints with thunks not " "yet implemented."); CallTargetMCOp = MCOperand::createReg(CallTarget.getReg()); CallOpcode = X86::CALL64r; @@ -1399,9 +1399,9 @@ void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI, EmitAndCountInstruction( MCInstBuilder(X86::MOV64ri).addReg(ScratchReg).addOperand(CalleeMCOp)); // FIXME: Add retpoline support and remove this. 
- if (Subtarget->useRetpolineIndirectCalls()) + if (Subtarget->useIndirectThunkCalls()) report_fatal_error( - "Lowering patchpoint with retpoline not yet implemented."); + "Lowering patchpoint with thunks not yet implemented."); EmitAndCountInstruction(MCInstBuilder(X86::CALL64r).addReg(ScratchReg)); } diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86RetpolineThunks.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86RetpolineThunks.cpp deleted file mode 100644 index 9085d7f068ac..000000000000 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86RetpolineThunks.cpp +++ /dev/null @@ -1,286 +0,0 @@ -//======- X86RetpolineThunks.cpp - Construct retpoline thunks for x86 --=====// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// Pass that injects an MI thunk implementing a "retpoline". This is -/// a RET-implemented trampoline that is used to lower indirect calls in a way -/// that prevents speculation on some x86 processors and can be used to mitigate -/// security vulnerabilities due to targeted speculative execution and side -/// channels such as CVE-2017-5715. -/// -/// TODO(chandlerc): All of this code could use better comments and -/// documentation. 
-/// -//===----------------------------------------------------------------------===// - -#include "X86.h" -#include "X86InstrBuilder.h" -#include "X86Subtarget.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/TargetPassConfig.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Module.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -#define DEBUG_TYPE "x86-retpoline-thunks" - -static const char ThunkNamePrefix[] = "__llvm_retpoline_"; -static const char R11ThunkName[] = "__llvm_retpoline_r11"; -static const char EAXThunkName[] = "__llvm_retpoline_eax"; -static const char ECXThunkName[] = "__llvm_retpoline_ecx"; -static const char EDXThunkName[] = "__llvm_retpoline_edx"; -static const char EDIThunkName[] = "__llvm_retpoline_edi"; - -namespace { -class X86RetpolineThunks : public MachineFunctionPass { -public: - static char ID; - - X86RetpolineThunks() : MachineFunctionPass(ID) {} - - StringRef getPassName() const override { return "X86 Retpoline Thunks"; } - - bool doInitialization(Module &M) override; - bool runOnMachineFunction(MachineFunction &F) override; - - void getAnalysisUsage(AnalysisUsage &AU) const override { - MachineFunctionPass::getAnalysisUsage(AU); - AU.addRequired<MachineModuleInfoWrapperPass>(); - AU.addPreserved<MachineModuleInfoWrapperPass>(); - } - -private: - MachineModuleInfo *MMI = nullptr; - const TargetMachine *TM = nullptr; - bool Is64Bit = false; - const X86Subtarget *STI = nullptr; - const X86InstrInfo *TII = nullptr; - - bool InsertedThunks = false; - - void createThunkFunction(Module &M, StringRef Name); - void insertRegReturnAddrClobber(MachineBasicBlock &MBB, unsigned Reg); - void populateThunk(MachineFunction &MF, unsigned Reg); -}; - -} // end anonymous 
namespace - -FunctionPass *llvm::createX86RetpolineThunksPass() { - return new X86RetpolineThunks(); -} - -char X86RetpolineThunks::ID = 0; - -bool X86RetpolineThunks::doInitialization(Module &M) { - InsertedThunks = false; - return false; -} - -bool X86RetpolineThunks::runOnMachineFunction(MachineFunction &MF) { - LLVM_DEBUG(dbgs() << getPassName() << '\n'); - - TM = &MF.getTarget();; - STI = &MF.getSubtarget<X86Subtarget>(); - TII = STI->getInstrInfo(); - Is64Bit = TM->getTargetTriple().getArch() == Triple::x86_64; - - MMI = &getAnalysis<MachineModuleInfoWrapperPass>().getMMI(); - Module &M = const_cast<Module &>(*MMI->getModule()); - - // If this function is not a thunk, check to see if we need to insert - // a thunk. - if (!MF.getName().startswith(ThunkNamePrefix)) { - // If we've already inserted a thunk, nothing else to do. - if (InsertedThunks) - return false; - - // Only add a thunk if one of the functions has the retpoline feature - // enabled in its subtarget, and doesn't enable external thunks. - // FIXME: Conditionalize on indirect calls so we don't emit a thunk when - // nothing will end up calling it. - // FIXME: It's a little silly to look at every function just to enumerate - // the subtargets, but eventually we'll want to look at them for indirect - // calls, so maybe this is OK. - if ((!STI->useRetpolineIndirectCalls() && - !STI->useRetpolineIndirectBranches()) || - STI->useRetpolineExternalThunk()) - return false; - - // Otherwise, we need to insert the thunk. - // WARNING: This is not really a well behaving thing to do in a function - // pass. We extract the module and insert a new function (and machine - // function) directly into the module. - if (Is64Bit) - createThunkFunction(M, R11ThunkName); - else - for (StringRef Name : - {EAXThunkName, ECXThunkName, EDXThunkName, EDIThunkName}) - createThunkFunction(M, Name); - InsertedThunks = true; - return true; - } - - // If this *is* a thunk function, we need to populate it with the correct MI. 
- if (Is64Bit) { - assert(MF.getName() == "__llvm_retpoline_r11" && - "Should only have an r11 thunk on 64-bit targets"); - - // __llvm_retpoline_r11: - // callq .Lr11_call_target - // .Lr11_capture_spec: - // pause - // lfence - // jmp .Lr11_capture_spec - // .align 16 - // .Lr11_call_target: - // movq %r11, (%rsp) - // retq - populateThunk(MF, X86::R11); - } else { - // For 32-bit targets we need to emit a collection of thunks for various - // possible scratch registers as well as a fallback that uses EDI, which is - // normally callee saved. - // __llvm_retpoline_eax: - // calll .Leax_call_target - // .Leax_capture_spec: - // pause - // jmp .Leax_capture_spec - // .align 16 - // .Leax_call_target: - // movl %eax, (%esp) # Clobber return addr - // retl - // - // __llvm_retpoline_ecx: - // ... # Same setup - // movl %ecx, (%esp) - // retl - // - // __llvm_retpoline_edx: - // ... # Same setup - // movl %edx, (%esp) - // retl - // - // __llvm_retpoline_edi: - // ... # Same setup - // movl %edi, (%esp) - // retl - if (MF.getName() == EAXThunkName) - populateThunk(MF, X86::EAX); - else if (MF.getName() == ECXThunkName) - populateThunk(MF, X86::ECX); - else if (MF.getName() == EDXThunkName) - populateThunk(MF, X86::EDX); - else if (MF.getName() == EDIThunkName) - populateThunk(MF, X86::EDI); - else - llvm_unreachable("Invalid thunk name on x86-32!"); - } - - return true; -} - -void X86RetpolineThunks::createThunkFunction(Module &M, StringRef Name) { - assert(Name.startswith(ThunkNamePrefix) && - "Created a thunk with an unexpected prefix!"); - - LLVMContext &Ctx = M.getContext(); - auto Type = FunctionType::get(Type::getVoidTy(Ctx), false); - Function *F = - Function::Create(Type, GlobalValue::LinkOnceODRLinkage, Name, &M); - F->setVisibility(GlobalValue::HiddenVisibility); - F->setComdat(M.getOrInsertComdat(Name)); - - // Add Attributes so that we don't create a frame, unwind information, or - // inline. 
- AttrBuilder B; - B.addAttribute(llvm::Attribute::NoUnwind); - B.addAttribute(llvm::Attribute::Naked); - F->addAttributes(llvm::AttributeList::FunctionIndex, B); - - // Populate our function a bit so that we can verify. - BasicBlock *Entry = BasicBlock::Create(Ctx, "entry", F); - IRBuilder<> Builder(Entry); - - Builder.CreateRetVoid(); - - // MachineFunctions/MachineBasicBlocks aren't created automatically for the - // IR-level constructs we already made. Create them and insert them into the - // module. - MachineFunction &MF = MMI->getOrCreateMachineFunction(*F); - MachineBasicBlock *EntryMBB = MF.CreateMachineBasicBlock(Entry); - - // Insert EntryMBB into MF. It's not in the module until we do this. - MF.insert(MF.end(), EntryMBB); -} - -void X86RetpolineThunks::insertRegReturnAddrClobber(MachineBasicBlock &MBB, - unsigned Reg) { - const unsigned MovOpc = Is64Bit ? X86::MOV64mr : X86::MOV32mr; - const unsigned SPReg = Is64Bit ? X86::RSP : X86::ESP; - addRegOffset(BuildMI(&MBB, DebugLoc(), TII->get(MovOpc)), SPReg, false, 0) - .addReg(Reg); -} - -void X86RetpolineThunks::populateThunk(MachineFunction &MF, - unsigned Reg) { - // Set MF properties. We never use vregs... - MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs); - - // Grab the entry MBB and erase any other blocks. O0 codegen appears to - // generate two bbs for the entry block. - MachineBasicBlock *Entry = &MF.front(); - Entry->clear(); - while (MF.size() > 1) - MF.erase(std::next(MF.begin())); - - MachineBasicBlock *CaptureSpec = MF.CreateMachineBasicBlock(Entry->getBasicBlock()); - MachineBasicBlock *CallTarget = MF.CreateMachineBasicBlock(Entry->getBasicBlock()); - MCSymbol *TargetSym = MF.getContext().createTempSymbol(); - MF.push_back(CaptureSpec); - MF.push_back(CallTarget); - - const unsigned CallOpc = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32; - const unsigned RetOpc = Is64Bit ? 
X86::RETQ : X86::RETL; - - Entry->addLiveIn(Reg); - BuildMI(Entry, DebugLoc(), TII->get(CallOpc)).addSym(TargetSym); - - // The MIR verifier thinks that the CALL in the entry block will fall through - // to CaptureSpec, so mark it as the successor. Technically, CaptureTarget is - // the successor, but the MIR verifier doesn't know how to cope with that. - Entry->addSuccessor(CaptureSpec); - - // In the capture loop for speculation, we want to stop the processor from - // speculating as fast as possible. On Intel processors, the PAUSE instruction - // will block speculation without consuming any execution resources. On AMD - // processors, the PAUSE instruction is (essentially) a nop, so we also use an - // LFENCE instruction which they have advised will stop speculation as well - // with minimal resource utilization. We still end the capture with a jump to - // form an infinite loop to fully guarantee that no matter what implementation - // of the x86 ISA, speculating this code path never escapes. - BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::PAUSE)); - BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::LFENCE)); - BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::JMP_1)).addMBB(CaptureSpec); - CaptureSpec->setHasAddressTaken(); - CaptureSpec->addSuccessor(CaptureSpec); - - CallTarget->addLiveIn(Reg); - CallTarget->setHasAddressTaken(); - CallTarget->setAlignment(Align(16)); - insertRegReturnAddrClobber(*CallTarget, Reg); - CallTarget->back().setPreInstrSymbol(MF, TargetSym); - BuildMI(CallTarget, DebugLoc(), TII->get(RetOpc)); -} diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86Subtarget.h b/contrib/llvm-project/llvm/lib/Target/X86/X86Subtarget.h index f4e8d30328ca..af5153243c8b 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86Subtarget.h +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86Subtarget.h @@ -421,6 +421,16 @@ protected: /// than emitting one inside the compiler. 
bool UseRetpolineExternalThunk = false; + /// Prevent generation of indirect call/branch instructions from memory, + /// and force all indirect call/branch instructions from a register to be + /// preceded by an LFENCE. Also decompose RET instructions into a + /// POP+LFENCE+JMP sequence. + bool UseLVIControlFlowIntegrity = false; + + /// Insert LFENCE instructions to prevent data speculatively injected into + /// loads from being used maliciously. + bool UseLVILoadHardening = false; + /// Use software floating point for code generation. bool UseSoftFloat = false; @@ -707,8 +717,21 @@ public: return UseRetpolineIndirectBranches; } bool useRetpolineExternalThunk() const { return UseRetpolineExternalThunk; } + + // These are generic getters that OR together all of the thunk types + // supported by the subtarget. Therefore useIndirectThunk*() will return true + // if any respective thunk feature is enabled. + bool useIndirectThunkCalls() const { + return useRetpolineIndirectCalls() || useLVIControlFlowIntegrity(); + } + bool useIndirectThunkBranches() const { + return useRetpolineIndirectBranches() || useLVIControlFlowIntegrity(); + } + bool preferMaskRegisters() const { return PreferMaskRegisters; } bool useGLMDivSqrtCosts() const { return UseGLMDivSqrtCosts; } + bool useLVIControlFlowIntegrity() const { return UseLVIControlFlowIntegrity; } + bool useLVILoadHardening() const { return UseLVILoadHardening; } unsigned getPreferVectorWidth() const { return PreferVectorWidth; } unsigned getRequiredVectorWidth() const { return RequiredVectorWidth; } @@ -853,10 +876,10 @@ public: /// Return true if the subtarget allows calls to immediate address. bool isLegalToCallImmediateAddr() const; - /// If we are using retpolines, we need to expand indirectbr to avoid it + /// If we are using indirect thunks, we need to expand indirectbr to avoid it /// lowering to an actual indirect jump. 
bool enableIndirectBrExpand() const override { - return useRetpolineIndirectBranches(); + return useIndirectThunkBranches(); } /// Enable the MachineScheduler pass for all X86 subtargets. diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86TargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86TargetMachine.cpp index 8c696e9adbed..9f639ffa22ec 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86TargetMachine.cpp +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86TargetMachine.cpp @@ -82,6 +82,8 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86Target() { initializeX86SpeculativeLoadHardeningPassPass(PR); initializeX86FlagsCopyLoweringPassPass(PR); initializeX86CondBrFoldingPassPass(PR); + initializeX86LoadValueInjectionLoadHardeningPassPass(PR); + initializeX86LoadValueInjectionRetHardeningPassPass(PR); initializeX86OptimizeLEAPassPass(PR); } @@ -222,7 +224,7 @@ X86TargetMachine::X86TargetMachine(const Target &T, const Triple &TT, getEffectiveRelocModel(TT, JIT, RM), getEffectiveX86CodeModel(CM, JIT, TT.getArch() == Triple::x86_64), OL), - TLOF(createTLOF(getTargetTriple())) { + TLOF(createTLOF(getTargetTriple())), IsJIT(JIT) { // On PS4, the "return address" of a 'noreturn' call must still be within // the calling function, and TrapUnreachable is an easy way to get that. 
if (TT.isPS4() || TT.isOSBinFormatMachO()) { @@ -496,6 +498,10 @@ void X86PassConfig::addMachineSSAOptimization() { void X86PassConfig::addPostRegAlloc() { addPass(createX86FloatingPointStackifierPass()); + if (getOptLevel() != CodeGenOpt::None) + addPass(createX86LoadValueInjectionLoadHardeningPass()); + else + addPass(createX86LoadValueInjectionLoadHardeningUnoptimizedPass()); } void X86PassConfig::addPreSched2() { addPass(createX86ExpandPseudoPass()); } @@ -525,7 +531,7 @@ void X86PassConfig::addPreEmitPass2() { const Triple &TT = TM->getTargetTriple(); const MCAsmInfo *MAI = TM->getMCAsmInfo(); - addPass(createX86RetpolineThunksPass()); + addPass(createX86IndirectThunksPass()); // Insert extra int3 instructions after trailing call instructions to avoid // issues in the unwinder. @@ -542,6 +548,7 @@ void X86PassConfig::addPreEmitPass2() { // Identify valid longjmp targets for Windows Control Flow Guard. if (TT.isOSWindows()) addPass(createCFGuardLongjmpPass()); + addPass(createX86LoadValueInjectionRetHardeningPass()); } std::unique_ptr<CSEConfigBase> X86PassConfig::getCSEConfig() const { diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86TargetMachine.h b/contrib/llvm-project/llvm/lib/Target/X86/X86TargetMachine.h index ec3db7b1e9e8..757ce8bc5c72 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86TargetMachine.h +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86TargetMachine.h @@ -30,6 +30,8 @@ class TargetTransformInfo; class X86TargetMachine final : public LLVMTargetMachine { std::unique_ptr<TargetLoweringObjectFile> TLOF; mutable StringMap<std::unique_ptr<X86Subtarget>> SubtargetMap; + // True if this is used in JIT. 
+ bool IsJIT; public: X86TargetMachine(const Target &T, const Triple &TT, StringRef CPU, @@ -52,6 +54,8 @@ public: TargetLoweringObjectFile *getObjFileLowering() const override { return TLOF.get(); } + + bool isJIT() const { return IsJIT; } }; } // end namespace llvm diff --git a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroSplit.cpp index 66cb3e74e53e..1e067a45d016 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroSplit.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroSplit.cpp @@ -1155,7 +1155,10 @@ static void simplifySuspendPoints(coro::Shape &Shape) { if (N == 0) return; while (true) { - if (simplifySuspendPoint(cast<CoroSuspendInst>(S[I]), Shape.CoroBegin)) { + auto SI = cast<CoroSuspendInst>(S[I]); + // Leave final.suspend to handleFinalSuspend since it is undefined behavior + // to resume a coroutine suspended at the final suspend point. + if (!SI->isFinal() && simplifySuspendPoint(SI, Shape.CoroBegin)) { if (--N == I) break; std::swap(S[I], S[N]); diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/GlobalOpt.cpp index 0fd966457ece..b75e853553c5 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -450,14 +450,19 @@ static bool CanDoGlobalSRA(GlobalVariable *GV) { /// Copy over the debug info for a variable to its SRA replacements. 
static void transferSRADebugInfo(GlobalVariable *GV, GlobalVariable *NGV, uint64_t FragmentOffsetInBits, - uint64_t FragmentSizeInBits, - unsigned NumElements) { + uint64_t FragmentSizeInBits) { SmallVector<DIGlobalVariableExpression *, 1> GVs; GV->getDebugInfo(GVs); for (auto *GVE : GVs) { DIVariable *Var = GVE->getVariable(); + Optional<uint64_t> VarSize = Var->getSizeInBits(); + DIExpression *Expr = GVE->getExpression(); - if (NumElements > 1) { + // If the FragmentSize is smaller than the variable, + // emit a fragment expression. + // If the variable size is unknown a fragment must be + // emitted to be safe. + if (!VarSize || FragmentSizeInBits < *VarSize) { if (auto E = DIExpression::createFragmentExpression( Expr, FragmentOffsetInBits, FragmentSizeInBits)) Expr = *E; @@ -539,8 +544,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { // Copy over the debug info for the variable. uint64_t Size = DL.getTypeAllocSizeInBits(NGV->getValueType()); uint64_t FragmentOffsetInBits = Layout.getElementOffsetInBits(ElementIdx); - transferSRADebugInfo(GV, NGV, FragmentOffsetInBits, Size, - STy->getNumElements()); + transferSRADebugInfo(GV, NGV, FragmentOffsetInBits, Size); } else if (SequentialType *STy = dyn_cast<SequentialType>(Ty)) { uint64_t EltSize = DL.getTypeAllocSize(ElTy); Align EltAlign(DL.getABITypeAlignment(ElTy)); @@ -553,7 +557,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { if (NewAlign > EltAlign) NGV->setAlignment(NewAlign); transferSRADebugInfo(GV, NGV, FragmentSizeInBits * ElementIdx, - FragmentSizeInBits, STy->getNumElements()); + FragmentSizeInBits); } } diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index ec976a971e3c..23561c25c50a 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ 
b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -1768,7 +1768,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { Constant *C2; // C-(C2-X) --> X+(C-C2) - if (match(Op1, m_Sub(m_Constant(C2), m_Value(X)))) + if (match(Op1, m_Sub(m_Constant(C2), m_Value(X))) && !isa<ConstantExpr>(C2)) return BinaryOperator::CreateAdd(X, ConstantExpr::getSub(C, C2)); // C-(X+C2) --> (C-C2)-X diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp index bf3e4ed3e31f..8fd9e7a2e610 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -115,7 +115,8 @@ private: // list. Function * insertCounterWriteout(ArrayRef<std::pair<GlobalVariable *, MDNode *>>); - Function *insertFlush(ArrayRef<std::pair<GlobalVariable *, MDNode *>>); + Function *insertReset(ArrayRef<std::pair<GlobalVariable *, MDNode *>>); + Function *insertFlush(Function *ResetF); void AddFlushBeforeForkAndExec(); @@ -630,35 +631,76 @@ static bool shouldKeepInEntry(BasicBlock::iterator It) { } void GCOVProfiler::AddFlushBeforeForkAndExec() { - SmallVector<Instruction *, 2> ForkAndExecs; + SmallVector<CallInst *, 2> Forks; + SmallVector<CallInst *, 2> Execs; for (auto &F : M->functions()) { auto *TLI = &GetTLI(F); for (auto &I : instructions(F)) { if (CallInst *CI = dyn_cast<CallInst>(&I)) { if (Function *Callee = CI->getCalledFunction()) { LibFunc LF; - if (TLI->getLibFunc(*Callee, LF) && - (LF == LibFunc_fork || LF == LibFunc_execl || - LF == LibFunc_execle || LF == LibFunc_execlp || - LF == LibFunc_execv || LF == LibFunc_execvp || - LF == LibFunc_execve || LF == LibFunc_execvpe || - LF == LibFunc_execvP)) { - ForkAndExecs.push_back(&I); + if (TLI->getLibFunc(*Callee, LF)) { + if (LF == LibFunc_fork) { +#if !defined(_WIN32) + Forks.push_back(CI); +#endif + } else 
if (LF == LibFunc_execl || LF == LibFunc_execle || + LF == LibFunc_execlp || LF == LibFunc_execv || + LF == LibFunc_execvp || LF == LibFunc_execve || + LF == LibFunc_execvpe || LF == LibFunc_execvP) { + Execs.push_back(CI); + } } } } } } - // We need to split the block after the fork/exec call - // because else the counters for the lines after will be - // the same as before the call. - for (auto I : ForkAndExecs) { - IRBuilder<> Builder(I); + for (auto F : Forks) { + IRBuilder<> Builder(F); + BasicBlock *Parent = F->getParent(); + auto NextInst = ++F->getIterator(); + + // We've a fork so just reset the counters in the child process + FunctionType *FTy = FunctionType::get(Builder.getInt32Ty(), {}, false); + FunctionCallee GCOVFork = M->getOrInsertFunction("__gcov_fork", FTy); + F->setCalledFunction(GCOVFork); + + // We split just after the fork to have a counter for the lines after + // Anyway there's a bug: + // void foo() { fork(); } + // void bar() { foo(); blah(); } + // then "blah();" will be called 2 times but showed as 1 + // because "blah()" belongs to the same block as "foo();" + Parent->splitBasicBlock(NextInst); + + // back() is a br instruction with a debug location + // equals to the one from NextAfterFork + // So to avoid to have two debug locs on two blocks just change it + DebugLoc Loc = F->getDebugLoc(); + Parent->back().setDebugLoc(Loc); + } + + for (auto E : Execs) { + IRBuilder<> Builder(E); + BasicBlock *Parent = E->getParent(); + auto NextInst = ++E->getIterator(); + + // Since the process is replaced by a new one we need to write out gcdas + // No need to reset the counters since they'll be lost after the exec** FunctionType *FTy = FunctionType::get(Builder.getVoidTy(), {}, false); - FunctionCallee GCOVFlush = M->getOrInsertFunction("__gcov_flush", FTy); - Builder.CreateCall(GCOVFlush); - I->getParent()->splitBasicBlock(I); + FunctionCallee WriteoutF = + M->getOrInsertFunction("llvm_writeout_files", FTy); + Builder.CreateCall(WriteoutF); + + 
DebugLoc Loc = E->getDebugLoc(); + Builder.SetInsertPoint(&*NextInst); + // If the exec** fails we must reset the counters since they've been + // dumped + FunctionCallee ResetF = M->getOrInsertFunction("llvm_reset_counters", FTy); + Builder.CreateCall(ResetF)->setDebugLoc(Loc); + Parent->splitBasicBlock(NextInst); + Parent->back().setDebugLoc(Loc); } } @@ -850,7 +892,8 @@ bool GCOVProfiler::emitProfileArcs() { } Function *WriteoutF = insertCounterWriteout(CountersBySP); - Function *FlushF = insertFlush(CountersBySP); + Function *ResetF = insertReset(CountersBySP); + Function *FlushF = insertFlush(ResetF); // Create a small bit of code that registers the "__llvm_gcov_writeout" to // be executed at exit and the "__llvm_gcov_flush" function to be executed @@ -868,16 +911,14 @@ bool GCOVProfiler::emitProfileArcs() { IRBuilder<> Builder(BB); FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); - Type *Params[] = { - PointerType::get(FTy, 0), - PointerType::get(FTy, 0) - }; + Type *Params[] = {PointerType::get(FTy, 0), PointerType::get(FTy, 0), + PointerType::get(FTy, 0)}; FTy = FunctionType::get(Builder.getVoidTy(), Params, false); - // Initialize the environment and register the local writeout and flush - // functions. + // Initialize the environment and register the local writeout, flush and + // reset functions. 
FunctionCallee GCOVInit = M->getOrInsertFunction("llvm_gcov_init", FTy); - Builder.CreateCall(GCOVInit, {WriteoutF, FlushF}); + Builder.CreateCall(GCOVInit, {WriteoutF, FlushF, ResetF}); Builder.CreateRetVoid(); appendToGlobalCtors(*M, F, 0); @@ -1190,8 +1231,43 @@ Function *GCOVProfiler::insertCounterWriteout( return WriteoutF; } -Function *GCOVProfiler:: -insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> > CountersBySP) { +Function *GCOVProfiler::insertReset( + ArrayRef<std::pair<GlobalVariable *, MDNode *>> CountersBySP) { + FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); + Function *ResetF = M->getFunction("__llvm_gcov_reset"); + if (!ResetF) + ResetF = Function::Create(FTy, GlobalValue::InternalLinkage, + "__llvm_gcov_reset", M); + else + ResetF->setLinkage(GlobalValue::InternalLinkage); + ResetF->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); + ResetF->addFnAttr(Attribute::NoInline); + if (Options.NoRedZone) + ResetF->addFnAttr(Attribute::NoRedZone); + + BasicBlock *Entry = BasicBlock::Create(*Ctx, "entry", ResetF); + IRBuilder<> Builder(Entry); + + // Zero out the counters. + for (const auto &I : CountersBySP) { + GlobalVariable *GV = I.first; + Constant *Null = Constant::getNullValue(GV->getValueType()); + Builder.CreateStore(Null, GV); + } + + Type *RetTy = ResetF->getReturnType(); + if (RetTy->isVoidTy()) + Builder.CreateRetVoid(); + else if (RetTy->isIntegerTy()) + // Used if __llvm_gcov_reset was implicitly declared. 
+ Builder.CreateRet(ConstantInt::get(RetTy, 0)); + else + report_fatal_error("invalid return type for __llvm_gcov_reset"); + + return ResetF; +} + +Function *GCOVProfiler::insertFlush(Function *ResetF) { FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); Function *FlushF = M->getFunction("__llvm_gcov_flush"); if (!FlushF) @@ -1212,16 +1288,10 @@ insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> > CountersBySP) { IRBuilder<> Builder(Entry); Builder.CreateCall(WriteoutF, {}); - - // Zero out the counters. - for (const auto &I : CountersBySP) { - GlobalVariable *GV = I.first; - Constant *Null = Constant::getNullValue(GV->getValueType()); - Builder.CreateStore(Null, GV); - } + Builder.CreateCall(ResetF, {}); Type *RetTy = FlushF->getReturnType(); - if (RetTy == Type::getVoidTy(*Ctx)) + if (RetTy->isVoidTy()) Builder.CreateRetVoid(); else if (RetTy->isIntegerTy()) // Used if __llvm_gcov_flush was implicitly declared. diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp index d8d7acae5c9f..81163e9fcfab 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -527,19 +527,19 @@ namespace { // Collect information about PHI nodes which can be transformed in // rewriteLoopExitValues. struct RewritePhi { - PHINode *PN; - - // Ith incoming value. - unsigned Ith; - - // Exit value after expansion. - Value *Val; - - // High Cost when expansion. - bool HighCost; - - RewritePhi(PHINode *P, unsigned I, Value *V, bool H) - : PN(P), Ith(I), Val(V), HighCost(H) {} + PHINode *PN; // For which PHI node is this replacement? + unsigned Ith; // For which incoming value? + const SCEV *ExpansionSCEV; // The SCEV of the incoming value we are rewriting. + Instruction *ExpansionPoint; // Where we'd like to expand that SCEV? + bool HighCost; // Is this expansion a high-cost? 
+ + Value *Expansion = nullptr; + bool ValidRewrite = false; + + RewritePhi(PHINode *P, unsigned I, const SCEV *Val, Instruction *ExpansionPt, + bool H) + : PN(P), Ith(I), ExpansionSCEV(Val), ExpansionPoint(ExpansionPt), + HighCost(H) {} }; } // end anonymous namespace @@ -671,41 +671,65 @@ bool IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { hasHardUserWithinLoop(L, Inst)) continue; + // Check if expansions of this SCEV would count as being high cost. bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst); - Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst); - - LLVM_DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal - << '\n' - << " LoopVal = " << *Inst << "\n"); - - if (!isValidRewrite(Inst, ExitVal)) { - DeadInsts.push_back(ExitVal); - continue; - } -#ifndef NDEBUG - // If we reuse an instruction from a loop which is neither L nor one of - // its containing loops, we end up breaking LCSSA form for this loop by - // creating a new use of its instruction. - if (auto *ExitInsn = dyn_cast<Instruction>(ExitVal)) - if (auto *EVL = LI->getLoopFor(ExitInsn->getParent())) - if (EVL != L) - assert(EVL->contains(L) && "LCSSA breach detected!"); -#endif + // Note that we must not perform expansions until after + // we query *all* the costs, because if we perform temporary expansion + // inbetween, one that we might not intend to keep, said expansion + // *may* affect cost calculation of the the next SCEV's we'll query, + // and next SCEV may errneously get smaller cost. // Collect all the candidate PHINodes to be rewritten. - RewritePhiSet.emplace_back(PN, i, ExitVal, HighCost); + RewritePhiSet.emplace_back(PN, i, ExitValue, Inst, HighCost); } } } + // Now that we've done preliminary filtering and billed all the SCEV's, + // we can perform the last sanity check - the expansion must be valid. 
+ for (RewritePhi &Phi : RewritePhiSet) { + Phi.Expansion = Rewriter.expandCodeFor(Phi.ExpansionSCEV, Phi.PN->getType(), + Phi.ExpansionPoint); + + LLVM_DEBUG(dbgs() << "rewriteLoopExitValues: AfterLoopVal = " + << *(Phi.Expansion) << '\n' + << " LoopVal = " << *(Phi.ExpansionPoint) << "\n"); + + // FIXME: isValidRewrite() is a hack. it should be an assert, eventually. + Phi.ValidRewrite = isValidRewrite(Phi.ExpansionPoint, Phi.Expansion); + if (!Phi.ValidRewrite) { + DeadInsts.push_back(Phi.Expansion); + continue; + } + +#ifndef NDEBUG + // If we reuse an instruction from a loop which is neither L nor one of + // its containing loops, we end up breaking LCSSA form for this loop by + // creating a new use of its instruction. + if (auto *ExitInsn = dyn_cast<Instruction>(Phi.Expansion)) + if (auto *EVL = LI->getLoopFor(ExitInsn->getParent())) + if (EVL != L) + assert(EVL->contains(L) && "LCSSA breach detected!"); +#endif + } + + // TODO: after isValidRewrite() is an assertion, evaluate whether + // it is beneficial to change how we calculate high-cost: + // if we have SCEV 'A' which we know we will expand, should we calculate + // the cost of other SCEV's after expanding SCEV 'A', + // thus potentially giving cost bonus to those other SCEV's? + bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet); bool Changed = false; // Transformation. for (const RewritePhi &Phi : RewritePhiSet) { + if (!Phi.ValidRewrite) + continue; + PHINode *PN = Phi.PN; - Value *ExitVal = Phi.Val; + Value *ExitVal = Phi.Expansion; // Only do the rewrite when the ExitValue can be expanded cheaply. // If LoopCanBeDel is true, rewrite exit value aggressively. @@ -844,6 +868,8 @@ bool IndVarSimplify::canLoopBeDeleted( // phase later. Skip it in the loop invariant check below. 
bool found = false; for (const RewritePhi &Phi : RewritePhiSet) { + if (!Phi.ValidRewrite) + continue; unsigned i = Phi.Ith; if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) { found = true; diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp index 623a8b711ed8..ac53ff33e836 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -104,6 +104,21 @@ static bool mergeEmptyReturnBlocks(Function &F) { continue; } + // Skip merging if this would result in a CallBr instruction with a + // duplicate destination. FIXME: See note in CodeGenPrepare.cpp. + bool SkipCallBr = false; + for (pred_iterator PI = pred_begin(&BB), E = pred_end(&BB); + PI != E && !SkipCallBr; ++PI) { + if (auto *CBI = dyn_cast<CallBrInst>((*PI)->getTerminator())) + for (unsigned i = 0, e = CBI->getNumSuccessors(); i != e; ++i) + if (RetBlock == CBI->getSuccessor(i)) { + SkipCallBr = true; + break; + } + } + if (SkipCallBr) + continue; + // Otherwise, we found a duplicate return block. Merge the two. 
Changed = true; diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/ValueMapper.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/ValueMapper.cpp index da68d3713b40..d6b01a12b937 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/ValueMapper.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/ValueMapper.cpp @@ -369,7 +369,8 @@ Value *Mapper::mapValue(const Value *V) { if (NewTy != IA->getFunctionType()) V = InlineAsm::get(NewTy, IA->getAsmString(), IA->getConstraintString(), - IA->hasSideEffects(), IA->isAlignStack()); + IA->hasSideEffects(), IA->isAlignStack(), + IA->getDialect()); } return getVM()[V] = const_cast<Value *>(V); diff --git a/contrib/llvm-project/llvm/tools/llvm-dwp/DWPError.cpp b/contrib/llvm-project/llvm/tools/llvm-dwp/DWPError.cpp new file mode 100644 index 000000000000..21d53ed6d198 --- /dev/null +++ b/contrib/llvm-project/llvm/tools/llvm-dwp/DWPError.cpp @@ -0,0 +1,3 @@ +#include "DWPError.h" +using namespace llvm; +char DWPError::ID; diff --git a/contrib/llvm-project/llvm/tools/llvm-dwp/DWPError.h b/contrib/llvm-project/llvm/tools/llvm-dwp/DWPError.h new file mode 100644 index 000000000000..62025ed4caa5 --- /dev/null +++ b/contrib/llvm-project/llvm/tools/llvm-dwp/DWPError.h @@ -0,0 +1,23 @@ +#ifndef TOOLS_LLVM_DWP_DWPERROR +#define TOOLS_LLVM_DWP_DWPERROR + +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" +#include <string> + +namespace llvm { +class DWPError : public ErrorInfo<DWPError> { +public: + DWPError(std::string Info) : Info(std::move(Info)) {} + void log(raw_ostream &OS) const override { OS << Info; } + std::error_code convertToErrorCode() const override { + llvm_unreachable("Not implemented"); + } + static char ID; + +private: + std::string Info; +}; +} + +#endif diff --git a/contrib/llvm-project/llvm/tools/llvm-dwp/DWPStringPool.h b/contrib/llvm-project/llvm/tools/llvm-dwp/DWPStringPool.h new file mode 100644 index 000000000000..7d41176b5619 --- /dev/null +++ 
b/contrib/llvm-project/llvm/tools/llvm-dwp/DWPStringPool.h @@ -0,0 +1,56 @@ +#ifndef TOOLS_LLVM_DWP_DWPSTRINGPOOL +#define TOOLS_LLVM_DWP_DWPSTRINGPOOL + +#include "llvm/ADT/DenseMap.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCStreamer.h" +#include <cassert> + +namespace llvm { +class DWPStringPool { + + struct CStrDenseMapInfo { + static inline const char *getEmptyKey() { + return reinterpret_cast<const char *>(~static_cast<uintptr_t>(0)); + } + static inline const char *getTombstoneKey() { + return reinterpret_cast<const char *>(~static_cast<uintptr_t>(1)); + } + static unsigned getHashValue(const char *Val) { + assert(Val != getEmptyKey() && "Cannot hash the empty key!"); + assert(Val != getTombstoneKey() && "Cannot hash the tombstone key!"); + return (unsigned)hash_value(StringRef(Val)); + } + static bool isEqual(const char *LHS, const char *RHS) { + if (RHS == getEmptyKey()) + return LHS == getEmptyKey(); + if (RHS == getTombstoneKey()) + return LHS == getTombstoneKey(); + return strcmp(LHS, RHS) == 0; + } + }; + + MCStreamer &Out; + MCSection *Sec; + DenseMap<const char *, uint32_t, CStrDenseMapInfo> Pool; + uint32_t Offset = 0; + +public: + DWPStringPool(MCStreamer &Out, MCSection *Sec) : Out(Out), Sec(Sec) {} + + uint32_t getOffset(const char *Str, unsigned Length) { + assert(strlen(Str) + 1 == Length && "Ensure length hint is correct"); + + auto Pair = Pool.insert(std::make_pair(Str, Offset)); + if (Pair.second) { + Out.SwitchSection(Sec); + Out.EmitBytes(StringRef(Str, Length)); + Offset += Length; + } + + return Pair.first->second; + } +}; +} + +#endif diff --git a/contrib/llvm-project/llvm/tools/llvm-dwp/llvm-dwp.cpp b/contrib/llvm-project/llvm/tools/llvm-dwp/llvm-dwp.cpp new file mode 100644 index 000000000000..23513ef8fb4e --- /dev/null +++ b/contrib/llvm-project/llvm/tools/llvm-dwp/llvm-dwp.cpp @@ -0,0 +1,749 @@ +//===-- llvm-dwp.cpp - Split DWARF merging tool for llvm ------------------===// +// +// Part of the LLVM Project, under the 
Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// A utility for merging DWARF 5 Split DWARF .dwo files into .dwp (DWARF +// package files). +// +//===----------------------------------------------------------------------===// +#include "DWPError.h" +#include "DWPStringPool.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/DebugInfo/DWARF/DWARFFormValue.h" +#include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCTargetOptionsCommandFlags.inc" +#include "llvm/Object/Decompressor.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/DataExtractor.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/InitLLVM.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/TargetSelect.h" +#include "llvm/Support/ToolOutputFile.h" +#include "llvm/Support/WithColor.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; +using namespace llvm::object; + +cl::OptionCategory DwpCategory("Specific Options"); +static cl::list<std::string> InputFiles(cl::Positional, cl::ZeroOrMore, + cl::desc("<input files>"), + cl::cat(DwpCategory)); + +static cl::list<std::string> ExecFilenames( + "e", cl::ZeroOrMore, + cl::desc("Specify the executable/library files to get the 
list of *.dwo from"), + cl::value_desc("filename"), cl::cat(DwpCategory)); + +static cl::opt<std::string> OutputFilename(cl::Required, "o", + cl::desc("Specify the output file."), + cl::value_desc("filename"), + cl::cat(DwpCategory)); + +static void writeStringsAndOffsets(MCStreamer &Out, DWPStringPool &Strings, + MCSection *StrOffsetSection, + StringRef CurStrSection, + StringRef CurStrOffsetSection) { + // Could possibly produce an error or warning if one of these was non-null but + // the other was null. + if (CurStrSection.empty() || CurStrOffsetSection.empty()) + return; + + DenseMap<uint64_t, uint32_t> OffsetRemapping; + + DataExtractor Data(CurStrSection, true, 0); + uint64_t LocalOffset = 0; + uint64_t PrevOffset = 0; + while (const char *s = Data.getCStr(&LocalOffset)) { + OffsetRemapping[PrevOffset] = + Strings.getOffset(s, LocalOffset - PrevOffset); + PrevOffset = LocalOffset; + } + + Data = DataExtractor(CurStrOffsetSection, true, 0); + + Out.SwitchSection(StrOffsetSection); + + uint64_t Offset = 0; + uint64_t Size = CurStrOffsetSection.size(); + while (Offset < Size) { + auto OldOffset = Data.getU32(&Offset); + auto NewOffset = OffsetRemapping[OldOffset]; + Out.EmitIntValue(NewOffset, 4); + } +} + +static uint64_t getCUAbbrev(StringRef Abbrev, uint64_t AbbrCode) { + uint64_t CurCode; + uint64_t Offset = 0; + DataExtractor AbbrevData(Abbrev, true, 0); + while ((CurCode = AbbrevData.getULEB128(&Offset)) != AbbrCode) { + // Tag + AbbrevData.getULEB128(&Offset); + // DW_CHILDREN + AbbrevData.getU8(&Offset); + // Attributes + while (AbbrevData.getULEB128(&Offset) | AbbrevData.getULEB128(&Offset)) + ; + } + return Offset; +} + +struct CompileUnitIdentifiers { + uint64_t Signature = 0; + const char *Name = ""; + const char *DWOName = ""; +}; + +static Expected<const char *> +getIndexedString(dwarf::Form Form, DataExtractor InfoData, + uint64_t &InfoOffset, StringRef StrOffsets, StringRef Str) { + if (Form == dwarf::DW_FORM_string) + return 
InfoData.getCStr(&InfoOffset); + if (Form != dwarf::DW_FORM_GNU_str_index) + return make_error<DWPError>( + "string field encoded without DW_FORM_string or DW_FORM_GNU_str_index"); + auto StrIndex = InfoData.getULEB128(&InfoOffset); + DataExtractor StrOffsetsData(StrOffsets, true, 0); + uint64_t StrOffsetsOffset = 4 * StrIndex; + uint64_t StrOffset = StrOffsetsData.getU32(&StrOffsetsOffset); + DataExtractor StrData(Str, true, 0); + return StrData.getCStr(&StrOffset); +} + +static Expected<CompileUnitIdentifiers> getCUIdentifiers(StringRef Abbrev, + StringRef Info, + StringRef StrOffsets, + StringRef Str) { + uint64_t Offset = 0; + DataExtractor InfoData(Info, true, 0); + dwarf::DwarfFormat Format = dwarf::DwarfFormat::DWARF32; + uint64_t Length = InfoData.getU32(&Offset); + // If the length is 0xffffffff, then this indictes that this is a DWARF 64 + // stream and the length is actually encoded into a 64 bit value that follows. + if (Length == 0xffffffffU) { + Format = dwarf::DwarfFormat::DWARF64; + Length = InfoData.getU64(&Offset); + } + uint16_t Version = InfoData.getU16(&Offset); + InfoData.getU32(&Offset); // Abbrev offset (should be zero) + uint8_t AddrSize = InfoData.getU8(&Offset); + + uint32_t AbbrCode = InfoData.getULEB128(&Offset); + + DataExtractor AbbrevData(Abbrev, true, 0); + uint64_t AbbrevOffset = getCUAbbrev(Abbrev, AbbrCode); + auto Tag = static_cast<dwarf::Tag>(AbbrevData.getULEB128(&AbbrevOffset)); + if (Tag != dwarf::DW_TAG_compile_unit) + return make_error<DWPError>("top level DIE is not a compile unit"); + // DW_CHILDREN + AbbrevData.getU8(&AbbrevOffset); + uint32_t Name; + dwarf::Form Form; + CompileUnitIdentifiers ID; + Optional<uint64_t> Signature = None; + while ((Name = AbbrevData.getULEB128(&AbbrevOffset)) | + (Form = static_cast<dwarf::Form>(AbbrevData.getULEB128(&AbbrevOffset))) && + (Name != 0 || Form != 0)) { + switch (Name) { + case dwarf::DW_AT_name: { + Expected<const char *> EName = + getIndexedString(Form, InfoData, Offset, 
StrOffsets, Str); + if (!EName) + return EName.takeError(); + ID.Name = *EName; + break; + } + case dwarf::DW_AT_GNU_dwo_name: { + Expected<const char *> EName = + getIndexedString(Form, InfoData, Offset, StrOffsets, Str); + if (!EName) + return EName.takeError(); + ID.DWOName = *EName; + break; + } + case dwarf::DW_AT_GNU_dwo_id: + Signature = InfoData.getU64(&Offset); + break; + default: + DWARFFormValue::skipValue(Form, InfoData, &Offset, + dwarf::FormParams({Version, AddrSize, Format})); + } + } + if (!Signature) + return make_error<DWPError>("compile unit missing dwo_id"); + ID.Signature = *Signature; + return ID; +} + +struct UnitIndexEntry { + DWARFUnitIndex::Entry::SectionContribution Contributions[8]; + std::string Name; + std::string DWOName; + StringRef DWPName; +}; + +static StringRef getSubsection(StringRef Section, + const DWARFUnitIndex::Entry &Entry, + DWARFSectionKind Kind) { + const auto *Off = Entry.getOffset(Kind); + if (!Off) + return StringRef(); + return Section.substr(Off->Offset, Off->Length); +} + +static void addAllTypesFromDWP( + MCStreamer &Out, MapVector<uint64_t, UnitIndexEntry> &TypeIndexEntries, + const DWARFUnitIndex &TUIndex, MCSection *OutputTypes, StringRef Types, + const UnitIndexEntry &TUEntry, uint32_t &TypesOffset) { + Out.SwitchSection(OutputTypes); + for (const DWARFUnitIndex::Entry &E : TUIndex.getRows()) { + auto *I = E.getOffsets(); + if (!I) + continue; + auto P = TypeIndexEntries.insert(std::make_pair(E.getSignature(), TUEntry)); + if (!P.second) + continue; + auto &Entry = P.first->second; + // Zero out the debug_info contribution + Entry.Contributions[0] = {}; + for (auto Kind : TUIndex.getColumnKinds()) { + auto &C = Entry.Contributions[Kind - DW_SECT_INFO]; + C.Offset += I->Offset; + C.Length = I->Length; + ++I; + } + auto &C = Entry.Contributions[DW_SECT_TYPES - DW_SECT_INFO]; + Out.EmitBytes(Types.substr( + C.Offset - TUEntry.Contributions[DW_SECT_TYPES - DW_SECT_INFO].Offset, + C.Length)); + C.Offset = 
TypesOffset; + TypesOffset += C.Length; + } +} + +static void addAllTypes(MCStreamer &Out, + MapVector<uint64_t, UnitIndexEntry> &TypeIndexEntries, + MCSection *OutputTypes, + const std::vector<StringRef> &TypesSections, + const UnitIndexEntry &CUEntry, uint32_t &TypesOffset) { + for (StringRef Types : TypesSections) { + Out.SwitchSection(OutputTypes); + uint64_t Offset = 0; + DataExtractor Data(Types, true, 0); + while (Data.isValidOffset(Offset)) { + UnitIndexEntry Entry = CUEntry; + // Zero out the debug_info contribution + Entry.Contributions[0] = {}; + auto &C = Entry.Contributions[DW_SECT_TYPES - DW_SECT_INFO]; + C.Offset = TypesOffset; + auto PrevOffset = Offset; + // Length of the unit, including the 4 byte length field. + C.Length = Data.getU32(&Offset) + 4; + + Data.getU16(&Offset); // Version + Data.getU32(&Offset); // Abbrev offset + Data.getU8(&Offset); // Address size + auto Signature = Data.getU64(&Offset); + Offset = PrevOffset + C.Length; + + auto P = TypeIndexEntries.insert(std::make_pair(Signature, Entry)); + if (!P.second) + continue; + + Out.EmitBytes(Types.substr(PrevOffset, C.Length)); + TypesOffset += C.Length; + } + } +} + +static void +writeIndexTable(MCStreamer &Out, ArrayRef<unsigned> ContributionOffsets, + const MapVector<uint64_t, UnitIndexEntry> &IndexEntries, + uint32_t DWARFUnitIndex::Entry::SectionContribution::*Field) { + for (const auto &E : IndexEntries) + for (size_t i = 0; i != array_lengthof(E.second.Contributions); ++i) + if (ContributionOffsets[i]) + Out.EmitIntValue(E.second.Contributions[i].*Field, 4); +} + +static void +writeIndex(MCStreamer &Out, MCSection *Section, + ArrayRef<unsigned> ContributionOffsets, + const MapVector<uint64_t, UnitIndexEntry> &IndexEntries) { + if (IndexEntries.empty()) + return; + + unsigned Columns = 0; + for (auto &C : ContributionOffsets) + if (C) + ++Columns; + + std::vector<unsigned> Buckets(NextPowerOf2(3 * IndexEntries.size() / 2)); + uint64_t Mask = Buckets.size() - 1; + size_t i = 0; + 
for (const auto &P : IndexEntries) { + auto S = P.first; + auto H = S & Mask; + auto HP = ((S >> 32) & Mask) | 1; + while (Buckets[H]) { + assert(S != IndexEntries.begin()[Buckets[H] - 1].first && + "Duplicate unit"); + H = (H + HP) & Mask; + } + Buckets[H] = i + 1; + ++i; + } + + Out.SwitchSection(Section); + Out.EmitIntValue(2, 4); // Version + Out.EmitIntValue(Columns, 4); // Columns + Out.EmitIntValue(IndexEntries.size(), 4); // Num Units + Out.EmitIntValue(Buckets.size(), 4); // Num Buckets + + // Write the signatures. + for (const auto &I : Buckets) + Out.EmitIntValue(I ? IndexEntries.begin()[I - 1].first : 0, 8); + + // Write the indexes. + for (const auto &I : Buckets) + Out.EmitIntValue(I, 4); + + // Write the column headers (which sections will appear in the table) + for (size_t i = 0; i != ContributionOffsets.size(); ++i) + if (ContributionOffsets[i]) + Out.EmitIntValue(i + DW_SECT_INFO, 4); + + // Write the offsets. + writeIndexTable(Out, ContributionOffsets, IndexEntries, + &DWARFUnitIndex::Entry::SectionContribution::Offset); + + // Write the lengths. 
+ writeIndexTable(Out, ContributionOffsets, IndexEntries, + &DWARFUnitIndex::Entry::SectionContribution::Length); +} + +std::string buildDWODescription(StringRef Name, StringRef DWPName, StringRef DWOName) { + std::string Text = "\'"; + Text += Name; + Text += '\''; + if (!DWPName.empty()) { + Text += " (from "; + if (!DWOName.empty()) { + Text += '\''; + Text += DWOName; + Text += "' in "; + } + Text += '\''; + Text += DWPName; + Text += "')"; + } + return Text; +} + +static Error createError(StringRef Name, Error E) { + return make_error<DWPError>( + ("failure while decompressing compressed section: '" + Name + "', " + + llvm::toString(std::move(E))) + .str()); +} + +static Error +handleCompressedSection(std::deque<SmallString<32>> &UncompressedSections, + StringRef &Name, StringRef &Contents) { + if (!Decompressor::isGnuStyle(Name)) + return Error::success(); + + Expected<Decompressor> Dec = + Decompressor::create(Name, Contents, false /*IsLE*/, false /*Is64Bit*/); + if (!Dec) + return createError(Name, Dec.takeError()); + + UncompressedSections.emplace_back(); + if (Error E = Dec->resizeAndDecompress(UncompressedSections.back())) + return createError(Name, std::move(E)); + + Name = Name.substr(2); // Drop ".z" + Contents = UncompressedSections.back(); + return Error::success(); +} + +static Error handleSection( + const StringMap<std::pair<MCSection *, DWARFSectionKind>> &KnownSections, + const MCSection *StrSection, const MCSection *StrOffsetSection, + const MCSection *TypesSection, const MCSection *CUIndexSection, + const MCSection *TUIndexSection, const SectionRef &Section, MCStreamer &Out, + std::deque<SmallString<32>> &UncompressedSections, + uint32_t (&ContributionOffsets)[8], UnitIndexEntry &CurEntry, + StringRef &CurStrSection, StringRef &CurStrOffsetSection, + std::vector<StringRef> &CurTypesSection, StringRef &InfoSection, + StringRef &AbbrevSection, StringRef &CurCUIndexSection, + StringRef &CurTUIndexSection) { + if (Section.isBSS()) + return 
Error::success(); + + if (Section.isVirtual()) + return Error::success(); + + Expected<StringRef> NameOrErr = Section.getName(); + if (!NameOrErr) + return NameOrErr.takeError(); + StringRef Name = *NameOrErr; + + Expected<StringRef> ContentsOrErr = Section.getContents(); + if (!ContentsOrErr) + return ContentsOrErr.takeError(); + StringRef Contents = *ContentsOrErr; + + if (auto Err = handleCompressedSection(UncompressedSections, Name, Contents)) + return Err; + + Name = Name.substr(Name.find_first_not_of("._")); + + auto SectionPair = KnownSections.find(Name); + if (SectionPair == KnownSections.end()) + return Error::success(); + + if (DWARFSectionKind Kind = SectionPair->second.second) { + auto Index = Kind - DW_SECT_INFO; + if (Kind != DW_SECT_TYPES) { + CurEntry.Contributions[Index].Offset = ContributionOffsets[Index]; + ContributionOffsets[Index] += + (CurEntry.Contributions[Index].Length = Contents.size()); + } + + switch (Kind) { + case DW_SECT_INFO: + InfoSection = Contents; + break; + case DW_SECT_ABBREV: + AbbrevSection = Contents; + break; + default: + break; + } + } + + MCSection *OutSection = SectionPair->second.first; + if (OutSection == StrOffsetSection) + CurStrOffsetSection = Contents; + else if (OutSection == StrSection) + CurStrSection = Contents; + else if (OutSection == TypesSection) + CurTypesSection.push_back(Contents); + else if (OutSection == CUIndexSection) + CurCUIndexSection = Contents; + else if (OutSection == TUIndexSection) + CurTUIndexSection = Contents; + else { + Out.SwitchSection(OutSection); + Out.EmitBytes(Contents); + } + return Error::success(); +} + +static Error +buildDuplicateError(const std::pair<uint64_t, UnitIndexEntry> &PrevE, + const CompileUnitIdentifiers &ID, StringRef DWPName) { + return make_error<DWPError>( + std::string("Duplicate DWO ID (") + utohexstr(PrevE.first) + ") in " + + buildDWODescription(PrevE.second.Name, PrevE.second.DWPName, + PrevE.second.DWOName) + + " and " + buildDWODescription(ID.Name, 
DWPName, ID.DWOName)); +} + +static Expected<SmallVector<std::string, 16>> +getDWOFilenames(StringRef ExecFilename) { + auto ErrOrObj = object::ObjectFile::createObjectFile(ExecFilename); + if (!ErrOrObj) + return ErrOrObj.takeError(); + + const ObjectFile &Obj = *ErrOrObj.get().getBinary(); + std::unique_ptr<DWARFContext> DWARFCtx = DWARFContext::create(Obj); + + SmallVector<std::string, 16> DWOPaths; + for (const auto &CU : DWARFCtx->compile_units()) { + const DWARFDie &Die = CU->getUnitDIE(); + std::string DWOName = dwarf::toString( + Die.find({dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}), ""); + if (DWOName.empty()) + continue; + std::string DWOCompDir = + dwarf::toString(Die.find(dwarf::DW_AT_comp_dir), ""); + if (!DWOCompDir.empty()) { + SmallString<16> DWOPath; + sys::path::append(DWOPath, DWOCompDir, DWOName); + DWOPaths.emplace_back(DWOPath.data(), DWOPath.size()); + } else { + DWOPaths.push_back(std::move(DWOName)); + } + } + return std::move(DWOPaths); +} + +static Error write(MCStreamer &Out, ArrayRef<std::string> Inputs) { + const auto &MCOFI = *Out.getContext().getObjectFileInfo(); + MCSection *const StrSection = MCOFI.getDwarfStrDWOSection(); + MCSection *const StrOffsetSection = MCOFI.getDwarfStrOffDWOSection(); + MCSection *const TypesSection = MCOFI.getDwarfTypesDWOSection(); + MCSection *const CUIndexSection = MCOFI.getDwarfCUIndexSection(); + MCSection *const TUIndexSection = MCOFI.getDwarfTUIndexSection(); + const StringMap<std::pair<MCSection *, DWARFSectionKind>> KnownSections = { + {"debug_info.dwo", {MCOFI.getDwarfInfoDWOSection(), DW_SECT_INFO}}, + {"debug_types.dwo", {MCOFI.getDwarfTypesDWOSection(), DW_SECT_TYPES}}, + {"debug_str_offsets.dwo", {StrOffsetSection, DW_SECT_STR_OFFSETS}}, + {"debug_str.dwo", {StrSection, static_cast<DWARFSectionKind>(0)}}, + {"debug_loc.dwo", {MCOFI.getDwarfLocDWOSection(), DW_SECT_LOC}}, + {"debug_line.dwo", {MCOFI.getDwarfLineDWOSection(), DW_SECT_LINE}}, + {"debug_abbrev.dwo", 
{MCOFI.getDwarfAbbrevDWOSection(), DW_SECT_ABBREV}}, + {"debug_cu_index", {CUIndexSection, static_cast<DWARFSectionKind>(0)}}, + {"debug_tu_index", {TUIndexSection, static_cast<DWARFSectionKind>(0)}}}; + + MapVector<uint64_t, UnitIndexEntry> IndexEntries; + MapVector<uint64_t, UnitIndexEntry> TypeIndexEntries; + + uint32_t ContributionOffsets[8] = {}; + + DWPStringPool Strings(Out, StrSection); + + SmallVector<OwningBinary<object::ObjectFile>, 128> Objects; + Objects.reserve(Inputs.size()); + + std::deque<SmallString<32>> UncompressedSections; + + for (const auto &Input : Inputs) { + auto ErrOrObj = object::ObjectFile::createObjectFile(Input); + if (!ErrOrObj) + return ErrOrObj.takeError(); + + auto &Obj = *ErrOrObj->getBinary(); + Objects.push_back(std::move(*ErrOrObj)); + + UnitIndexEntry CurEntry = {}; + + StringRef CurStrSection; + StringRef CurStrOffsetSection; + std::vector<StringRef> CurTypesSection; + StringRef InfoSection; + StringRef AbbrevSection; + StringRef CurCUIndexSection; + StringRef CurTUIndexSection; + + for (const auto &Section : Obj.sections()) + if (auto Err = handleSection( + KnownSections, StrSection, StrOffsetSection, TypesSection, + CUIndexSection, TUIndexSection, Section, Out, + UncompressedSections, ContributionOffsets, CurEntry, + CurStrSection, CurStrOffsetSection, CurTypesSection, InfoSection, + AbbrevSection, CurCUIndexSection, CurTUIndexSection)) + return Err; + + if (InfoSection.empty()) + continue; + + writeStringsAndOffsets(Out, Strings, StrOffsetSection, CurStrSection, + CurStrOffsetSection); + + if (CurCUIndexSection.empty()) { + Expected<CompileUnitIdentifiers> EID = getCUIdentifiers( + AbbrevSection, InfoSection, CurStrOffsetSection, CurStrSection); + if (!EID) + return createFileError(Input, EID.takeError()); + const auto &ID = *EID; + auto P = IndexEntries.insert(std::make_pair(ID.Signature, CurEntry)); + if (!P.second) + return buildDuplicateError(*P.first, ID, ""); + P.first->second.Name = ID.Name; + 
P.first->second.DWOName = ID.DWOName; + addAllTypes(Out, TypeIndexEntries, TypesSection, CurTypesSection, + CurEntry, ContributionOffsets[DW_SECT_TYPES - DW_SECT_INFO]); + continue; + } + + DWARFUnitIndex CUIndex(DW_SECT_INFO); + DataExtractor CUIndexData(CurCUIndexSection, Obj.isLittleEndian(), 0); + if (!CUIndex.parse(CUIndexData)) + return make_error<DWPError>("Failed to parse cu_index"); + + for (const DWARFUnitIndex::Entry &E : CUIndex.getRows()) { + auto *I = E.getOffsets(); + if (!I) + continue; + auto P = IndexEntries.insert(std::make_pair(E.getSignature(), CurEntry)); + Expected<CompileUnitIdentifiers> EID = getCUIdentifiers( + getSubsection(AbbrevSection, E, DW_SECT_ABBREV), + getSubsection(InfoSection, E, DW_SECT_INFO), + getSubsection(CurStrOffsetSection, E, DW_SECT_STR_OFFSETS), + CurStrSection); + if (!EID) + return createFileError(Input, EID.takeError()); + const auto &ID = *EID; + if (!P.second) + return buildDuplicateError(*P.first, ID, Input); + auto &NewEntry = P.first->second; + NewEntry.Name = ID.Name; + NewEntry.DWOName = ID.DWOName; + NewEntry.DWPName = Input; + for (auto Kind : CUIndex.getColumnKinds()) { + auto &C = NewEntry.Contributions[Kind - DW_SECT_INFO]; + C.Offset += I->Offset; + C.Length = I->Length; + ++I; + } + } + + if (!CurTypesSection.empty()) { + if (CurTypesSection.size() != 1) + return make_error<DWPError>("multiple type unit sections in .dwp file"); + DWARFUnitIndex TUIndex(DW_SECT_TYPES); + DataExtractor TUIndexData(CurTUIndexSection, Obj.isLittleEndian(), 0); + if (!TUIndex.parse(TUIndexData)) + return make_error<DWPError>("Failed to parse tu_index"); + addAllTypesFromDWP(Out, TypeIndexEntries, TUIndex, TypesSection, + CurTypesSection.front(), CurEntry, + ContributionOffsets[DW_SECT_TYPES - DW_SECT_INFO]); + } + } + + // Lie about there being no info contributions so the TU index only includes + // the type unit contribution + ContributionOffsets[0] = 0; + writeIndex(Out, MCOFI.getDwarfTUIndexSection(), 
ContributionOffsets, + TypeIndexEntries); + + // Lie about the type contribution + ContributionOffsets[DW_SECT_TYPES - DW_SECT_INFO] = 0; + // Unlie about the info contribution + ContributionOffsets[0] = 1; + + writeIndex(Out, MCOFI.getDwarfCUIndexSection(), ContributionOffsets, + IndexEntries); + + return Error::success(); +} + +static int error(const Twine &Error, const Twine &Context) { + errs() << Twine("while processing ") + Context + ":\n"; + errs() << Twine("error: ") + Error + "\n"; + return 1; +} + +int main(int argc, char **argv) { + InitLLVM X(argc, argv); + + cl::ParseCommandLineOptions(argc, argv, "merge split dwarf (.dwo) files\n"); + + llvm::InitializeAllTargetInfos(); + llvm::InitializeAllTargetMCs(); + llvm::InitializeAllTargets(); + llvm::InitializeAllAsmPrinters(); + + std::string ErrorStr; + StringRef Context = "dwarf streamer init"; + + Triple TheTriple("x86_64-linux-gnu"); + + // Get the target. + const Target *TheTarget = + TargetRegistry::lookupTarget("", TheTriple, ErrorStr); + if (!TheTarget) + return error(ErrorStr, Context); + std::string TripleName = TheTriple.getTriple(); + + // Create all the MC Objects. 
+ std::unique_ptr<MCRegisterInfo> MRI(TheTarget->createMCRegInfo(TripleName)); + if (!MRI) + return error(Twine("no register info for target ") + TripleName, Context); + + MCTargetOptions MCOptions = InitMCTargetOptionsFromFlags(); + std::unique_ptr<MCAsmInfo> MAI( + TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions)); + if (!MAI) + return error("no asm info for target " + TripleName, Context); + + MCObjectFileInfo MOFI; + MCContext MC(MAI.get(), MRI.get(), &MOFI); + MOFI.InitMCObjectFileInfo(TheTriple, /*PIC*/ false, MC); + + std::unique_ptr<MCSubtargetInfo> MSTI( + TheTarget->createMCSubtargetInfo(TripleName, "", "")); + if (!MSTI) + return error("no subtarget info for target " + TripleName, Context); + + MCTargetOptions Options; + auto MAB = TheTarget->createMCAsmBackend(*MSTI, *MRI, Options); + if (!MAB) + return error("no asm backend for target " + TripleName, Context); + + std::unique_ptr<MCInstrInfo> MII(TheTarget->createMCInstrInfo()); + if (!MII) + return error("no instr info info for target " + TripleName, Context); + + MCCodeEmitter *MCE = TheTarget->createMCCodeEmitter(*MII, *MRI, MC); + if (!MCE) + return error("no code emitter for target " + TripleName, Context); + + // Create the output file. 
+ std::error_code EC; + ToolOutputFile OutFile(OutputFilename, EC, sys::fs::OF_None); + Optional<buffer_ostream> BOS; + raw_pwrite_stream *OS; + if (EC) + return error(Twine(OutputFilename) + ": " + EC.message(), Context); + if (OutFile.os().supportsSeeking()) { + OS = &OutFile.os(); + } else { + BOS.emplace(OutFile.os()); + OS = BOS.getPointer(); + } + + std::unique_ptr<MCStreamer> MS(TheTarget->createMCObjectStreamer( + TheTriple, MC, std::unique_ptr<MCAsmBackend>(MAB), + MAB->createObjectWriter(*OS), std::unique_ptr<MCCodeEmitter>(MCE), *MSTI, + MCOptions.MCRelaxAll, MCOptions.MCIncrementalLinkerCompatible, + /*DWARFMustBeAtTheEnd*/ false)); + if (!MS) + return error("no object streamer for target " + TripleName, Context); + + std::vector<std::string> DWOFilenames = InputFiles; + for (const auto &ExecFilename : ExecFilenames) { + auto DWOs = getDWOFilenames(ExecFilename); + if (!DWOs) { + logAllUnhandledErrors(DWOs.takeError(), WithColor::error()); + return 1; + } + DWOFilenames.insert(DWOFilenames.end(), + std::make_move_iterator(DWOs->begin()), + std::make_move_iterator(DWOs->end())); + } + + if (auto Err = write(*MS, DWOFilenames)) { + logAllUnhandledErrors(std::move(Err), WithColor::error()); + return 1; + } + + MS->Finish(); + OutFile.keep(); + return 0; +} diff --git a/contrib/llvm-project/llvm/tools/llvm-objcopy/ELF/Object.cpp b/contrib/llvm-project/llvm/tools/llvm-objcopy/ELF/Object.cpp index ad53c75663ec..5e5ed95de743 100644 --- a/contrib/llvm-project/llvm/tools/llvm-objcopy/ELF/Object.cpp +++ b/contrib/llvm-project/llvm/tools/llvm-objcopy/ELF/Object.cpp @@ -2254,27 +2254,28 @@ Error BinaryWriter::finalize() { OrderedSegments.erase(End, std::end(OrderedSegments)); // Compute the section LMA based on its sh_offset and the containing segment's - // p_offset and p_paddr. Also compute the minimum LMA of all sections as - // MinAddr. In the output, the contents between address 0 and MinAddr will be - // skipped. + // p_offset and p_paddr. 
Also compute the minimum LMA of all non-empty + // sections as MinAddr. In the output, the contents between address 0 and + // MinAddr will be skipped. uint64_t MinAddr = UINT64_MAX; for (SectionBase &Sec : Obj.allocSections()) { if (Sec.ParentSegment != nullptr) Sec.Addr = Sec.Offset - Sec.ParentSegment->Offset + Sec.ParentSegment->PAddr; - MinAddr = std::min(MinAddr, Sec.Addr); + if (Sec.Size > 0) + MinAddr = std::min(MinAddr, Sec.Addr); } // Now that every section has been laid out we just need to compute the total // file size. This might not be the same as the offset returned by // layoutSections, because we want to truncate the last segment to the end of - // its last section, to match GNU objcopy's behaviour. + // its last non-empty section, to match GNU objcopy's behaviour. TotalSize = 0; - for (SectionBase &Sec : Obj.allocSections()) { - Sec.Offset = Sec.Addr - MinAddr; - if (Sec.Type != SHT_NOBITS) + for (SectionBase &Sec : Obj.allocSections()) + if (Sec.Type != SHT_NOBITS && Sec.Size > 0) { + Sec.Offset = Sec.Addr - MinAddr; TotalSize = std::max(TotalSize, Sec.Offset + Sec.Size); - } + } if (Error E = Buf.allocate(TotalSize)) return E; diff --git a/contrib/llvm-project/llvm/tools/llvm-objcopy/llvm-objcopy.cpp b/contrib/llvm-project/llvm/tools/llvm-objcopy/llvm-objcopy.cpp index e662f35f4b08..4a44a7ab0875 100644 --- a/contrib/llvm-project/llvm/tools/llvm-objcopy/llvm-objcopy.cpp +++ b/contrib/llvm-project/llvm/tools/llvm-objcopy/llvm-objcopy.cpp @@ -322,11 +322,25 @@ enum class ToolType { Objcopy, Strip, InstallNameTool }; int main(int argc, char **argv) { InitLLVM X(argc, argv); ToolName = argv[0]; - ToolType Tool = StringSwitch<ToolType>(sys::path::stem(ToolName)) - .EndsWith("strip", ToolType::Strip) - .EndsWith("install-name-tool", ToolType::InstallNameTool) - .EndsWith("install_name_tool", ToolType::InstallNameTool) - .Default(ToolType::Objcopy); + + StringRef Stem = sys::path::stem(ToolName); + auto Is = [=](StringRef Tool) { + // We need to 
recognize the following filenames: + // + // llvm-objcopy -> objcopy + // strip-10.exe -> strip + // powerpc64-unknown-freebsd13-objcopy -> objcopy + // llvm-install-name-tool -> install-name-tool + auto I = Stem.rfind_lower(Tool); + return I != StringRef::npos && + (I + Tool.size() == Stem.size() || !isAlnum(Stem[I + Tool.size()])); + }; + ToolType Tool = ToolType::Objcopy; + if (Is("strip")) + Tool = ToolType::Strip; + else if (Is("install-name-tool") || Is("install_name_tool")) + Tool = ToolType::InstallNameTool; + // Expand response files. // TODO: Move these lines, which are copied from lib/Support/CommandLine.cpp, // into a separate function in the CommandLine library and call that function diff --git a/contrib/llvm-project/llvm/tools/llvm-size/llvm-size.cpp b/contrib/llvm-project/llvm/tools/llvm-size/llvm-size.cpp new file mode 100644 index 000000000000..46ece5a6f0c9 --- /dev/null +++ b/contrib/llvm-project/llvm/tools/llvm-size/llvm-size.cpp @@ -0,0 +1,894 @@ +//===-- llvm-size.cpp - Print the size of each object section ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This program is a utility that works like traditional Unix "size", +// that is, it prints out the size of each section, and the total size of all +// sections. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/APInt.h" +#include "llvm/Object/Archive.h" +#include "llvm/Object/ELFObjectFile.h" +#include "llvm/Object/MachO.h" +#include "llvm/Object/MachOUniversal.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/InitLLVM.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/WithColor.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <string> +#include <system_error> + +using namespace llvm; +using namespace object; + +cl::OptionCategory SizeCat("llvm-size Options"); + +enum OutputFormatTy { berkeley, sysv, darwin }; +static cl::opt<OutputFormatTy> + OutputFormat("format", cl::desc("Specify output format"), + cl::values(clEnumVal(sysv, "System V format"), + clEnumVal(berkeley, "Berkeley format"), + clEnumVal(darwin, "Darwin -m format")), + cl::init(berkeley), cl::cat(SizeCat)); + +static cl::opt<OutputFormatTy> + OutputFormatShort(cl::desc("Specify output format"), + cl::values(clEnumValN(sysv, "A", "System V format"), + clEnumValN(berkeley, "B", "Berkeley format"), + clEnumValN(darwin, "m", "Darwin -m format")), + cl::init(berkeley), cl::cat(SizeCat)); + +static bool BerkeleyHeaderPrinted = false; +static bool MoreThanOneFile = false; +static uint64_t TotalObjectText = 0; +static uint64_t TotalObjectData = 0; +static uint64_t TotalObjectBss = 0; +static uint64_t TotalObjectTotal = 0; + +cl::opt<bool> + DarwinLongFormat("l", + cl::desc("When format is darwin, use long format " + "to include addresses and offsets."), + cl::cat(SizeCat)); + +cl::opt<bool> + ELFCommons("common", + cl::desc("Print common symbols in the ELF file. 
When using " + "Berkeley format, this is added to bss."), + cl::init(false), cl::cat(SizeCat)); + +static cl::list<std::string> + ArchFlags("arch", cl::desc("architecture(s) from a Mach-O file to dump"), + cl::ZeroOrMore, cl::cat(SizeCat)); +static bool ArchAll = false; + +enum RadixTy { octal = 8, decimal = 10, hexadecimal = 16 }; +static cl::opt<RadixTy> Radix( + "radix", cl::desc("Print size in radix"), cl::init(decimal), + cl::values(clEnumValN(octal, "8", "Print size in octal"), + clEnumValN(decimal, "10", "Print size in decimal"), + clEnumValN(hexadecimal, "16", "Print size in hexadecimal")), + cl::cat(SizeCat)); + +static cl::opt<RadixTy> RadixShort( + cl::desc("Print size in radix:"), + cl::values(clEnumValN(octal, "o", "Print size in octal"), + clEnumValN(decimal, "d", "Print size in decimal"), + clEnumValN(hexadecimal, "x", "Print size in hexadecimal")), + cl::init(decimal), cl::cat(SizeCat)); + +static cl::opt<bool> + TotalSizes("totals", + cl::desc("Print totals of all objects - Berkeley format only"), + cl::init(false), cl::cat(SizeCat)); + +static cl::alias TotalSizesShort("t", cl::desc("Short for --totals"), + cl::aliasopt(TotalSizes)); + +static cl::list<std::string> + InputFilenames(cl::Positional, cl::desc("<input files>"), cl::ZeroOrMore); + +static cl::extrahelp + HelpResponse("\nPass @FILE as argument to read options from FILE.\n"); + +static bool HadError = false; + +static std::string ToolName; + +static void error(const Twine &Message, StringRef File) { + HadError = true; + WithColor::error(errs(), ToolName) << "'" << File << "': " << Message << "\n"; +} + +// This version of error() prints the archive name and member name, for example: +// "libx.a(foo.o)" after the ToolName before the error message. It sets +// HadError but returns allowing the code to move on to other archive members. 
+static void error(llvm::Error E, StringRef FileName, const Archive::Child &C, + StringRef ArchitectureName = StringRef()) { + HadError = true; + WithColor::error(errs(), ToolName) << "'" << FileName << "'"; + + Expected<StringRef> NameOrErr = C.getName(); + // TODO: if we have a error getting the name then it would be nice to print + // the index of which archive member this is and or its offset in the + // archive instead of "???" as the name. + if (!NameOrErr) { + consumeError(NameOrErr.takeError()); + errs() << "(" << "???" << ")"; + } else + errs() << "(" << NameOrErr.get() << ")"; + + if (!ArchitectureName.empty()) + errs() << " (for architecture " << ArchitectureName << ") "; + + std::string Buf; + raw_string_ostream OS(Buf); + logAllUnhandledErrors(std::move(E), OS); + OS.flush(); + errs() << ": " << Buf << "\n"; +} + +// This version of error() prints the file name and which architecture slice it // is from, for example: "foo.o (for architecture i386)" after the ToolName +// before the error message. It sets HadError but returns allowing the code to +// move on to other architecture slices. +static void error(llvm::Error E, StringRef FileName, + StringRef ArchitectureName = StringRef()) { + HadError = true; + WithColor::error(errs(), ToolName) << "'" << FileName << "'"; + + if (!ArchitectureName.empty()) + errs() << " (for architecture " << ArchitectureName << ") "; + + std::string Buf; + raw_string_ostream OS(Buf); + logAllUnhandledErrors(std::move(E), OS); + OS.flush(); + errs() << ": " << Buf << "\n"; +} + +/// Get the length of the string that represents @p num in Radix including the +/// leading 0x or 0 for hexadecimal and octal respectively. +static size_t getNumLengthAsString(uint64_t num) { + APInt conv(64, num); + SmallString<32> result; + conv.toString(result, Radix, false, true); + return result.size(); +} + +/// Return the printing format for the Radix. 
+static const char *getRadixFmt() { + switch (Radix) { + case octal: + return PRIo64; + case decimal: + return PRIu64; + case hexadecimal: + return PRIx64; + } + return nullptr; +} + +/// Remove unneeded ELF sections from calculation +static bool considerForSize(ObjectFile *Obj, SectionRef Section) { + if (!Obj->isELF()) + return true; + switch (static_cast<ELFSectionRef>(Section).getType()) { + case ELF::SHT_NULL: + case ELF::SHT_SYMTAB: + case ELF::SHT_STRTAB: + case ELF::SHT_REL: + case ELF::SHT_RELA: + return false; + } + return true; +} + +/// Total size of all ELF common symbols +static uint64_t getCommonSize(ObjectFile *Obj) { + uint64_t TotalCommons = 0; + for (auto &Sym : Obj->symbols()) + if (Obj->getSymbolFlags(Sym.getRawDataRefImpl()) & SymbolRef::SF_Common) + TotalCommons += Obj->getCommonSymbolSize(Sym.getRawDataRefImpl()); + return TotalCommons; +} + +/// Print the size of each Mach-O segment and section in @p MachO. +/// +/// This is when used when @c OutputFormat is darwin and produces the same +/// output as darwin's size(1) -m output. 
+static void printDarwinSectionSizes(MachOObjectFile *MachO) { + std::string fmtbuf; + raw_string_ostream fmt(fmtbuf); + const char *radix_fmt = getRadixFmt(); + if (Radix == hexadecimal) + fmt << "0x"; + fmt << "%" << radix_fmt; + + uint32_t Filetype = MachO->getHeader().filetype; + + uint64_t total = 0; + for (const auto &Load : MachO->load_commands()) { + if (Load.C.cmd == MachO::LC_SEGMENT_64) { + MachO::segment_command_64 Seg = MachO->getSegment64LoadCommand(Load); + outs() << "Segment " << Seg.segname << ": " + << format(fmt.str().c_str(), Seg.vmsize); + if (DarwinLongFormat) + outs() << " (vmaddr 0x" << format("%" PRIx64, Seg.vmaddr) << " fileoff " + << Seg.fileoff << ")"; + outs() << "\n"; + total += Seg.vmsize; + uint64_t sec_total = 0; + for (unsigned J = 0; J < Seg.nsects; ++J) { + MachO::section_64 Sec = MachO->getSection64(Load, J); + if (Filetype == MachO::MH_OBJECT) + outs() << "\tSection (" << format("%.16s", &Sec.segname) << ", " + << format("%.16s", &Sec.sectname) << "): "; + else + outs() << "\tSection " << format("%.16s", &Sec.sectname) << ": "; + outs() << format(fmt.str().c_str(), Sec.size); + if (DarwinLongFormat) + outs() << " (addr 0x" << format("%" PRIx64, Sec.addr) << " offset " + << Sec.offset << ")"; + outs() << "\n"; + sec_total += Sec.size; + } + if (Seg.nsects != 0) + outs() << "\ttotal " << format(fmt.str().c_str(), sec_total) << "\n"; + } else if (Load.C.cmd == MachO::LC_SEGMENT) { + MachO::segment_command Seg = MachO->getSegmentLoadCommand(Load); + uint64_t Seg_vmsize = Seg.vmsize; + outs() << "Segment " << Seg.segname << ": " + << format(fmt.str().c_str(), Seg_vmsize); + if (DarwinLongFormat) + outs() << " (vmaddr 0x" << format("%" PRIx32, Seg.vmaddr) << " fileoff " + << Seg.fileoff << ")"; + outs() << "\n"; + total += Seg.vmsize; + uint64_t sec_total = 0; + for (unsigned J = 0; J < Seg.nsects; ++J) { + MachO::section Sec = MachO->getSection(Load, J); + if (Filetype == MachO::MH_OBJECT) + outs() << "\tSection (" << 
format("%.16s", &Sec.segname) << ", " + << format("%.16s", &Sec.sectname) << "): "; + else + outs() << "\tSection " << format("%.16s", &Sec.sectname) << ": "; + uint64_t Sec_size = Sec.size; + outs() << format(fmt.str().c_str(), Sec_size); + if (DarwinLongFormat) + outs() << " (addr 0x" << format("%" PRIx32, Sec.addr) << " offset " + << Sec.offset << ")"; + outs() << "\n"; + sec_total += Sec.size; + } + if (Seg.nsects != 0) + outs() << "\ttotal " << format(fmt.str().c_str(), sec_total) << "\n"; + } + } + outs() << "total " << format(fmt.str().c_str(), total) << "\n"; +} + +/// Print the summary sizes of the standard Mach-O segments in @p MachO. +/// +/// This is when used when @c OutputFormat is berkeley with a Mach-O file and +/// produces the same output as darwin's size(1) default output. +static void printDarwinSegmentSizes(MachOObjectFile *MachO) { + uint64_t total_text = 0; + uint64_t total_data = 0; + uint64_t total_objc = 0; + uint64_t total_others = 0; + for (const auto &Load : MachO->load_commands()) { + if (Load.C.cmd == MachO::LC_SEGMENT_64) { + MachO::segment_command_64 Seg = MachO->getSegment64LoadCommand(Load); + if (MachO->getHeader().filetype == MachO::MH_OBJECT) { + for (unsigned J = 0; J < Seg.nsects; ++J) { + MachO::section_64 Sec = MachO->getSection64(Load, J); + StringRef SegmentName = StringRef(Sec.segname); + if (SegmentName == "__TEXT") + total_text += Sec.size; + else if (SegmentName == "__DATA") + total_data += Sec.size; + else if (SegmentName == "__OBJC") + total_objc += Sec.size; + else + total_others += Sec.size; + } + } else { + StringRef SegmentName = StringRef(Seg.segname); + if (SegmentName == "__TEXT") + total_text += Seg.vmsize; + else if (SegmentName == "__DATA") + total_data += Seg.vmsize; + else if (SegmentName == "__OBJC") + total_objc += Seg.vmsize; + else + total_others += Seg.vmsize; + } + } else if (Load.C.cmd == MachO::LC_SEGMENT) { + MachO::segment_command Seg = MachO->getSegmentLoadCommand(Load); + if 
(MachO->getHeader().filetype == MachO::MH_OBJECT) { + for (unsigned J = 0; J < Seg.nsects; ++J) { + MachO::section Sec = MachO->getSection(Load, J); + StringRef SegmentName = StringRef(Sec.segname); + if (SegmentName == "__TEXT") + total_text += Sec.size; + else if (SegmentName == "__DATA") + total_data += Sec.size; + else if (SegmentName == "__OBJC") + total_objc += Sec.size; + else + total_others += Sec.size; + } + } else { + StringRef SegmentName = StringRef(Seg.segname); + if (SegmentName == "__TEXT") + total_text += Seg.vmsize; + else if (SegmentName == "__DATA") + total_data += Seg.vmsize; + else if (SegmentName == "__OBJC") + total_objc += Seg.vmsize; + else + total_others += Seg.vmsize; + } + } + } + uint64_t total = total_text + total_data + total_objc + total_others; + + if (!BerkeleyHeaderPrinted) { + outs() << "__TEXT\t__DATA\t__OBJC\tothers\tdec\thex\n"; + BerkeleyHeaderPrinted = true; + } + outs() << total_text << "\t" << total_data << "\t" << total_objc << "\t" + << total_others << "\t" << total << "\t" << format("%" PRIx64, total) + << "\t"; +} + +/// Print the size of each section in @p Obj. +/// +/// The format used is determined by @c OutputFormat and @c Radix. +static void printObjectSectionSizes(ObjectFile *Obj) { + uint64_t total = 0; + std::string fmtbuf; + raw_string_ostream fmt(fmtbuf); + const char *radix_fmt = getRadixFmt(); + + // If OutputFormat is darwin and we have a MachOObjectFile print as darwin's + // size(1) -m output, else if OutputFormat is darwin and not a Mach-O object + // let it fall through to OutputFormat berkeley. + MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(Obj); + if (OutputFormat == darwin && MachO) + printDarwinSectionSizes(MachO); + // If we have a MachOObjectFile and the OutputFormat is berkeley print as + // darwin's default berkeley format for Mach-O files. 
+ else if (MachO && OutputFormat == berkeley) + printDarwinSegmentSizes(MachO); + else if (OutputFormat == sysv) { + // Run two passes over all sections. The first gets the lengths needed for + // formatting the output. The second actually does the output. + std::size_t max_name_len = strlen("section"); + std::size_t max_size_len = strlen("size"); + std::size_t max_addr_len = strlen("addr"); + for (const SectionRef &Section : Obj->sections()) { + if (!considerForSize(Obj, Section)) + continue; + uint64_t size = Section.getSize(); + total += size; + + Expected<StringRef> name_or_err = Section.getName(); + if (!name_or_err) { + error(name_or_err.takeError(), Obj->getFileName()); + return; + } + + uint64_t addr = Section.getAddress(); + max_name_len = std::max(max_name_len, name_or_err->size()); + max_size_len = std::max(max_size_len, getNumLengthAsString(size)); + max_addr_len = std::max(max_addr_len, getNumLengthAsString(addr)); + } + + // Add extra padding. + max_name_len += 2; + max_size_len += 2; + max_addr_len += 2; + + // Setup header format. + fmt << "%-" << max_name_len << "s " + << "%" << max_size_len << "s " + << "%" << max_addr_len << "s\n"; + + // Print header + outs() << format(fmt.str().c_str(), static_cast<const char *>("section"), + static_cast<const char *>("size"), + static_cast<const char *>("addr")); + fmtbuf.clear(); + + // Setup per section format. + fmt << "%-" << max_name_len << "s " + << "%#" << max_size_len << radix_fmt << " " + << "%#" << max_addr_len << radix_fmt << "\n"; + + // Print each section. 
+ for (const SectionRef &Section : Obj->sections()) { + if (!considerForSize(Obj, Section)) + continue; + + Expected<StringRef> name_or_err = Section.getName(); + if (!name_or_err) { + error(name_or_err.takeError(), Obj->getFileName()); + return; + } + + uint64_t size = Section.getSize(); + uint64_t addr = Section.getAddress(); + outs() << format(fmt.str().c_str(), name_or_err->str().c_str(), size, addr); + } + + if (ELFCommons) { + uint64_t CommonSize = getCommonSize(Obj); + total += CommonSize; + outs() << format(fmt.str().c_str(), std::string("*COM*").c_str(), + CommonSize, static_cast<uint64_t>(0)); + } + + // Print total. + fmtbuf.clear(); + fmt << "%-" << max_name_len << "s " + << "%#" << max_size_len << radix_fmt << "\n"; + outs() << format(fmt.str().c_str(), static_cast<const char *>("Total"), + total) + << "\n\n"; + } else { + // The Berkeley format does not display individual section sizes. It + // displays the cumulative size for each section type. + uint64_t total_text = 0; + uint64_t total_data = 0; + uint64_t total_bss = 0; + + // Make one pass over the section table to calculate sizes. + for (const SectionRef &Section : Obj->sections()) { + uint64_t size = Section.getSize(); + bool isText = Section.isBerkeleyText(); + bool isData = Section.isBerkeleyData(); + bool isBSS = Section.isBSS(); + if (isText) + total_text += size; + else if (isData) + total_data += size; + else if (isBSS) + total_bss += size; + } + + if (ELFCommons) + total_bss += getCommonSize(Obj); + + total = total_text + total_data + total_bss; + + if (TotalSizes) { + TotalObjectText += total_text; + TotalObjectData += total_data; + TotalObjectBss += total_bss; + TotalObjectTotal += total; + } + + if (!BerkeleyHeaderPrinted) { + outs() << " text\t" + " data\t" + " bss\t" + " " + << (Radix == octal ? "oct" : "dec") + << "\t" + " hex\t" + "filename\n"; + BerkeleyHeaderPrinted = true; + } + + // Print result. 
+ fmt << "%#7" << radix_fmt << "\t" + << "%#7" << radix_fmt << "\t" + << "%#7" << radix_fmt << "\t"; + outs() << format(fmt.str().c_str(), total_text, total_data, total_bss); + fmtbuf.clear(); + fmt << "%7" << (Radix == octal ? PRIo64 : PRIu64) << "\t" + << "%7" PRIx64 "\t"; + outs() << format(fmt.str().c_str(), total, total); + } +} + +/// Checks whether the @p O ObjectFile is a Mach-O file and, if it is and a +/// list of architecture flags was specified, makes sure this Mach-O file is +/// one of those architectures (unless all architectures were requested). +/// If it is not, an error is reported for @p Filename and this routine returns +/// false. Otherwise it returns true. +static bool checkMachOAndArchFlags(ObjectFile *O, StringRef Filename) { + auto *MachO = dyn_cast<MachOObjectFile>(O); + + if (!MachO || ArchAll || ArchFlags.empty()) + return true; + + MachO::mach_header H; + MachO::mach_header_64 H_64; + Triple T; + if (MachO->is64Bit()) { + H_64 = MachO->MachOObjectFile::getHeader64(); + T = MachOObjectFile::getArchTriple(H_64.cputype, H_64.cpusubtype); + } else { + H = MachO->MachOObjectFile::getHeader(); + T = MachOObjectFile::getArchTriple(H.cputype, H.cpusubtype); + } + if (none_of(ArchFlags, [&](const std::string &Name) { + return Name == T.getArchName(); + })) { + error("no architecture specified", Filename); + return false; + } + return true; +} + +/// Print the section sizes for @p file. If @p file is an archive, print the +/// section sizes for each archive member. +static void printFileSectionSizes(StringRef file) { + + // Attempt to open the binary. + Expected<OwningBinary<Binary>> BinaryOrErr = createBinary(file); + if (!BinaryOrErr) { + error(BinaryOrErr.takeError(), file); + return; + } + Binary &Bin = *BinaryOrErr.get().getBinary(); + + if (Archive *a = dyn_cast<Archive>(&Bin)) { + // This is an archive. Iterate over each member and display its sizes. 
+ Error Err = Error::success(); + for (auto &C : a->children(Err)) { + Expected<std::unique_ptr<Binary>> ChildOrErr = C.getAsBinary(); + if (!ChildOrErr) { + if (auto E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError())) + error(std::move(E), a->getFileName(), C); + continue; + } + if (ObjectFile *o = dyn_cast<ObjectFile>(&*ChildOrErr.get())) { + MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(o); + if (!checkMachOAndArchFlags(o, file)) + return; + if (OutputFormat == sysv) + outs() << o->getFileName() << " (ex " << a->getFileName() << "):\n"; + else if (MachO && OutputFormat == darwin) + outs() << a->getFileName() << "(" << o->getFileName() << "):\n"; + printObjectSectionSizes(o); + if (OutputFormat == berkeley) { + if (MachO) + outs() << a->getFileName() << "(" << o->getFileName() << ")\n"; + else + outs() << o->getFileName() << " (ex " << a->getFileName() << ")\n"; + } + } + } + if (Err) + error(std::move(Err), a->getFileName()); + } else if (MachOUniversalBinary *UB = + dyn_cast<MachOUniversalBinary>(&Bin)) { + // If we have a list of architecture flags specified dump only those. + if (!ArchAll && !ArchFlags.empty()) { + // Look for a slice in the universal binary that matches each ArchFlag. 
+ bool ArchFound; + for (unsigned i = 0; i < ArchFlags.size(); ++i) { + ArchFound = false; + for (MachOUniversalBinary::object_iterator I = UB->begin_objects(), + E = UB->end_objects(); + I != E; ++I) { + if (ArchFlags[i] == I->getArchFlagName()) { + ArchFound = true; + Expected<std::unique_ptr<ObjectFile>> UO = I->getAsObjectFile(); + if (UO) { + if (ObjectFile *o = dyn_cast<ObjectFile>(&*UO.get())) { + MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(o); + if (OutputFormat == sysv) + outs() << o->getFileName() << " :\n"; + else if (MachO && OutputFormat == darwin) { + if (MoreThanOneFile || ArchFlags.size() > 1) + outs() << o->getFileName() << " (for architecture " + << I->getArchFlagName() << "): \n"; + } + printObjectSectionSizes(o); + if (OutputFormat == berkeley) { + if (!MachO || MoreThanOneFile || ArchFlags.size() > 1) + outs() << o->getFileName() << " (for architecture " + << I->getArchFlagName() << ")"; + outs() << "\n"; + } + } + } else if (auto E = isNotObjectErrorInvalidFileType( + UO.takeError())) { + error(std::move(E), file, ArchFlags.size() > 1 ? + StringRef(I->getArchFlagName()) : StringRef()); + return; + } else if (Expected<std::unique_ptr<Archive>> AOrErr = + I->getAsArchive()) { + std::unique_ptr<Archive> &UA = *AOrErr; + // This is an archive. Iterate over each member and display its + // sizes. + Error Err = Error::success(); + for (auto &C : UA->children(Err)) { + Expected<std::unique_ptr<Binary>> ChildOrErr = C.getAsBinary(); + if (!ChildOrErr) { + if (auto E = isNotObjectErrorInvalidFileType( + ChildOrErr.takeError())) + error(std::move(E), UA->getFileName(), C, + ArchFlags.size() > 1 ? 
+ StringRef(I->getArchFlagName()) : StringRef()); + continue; + } + if (ObjectFile *o = dyn_cast<ObjectFile>(&*ChildOrErr.get())) { + MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(o); + if (OutputFormat == sysv) + outs() << o->getFileName() << " (ex " << UA->getFileName() + << "):\n"; + else if (MachO && OutputFormat == darwin) + outs() << UA->getFileName() << "(" << o->getFileName() + << ")" + << " (for architecture " << I->getArchFlagName() + << "):\n"; + printObjectSectionSizes(o); + if (OutputFormat == berkeley) { + if (MachO) { + outs() << UA->getFileName() << "(" << o->getFileName() + << ")"; + if (ArchFlags.size() > 1) + outs() << " (for architecture " << I->getArchFlagName() + << ")"; + outs() << "\n"; + } else + outs() << o->getFileName() << " (ex " << UA->getFileName() + << ")\n"; + } + } + } + if (Err) + error(std::move(Err), UA->getFileName()); + } else { + consumeError(AOrErr.takeError()); + error("mach-o universal file for architecture " + + StringRef(I->getArchFlagName()) + + " is not a mach-o file or an archive file", + file); + } + } + } + if (!ArchFound) { + error("file does not contain architecture " + ArchFlags[i], file); + return; + } + } + return; + } + // No architecture flags were specified so if this contains a slice that + // matches the host architecture dump only that. 
+ if (!ArchAll) { + StringRef HostArchName = MachOObjectFile::getHostArch().getArchName(); + for (MachOUniversalBinary::object_iterator I = UB->begin_objects(), + E = UB->end_objects(); + I != E; ++I) { + if (HostArchName == I->getArchFlagName()) { + Expected<std::unique_ptr<ObjectFile>> UO = I->getAsObjectFile(); + if (UO) { + if (ObjectFile *o = dyn_cast<ObjectFile>(&*UO.get())) { + MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(o); + if (OutputFormat == sysv) + outs() << o->getFileName() << " :\n"; + else if (MachO && OutputFormat == darwin) { + if (MoreThanOneFile) + outs() << o->getFileName() << " (for architecture " + << I->getArchFlagName() << "):\n"; + } + printObjectSectionSizes(o); + if (OutputFormat == berkeley) { + if (!MachO || MoreThanOneFile) + outs() << o->getFileName() << " (for architecture " + << I->getArchFlagName() << ")"; + outs() << "\n"; + } + } + } else if (auto E = isNotObjectErrorInvalidFileType(UO.takeError())) { + error(std::move(E), file); + return; + } else if (Expected<std::unique_ptr<Archive>> AOrErr = + I->getAsArchive()) { + std::unique_ptr<Archive> &UA = *AOrErr; + // This is an archive. Iterate over each member and display its + // sizes. 
+ Error Err = Error::success(); + for (auto &C : UA->children(Err)) { + Expected<std::unique_ptr<Binary>> ChildOrErr = C.getAsBinary(); + if (!ChildOrErr) { + if (auto E = isNotObjectErrorInvalidFileType( + ChildOrErr.takeError())) + error(std::move(E), UA->getFileName(), C); + continue; + } + if (ObjectFile *o = dyn_cast<ObjectFile>(&*ChildOrErr.get())) { + MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(o); + if (OutputFormat == sysv) + outs() << o->getFileName() << " (ex " << UA->getFileName() + << "):\n"; + else if (MachO && OutputFormat == darwin) + outs() << UA->getFileName() << "(" << o->getFileName() << ")" + << " (for architecture " << I->getArchFlagName() + << "):\n"; + printObjectSectionSizes(o); + if (OutputFormat == berkeley) { + if (MachO) + outs() << UA->getFileName() << "(" << o->getFileName() + << ")\n"; + else + outs() << o->getFileName() << " (ex " << UA->getFileName() + << ")\n"; + } + } + } + if (Err) + error(std::move(Err), UA->getFileName()); + } else { + consumeError(AOrErr.takeError()); + error("mach-o universal file for architecture " + + StringRef(I->getArchFlagName()) + + " is not a mach-o file or an archive file", + file); + } + return; + } + } + } + // Either all architectures have been specified or none have been specified + // and this does not contain the host architecture so dump all the slices. 
+ bool MoreThanOneArch = UB->getNumberOfObjects() > 1; + for (MachOUniversalBinary::object_iterator I = UB->begin_objects(), + E = UB->end_objects(); + I != E; ++I) { + Expected<std::unique_ptr<ObjectFile>> UO = I->getAsObjectFile(); + if (UO) { + if (ObjectFile *o = dyn_cast<ObjectFile>(&*UO.get())) { + MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(o); + if (OutputFormat == sysv) + outs() << o->getFileName() << " :\n"; + else if (MachO && OutputFormat == darwin) { + if (MoreThanOneFile || MoreThanOneArch) + outs() << o->getFileName() << " (for architecture " + << I->getArchFlagName() << "):"; + outs() << "\n"; + } + printObjectSectionSizes(o); + if (OutputFormat == berkeley) { + if (!MachO || MoreThanOneFile || MoreThanOneArch) + outs() << o->getFileName() << " (for architecture " + << I->getArchFlagName() << ")"; + outs() << "\n"; + } + } + } else if (auto E = isNotObjectErrorInvalidFileType(UO.takeError())) { + error(std::move(E), file, MoreThanOneArch ? + StringRef(I->getArchFlagName()) : StringRef()); + return; + } else if (Expected<std::unique_ptr<Archive>> AOrErr = + I->getAsArchive()) { + std::unique_ptr<Archive> &UA = *AOrErr; + // This is an archive. Iterate over each member and display its sizes. + Error Err = Error::success(); + for (auto &C : UA->children(Err)) { + Expected<std::unique_ptr<Binary>> ChildOrErr = C.getAsBinary(); + if (!ChildOrErr) { + if (auto E = isNotObjectErrorInvalidFileType( + ChildOrErr.takeError())) + error(std::move(E), UA->getFileName(), C, MoreThanOneArch ? 
+ StringRef(I->getArchFlagName()) : StringRef()); + continue; + } + if (ObjectFile *o = dyn_cast<ObjectFile>(&*ChildOrErr.get())) { + MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(o); + if (OutputFormat == sysv) + outs() << o->getFileName() << " (ex " << UA->getFileName() + << "):\n"; + else if (MachO && OutputFormat == darwin) + outs() << UA->getFileName() << "(" << o->getFileName() << ")" + << " (for architecture " << I->getArchFlagName() << "):\n"; + printObjectSectionSizes(o); + if (OutputFormat == berkeley) { + if (MachO) + outs() << UA->getFileName() << "(" << o->getFileName() << ")" + << " (for architecture " << I->getArchFlagName() + << ")\n"; + else + outs() << o->getFileName() << " (ex " << UA->getFileName() + << ")\n"; + } + } + } + if (Err) + error(std::move(Err), UA->getFileName()); + } else { + consumeError(AOrErr.takeError()); + error("mach-o universal file for architecture " + + StringRef(I->getArchFlagName()) + + " is not a mach-o file or an archive file", + file); + } + } + } else if (ObjectFile *o = dyn_cast<ObjectFile>(&Bin)) { + if (!checkMachOAndArchFlags(o, file)) + return; + MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(o); + if (OutputFormat == sysv) + outs() << o->getFileName() << " :\n"; + else if (MachO && OutputFormat == darwin && MoreThanOneFile) + outs() << o->getFileName() << ":\n"; + printObjectSectionSizes(o); + if (OutputFormat == berkeley) { + if (!MachO || MoreThanOneFile) + outs() << o->getFileName(); + outs() << "\n"; + } + } else { + error("unsupported file type", file); + } +} + +static void printBerkeleyTotals() { + std::string fmtbuf; + raw_string_ostream fmt(fmtbuf); + const char *radix_fmt = getRadixFmt(); + fmt << "%#7" << radix_fmt << "\t" + << "%#7" << radix_fmt << "\t" + << "%#7" << radix_fmt << "\t"; + outs() << format(fmt.str().c_str(), TotalObjectText, TotalObjectData, + TotalObjectBss); + fmtbuf.clear(); + fmt << "%7" << (Radix == octal ? 
PRIo64 : PRIu64) << "\t" + << "%7" PRIx64 "\t"; + outs() << format(fmt.str().c_str(), TotalObjectTotal, TotalObjectTotal) + << "(TOTALS)\n"; +} + +int main(int argc, char **argv) { + InitLLVM X(argc, argv); + cl::HideUnrelatedOptions(SizeCat); + cl::ParseCommandLineOptions(argc, argv, "llvm object size dumper\n"); + + ToolName = argv[0]; + if (OutputFormatShort.getNumOccurrences()) + OutputFormat = static_cast<OutputFormatTy>(OutputFormatShort); + if (RadixShort.getNumOccurrences()) + Radix = RadixShort.getValue(); + + for (StringRef Arch : ArchFlags) { + if (Arch == "all") { + ArchAll = true; + } else { + if (!MachOObjectFile::isValidArch(Arch)) { + outs() << ToolName << ": for the -arch option: Unknown architecture " + << "named '" << Arch << "'"; + return 1; + } + } + } + + if (InputFilenames.empty()) + InputFilenames.push_back("a.out"); + + MoreThanOneFile = InputFilenames.size() > 1; + llvm::for_each(InputFilenames, printFileSectionSizes); + if (OutputFormat == berkeley && TotalSizes) + printBerkeleyTotals(); + + if (HadError) + return 1; +} diff --git a/contrib/llvm-project/llvm/tools/llvm-strings/llvm-strings.cpp b/contrib/llvm-project/llvm/tools/llvm-strings/llvm-strings.cpp new file mode 100644 index 000000000000..51313d73401e --- /dev/null +++ b/contrib/llvm-project/llvm/tools/llvm-strings/llvm-strings.cpp @@ -0,0 +1,120 @@ +//===-- llvm-strings.cpp - Printable String dumping utility ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This program is a utility that works like binutils "strings", that is, it +// prints out printable strings in a binary, objdump, or archive file. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/Binary.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/InitLLVM.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Program.h" +#include <cctype> +#include <string> + +using namespace llvm; +using namespace llvm::object; + +static cl::list<std::string> InputFileNames(cl::Positional, + cl::desc("<input object files>"), + cl::ZeroOrMore); + +static cl::opt<bool> + PrintFileName("print-file-name", + cl::desc("Print the name of the file before each string")); +static cl::alias PrintFileNameShort("f", cl::desc(""), + cl::aliasopt(PrintFileName)); + +static cl::opt<int> + MinLength("bytes", cl::desc("Print sequences of the specified length"), + cl::init(4)); +static cl::alias MinLengthShort("n", cl::desc(""), cl::aliasopt(MinLength)); + +static cl::opt<bool> + AllSections("all", + cl::desc("Check all sections, not just the data section")); +static cl::alias AllSectionsShort("a", cl::desc(""), + cl::aliasopt(AllSections)); + +enum radix { none, octal, hexadecimal, decimal }; +static cl::opt<radix> + Radix("radix", cl::desc("print the offset within the file"), + cl::values(clEnumValN(octal, "o", "octal"), + clEnumValN(hexadecimal, "x", "hexadecimal"), + clEnumValN(decimal, "d", "decimal")), + cl::init(none)); +static cl::alias RadixShort("t", cl::desc(""), cl::aliasopt(Radix)); + +static cl::extrahelp + HelpResponse("\nPass @FILE as argument to read options from FILE.\n"); + +static void strings(raw_ostream &OS, StringRef FileName, StringRef Contents) { + auto print = [&OS, FileName](unsigned Offset, StringRef L) { + if (L.size() < static_cast<size_t>(MinLength)) + return; + if (PrintFileName) + OS << FileName << ": "; + switch (Radix) { + case none: + break; + case octal: + OS << format("%7o ", Offset); + break; + case hexadecimal: + OS << format("%7x ", Offset); + 
break; + case decimal: + OS << format("%7u ", Offset); + break; + } + OS << L << '\n'; + }; + + const char *B = Contents.begin(); + const char *P = nullptr, *E = nullptr, *S = nullptr; + for (P = Contents.begin(), E = Contents.end(); P < E; ++P) { + if (isPrint(*P) || *P == '\t') { + if (S == nullptr) + S = P; + } else if (S) { + print(S - B, StringRef(S, P - S)); + S = nullptr; + } + } + if (S) + print(S - B, StringRef(S, E - S)); +} + +int main(int argc, char **argv) { + InitLLVM X(argc, argv); + + cl::ParseCommandLineOptions(argc, argv, "llvm string dumper\n"); + if (MinLength == 0) { + errs() << "invalid minimum string length 0\n"; + return EXIT_FAILURE; + } + + if (InputFileNames.empty()) + InputFileNames.push_back("-"); + + for (const auto &File : InputFileNames) { + ErrorOr<std::unique_ptr<MemoryBuffer>> Buffer = + MemoryBuffer::getFileOrSTDIN(File); + if (std::error_code EC = Buffer.getError()) + errs() << File << ": " << EC.message() << '\n'; + else + strings(llvm::outs(), File == "-" ? "{standard input}" : File, + Buffer.get()->getMemBufferRef().getBuffer()); + } + + return EXIT_SUCCESS; +} |