diff options
73 files changed, 864 insertions, 196 deletions
diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def index 5c8af65326ed..9d53b5b923bb 100644 --- a/clang/include/clang/Basic/CodeGenOptions.def +++ b/clang/include/clang/Basic/CodeGenOptions.def @@ -266,6 +266,9 @@ CODEGENOPT(VectorizeLoop , 1, 0) ///< Run loop vectorizer. CODEGENOPT(VectorizeSLP , 1, 0) ///< Run SLP vectorizer. CODEGENOPT(ProfileSampleAccurate, 1, 0) ///< Sample profile is accurate. +/// Treat loops as finite: language, always, never. +ENUM_CODEGENOPT(FiniteLoops, FiniteLoopsKind, 2, FiniteLoopsKind::Language) + /// Attempt to use register sized accesses to bit-fields in structures, when /// possible. CODEGENOPT(UseRegisterSizedBitfieldAccess , 1, 0) diff --git a/clang/include/clang/Basic/CodeGenOptions.h b/clang/include/clang/Basic/CodeGenOptions.h index 73d41e3293c6..c550817f0f69 100644 --- a/clang/include/clang/Basic/CodeGenOptions.h +++ b/clang/include/clang/Basic/CodeGenOptions.h @@ -140,6 +140,12 @@ public: All, // Keep all frame pointers. }; + enum FiniteLoopsKind { + Language, // Not specified, use language standard. + Always, // All loops are assumed to be finite. + Never, // No loop is assumed to be finite. + }; + /// The code model to use (-mcmodel). std::string CodeModel; diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 1f6c13d5cc96..817798926650 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -2410,6 +2410,11 @@ def fno_unroll_loops : Flag<["-"], "fno-unroll-loops">, Group<f_Group>, defm reroll_loops : BoolFOption<"reroll-loops", CodeGenOpts<"RerollLoops">, DefaultFalse, PosFlag<SetTrue, [CC1Option], "Turn on loop reroller">, NegFlag<SetFalse>>; +def ffinite_loops: Flag<["-"], "ffinite-loops">, Group<f_Group>, + HelpText<"Assume all loops are finite.">, Flags<[CC1Option]>; +def fno_finite_loops: Flag<["-"], "fno-finite-loops">, Group<f_Group>, + HelpText<"Do not assume that any loop is finite.">, Flags<[CC1Option]>; + def ftrigraphs : Flag<["-"], "ftrigraphs">, Group<f_Group>, HelpText<"Process trigraph sequences">, Flags<[CC1Option]>; def fno_trigraphs : Flag<["-"], "fno-trigraphs">, Group<f_Group>, diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 42801372189b..bc7582c67989 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -1995,9 +1995,14 @@ void CodeGenModule::ConstructAttributeList( if (TargetDecl->hasAttr<ConstAttr>()) { FuncAttrs.addAttribute(llvm::Attribute::ReadNone); FuncAttrs.addAttribute(llvm::Attribute::NoUnwind); + // gcc specifies that 'const' functions have greater restrictions than + // 'pure' functions, so they also cannot have infinite loops. + FuncAttrs.addAttribute(llvm::Attribute::WillReturn); } else if (TargetDecl->hasAttr<PureAttr>()) { FuncAttrs.addAttribute(llvm::Attribute::ReadOnly); FuncAttrs.addAttribute(llvm::Attribute::NoUnwind); + // gcc specifies that 'pure' functions cannot have infinite loops. + FuncAttrs.addAttribute(llvm::Attribute::WillReturn); } else if (TargetDecl->hasAttr<NoAliasAttr>()) { FuncAttrs.addAttribute(llvm::Attribute::ArgMemOnly); FuncAttrs.addAttribute(llvm::Attribute::NoUnwind); diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 57cc2d60e2af..83dfa0780547 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -9892,7 +9892,7 @@ void CGOpenMPRuntime::emitTargetNumIterationsCall( llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations}; CGF.EmitRuntimeCall( OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_push_target_tripcount), + CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper), Args); } }; diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 8eb7adbc8fcb..95c0b7b4d7c0 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -507,12 +507,23 @@ public: /// True if the C++ Standard Requires Progress. bool CPlusPlusWithProgress() { + if (CGM.getCodeGenOpts().getFiniteLoops() == + CodeGenOptions::FiniteLoopsKind::Never) + return false; + return getLangOpts().CPlusPlus11 || getLangOpts().CPlusPlus14 || getLangOpts().CPlusPlus17 || getLangOpts().CPlusPlus20; } /// True if the C Standard Requires Progress. bool CWithProgress() { + if (CGM.getCodeGenOpts().getFiniteLoops() == + CodeGenOptions::FiniteLoopsKind::Always) + return true; + if (CGM.getCodeGenOpts().getFiniteLoops() == + CodeGenOptions::FiniteLoopsKind::Never) + return false; + return getLangOpts().C11 || getLangOpts().C17 || getLangOpts().C2x; } diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index f8e637974662..1976b48e0f6a 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -5620,6 +5620,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, if (A->getOption().matches(options::OPT_freroll_loops)) CmdArgs.push_back("-freroll-loops"); + Args.AddLastArg(CmdArgs, options::OPT_ffinite_loops, + options::OPT_fno_finite_loops); + Args.AddLastArg(CmdArgs, options::OPT_fwritable_strings); Args.AddLastArg(CmdArgs, options::OPT_funroll_loops, options::OPT_fno_unroll_loops); diff --git a/clang/lib/Driver/ToolChains/MSVC.cpp b/clang/lib/Driver/ToolChains/MSVC.cpp index f4b7a57e0bb7..13943b6c404a 100644 --- a/clang/lib/Driver/ToolChains/MSVC.cpp +++ b/clang/lib/Driver/ToolChains/MSVC.cpp @@ -11,6 +11,7 @@ #include "Darwin.h" #include "clang/Basic/CharInfo.h" #include "clang/Basic/Version.h" +#include "clang/Config/config.h" #include "clang/Driver/Compilation.h" #include "clang/Driver/Driver.h" #include "clang/Driver/DriverDiagnostic.h" @@ -520,7 +521,10 @@ void visualstudio::Linker::ConstructJob(Compilation &C, const JobAction &JA, // translate 'lld' into 'lld-link', and in the case of the regular msvc // linker, we need to use a special search algorithm. llvm::SmallString<128> linkPath; - StringRef Linker = Args.getLastArgValue(options::OPT_fuse_ld_EQ, "link"); + StringRef Linker + = Args.getLastArgValue(options::OPT_fuse_ld_EQ, CLANG_DEFAULT_LINKER); + if (Linker.empty()) + Linker = "link"; if (Linker.equals_lower("lld")) Linker = "lld-link"; diff --git a/clang/lib/Driver/ToolChains/OpenBSD.cpp b/clang/lib/Driver/ToolChains/OpenBSD.cpp index f155d74632f9..e162165b2561 100644 --- a/clang/lib/Driver/ToolChains/OpenBSD.cpp +++ b/clang/lib/Driver/ToolChains/OpenBSD.cpp @@ -296,6 +296,7 @@ void OpenBSD::AddCXXStdlibLibArgs(const ArgList &Args, CmdArgs.push_back(Profiling ? "-lc++_p" : "-lc++"); CmdArgs.push_back(Profiling ? "-lc++abi_p" : "-lc++abi"); + CmdArgs.push_back(Profiling ? "-lpthread_p" : "-lpthread"); } std::string OpenBSD::getCompilerRT(const ArgList &Args, diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 036388ebd355..5c5cf46150e2 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -1037,7 +1037,6 @@ bool CompilerInvocation::ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, Opts.UnrollLoops = Args.hasFlag(OPT_funroll_loops, OPT_fno_unroll_loops, (Opts.OptimizationLevel > 1)); - Opts.BinutilsVersion = std::string(Args.getLastArgValue(OPT_fbinutils_version_EQ)); @@ -1324,6 +1323,10 @@ bool CompilerInvocation::ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, Opts.EmitVersionIdentMetadata = Args.hasFlag(OPT_Qy, OPT_Qn, true); + if (Args.hasArg(options::OPT_ffinite_loops)) + Opts.FiniteLoops = CodeGenOptions::FiniteLoopsKind::Always; + else if (Args.hasArg(options::OPT_fno_finite_loops)) + Opts.FiniteLoops = CodeGenOptions::FiniteLoopsKind::Never; return Success; } diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp index d47ad1b74649..c64a912ce919 100644 --- a/clang/lib/Frontend/InitPreprocessor.cpp +++ b/clang/lib/Frontend/InitPreprocessor.cpp @@ -565,7 +565,7 @@ static void InitializeCPlusPlusFeatureTestMacros(const LangOptions &LangOpts, Builder.defineMacro("__cpp_aggregate_bases", "201603L"); Builder.defineMacro("__cpp_structured_bindings", "201606L"); Builder.defineMacro("__cpp_nontype_template_args", - LangOpts.CPlusPlus20 ? "201911L" : "201411L"); + "201411L"); // (not latest) Builder.defineMacro("__cpp_fold_expressions", "201603L"); Builder.defineMacro("__cpp_guaranteed_copy_elision", "201606L"); Builder.defineMacro("__cpp_nontype_template_parameter_auto", "201606L"); diff --git a/clang/lib/Sema/SemaCodeComplete.cpp b/clang/lib/Sema/SemaCodeComplete.cpp index c2785fd60fc2..be04970979b3 100644 --- a/clang/lib/Sema/SemaCodeComplete.cpp +++ b/clang/lib/Sema/SemaCodeComplete.cpp @@ -5158,6 +5158,20 @@ private: llvm::DenseMap<const IdentifierInfo *, Member> Results; }; + +// If \p Base is ParenListExpr, assume a chain of comma operators and pick the +// last expr. We expect other ParenListExprs to be resolved to e.g. constructor +// calls before here. (So the ParenListExpr should be nonempty, but check just +// in case) +Expr *unwrapParenList(Expr *Base) { + if (auto *PLE = llvm::dyn_cast_or_null<ParenListExpr>(Base)) { + if (PLE->getNumExprs() == 0) + return nullptr; + Base = PLE->getExpr(PLE->getNumExprs() - 1); + } + return Base; +} + } // namespace void Sema::CodeCompleteMemberReferenceExpr(Scope *S, Expr *Base, @@ -5165,6 +5179,8 @@ void Sema::CodeCompleteMemberReferenceExpr(Scope *S, Expr *Base, SourceLocation OpLoc, bool IsArrow, bool IsBaseExprStatement, QualType PreferredType) { + Base = unwrapParenList(Base); + OtherOpBase = unwrapParenList(OtherOpBase); if (!Base || !CodeCompleter) return; @@ -5597,12 +5613,13 @@ ProduceSignatureHelp(Sema &SemaRef, Scope *S, QualType Sema::ProduceCallSignatureHelp(Scope *S, Expr *Fn, ArrayRef<Expr *> Args, SourceLocation OpenParLoc) { - if (!CodeCompleter) + Fn = unwrapParenList(Fn); + if (!CodeCompleter || !Fn) return QualType(); // FIXME: Provide support for variadic template functions. // Ignore type-dependent call expressions entirely. - if (!Fn || Fn->isTypeDependent() || anyNullArguments(Args)) + if (Fn->isTypeDependent() || anyNullArguments(Args)) return QualType(); // In presence of dependent args we surface all possible signatures using the // non-dependent args in the prefix. Afterwards we do a post filtering to make diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst index ea1403888eba..24ed23bb2b7d 100644 --- a/lld/docs/ReleaseNotes.rst +++ b/lld/docs/ReleaseNotes.rst @@ -24,13 +24,77 @@ Non-comprehensive list of changes in this release ELF Improvements ---------------- -* ``--error-handling-script`` is added to allow for user-defined handlers upon +* ``--dependency-file`` has been added. (Similar to ``cc -M -MF``.) + (`D82437 <https://reviews.llvm.org/D82437>`_) +* ``--error-handling-script`` has been added to allow for user-defined handlers upon missing libraries. (`D87758 <https://reviews.llvm.org/D87758>`_) +* ``--exclude-libs`` can now localize defined version symbols and bitcode referenced libcall symbols. + (`D94280 <https://reviews.llvm.org/D94280>`_) +* ``--gdb-index`` now works with DWARF v5 and ``--icf={safe,all}``. + (`D85579 <https://reviews.llvm.org/D85579>`_) + (`D89751 <https://reviews.llvm.org/D89751>`_) +* ``--gdb-index --emit-relocs`` can now be used together. + (`D94354 <https://reviews.llvm.org/D94354>`_) +* ``--icf={safe,all}`` conservatively no longer fold text sections with LSDA. + Previously ICF on ``-fexceptions`` code could be unsafe. + (`D84610 <https://reviews.llvm.org/D84610>`_) +* ``--icf={safe,all}`` can now fold two sections with relocations referencing aliased symbols. + (`D88830 <https://reviews.llvm.org/D88830>`_) +* ``--lto-pseudo-probe-for-profiling`` has been added. + (`D95056 <https://reviews.llvm.org/D95056>`_) +* ``--no-lto-whole-program-visibility`` has been added. + (`D92060 <https://reviews.llvm.org/D92060>`_) +* ``--oformat-binary`` has been fixed to respect LMA. + (`D85086 <https://reviews.llvm.org/D85086>`_) +* ``--reproduce`` includes ``--lto-sample-profile``, ``--just-symbols``, ``--call-graph-ordering-file``, ``--retain-symbols-file`` files. +* ``-r --gc-sections`` is now supported. + (`D84131 <https://reviews.llvm.org/D84131>`_) +* A ``-u`` specified symbol will no longer change the binding to ``STB_WEAK``. + (`D88945 <https://reviews.llvm.org/D88945>`_) +* ``--wrap`` support has been improved. + + If ``foo`` is not referenced, there is no longer an undefined symbol ``__wrap_foo``. + + If ``__real_foo`` is not referenced, there is no longer an undefined symbol ``foo``. +* ``SHF_LINK_ORDER`` sections can now have zero ``sh_link`` values. +* ``SHF_LINK_ORDER`` and non-``SHF_LINK_ORDER`` sections can now be mixed within an input section description. + (`D84001 <https://reviews.llvm.org/D84001>`_) +* ``LOG2CEIL`` is now supported in linker scripts. + (`D84054 <https://reviews.llvm.org/D84054>`_) +* ``DEFINED`` has been fixed to check whether the symbol is defined. + (`D83758 <https://reviews.llvm.org/D83758>`_) +* An input section description may now have multiple ``SORT_*``. + The matched sections are ordered by radix sort with the keys being ``(SORT*, --sort-section, input order)``. + (`D91127 <https://reviews.llvm.org/D91127>`_) +* Users can now provide a GNU style linker script to convert ``.ctors`` into ``.init_array``. + (`D91187 <https://reviews.llvm.org/D91187>`_) +* An empty output section can now be discarded even if it is assigned to a program header. + (`D92301 <https://reviews.llvm.org/D92301>`_) +* Non-``SHF_ALLOC`` sections now have larger file offsets than ``SHF_ALLOC`` sections. + (`D85867 <https://reviews.llvm.org/D85867>`_) +* Some symbol versioning improvements. + + Defined ``foo@@v1`` now resolve undefined ``foo@v1`` (`D92259 <https://reviews.llvm.org/D92259>`_) + + Undefined ``foo@v1`` now gets an error (`D92260 <https://reviews.llvm.org/D92260>`_) +* The AArch64 port now has support for ``STO_AARCH64_VARIANT_PCS`` and ``DT_AARCH64_VARIANT_PCS``. + (`D93045 <https://reviews.llvm.org/D93045>`_) +* The AArch64 port now has support for ``R_AARCH64_LD64_GOTPAGE_LO15``. +* The PowerPC64 port now detects missing R_PPC64_TLSGD/R_PPC64_TLSLD and disables TLS relaxation. + This allows linking with object files produced by very old IBM XL compilers. + (`D92959 <https://reviews.llvm.org/D92959>`_) +* Many PowerPC PC-relative relocations are now supported. +* ``R_PPC_ADDR24`` and ``R_PPC64_ADDR16_HIGH`` are now supported. +* powerpcle is now supported. Tested with FreeBSD loader and freestanding. + (`D93917 <https://reviews.llvm.org/D93917>`_) +* RISC-V: the first ``SHT_RISCV_ATTRIBUTES`` section is now retained. + (`D86309 <https://reviews.llvm.org/D86309>`_) +* LTO pipeline now defaults to the new PM if the CMake variable ``ENABLE_EXPERIMENTAL_NEW_PASS_MANAGER`` is on. + (`D92885 <https://reviews.llvm.org/D92885>`_) Breaking changes ---------------- -* ... +* A COMMON symbol can now cause the fetch of an archive providing a ``STB_GLOBAL`` definition. + This behavior follows GNU ld newer than December 1999. + If you see ``duplicate symbol`` errors with the new behavior, check out `PR49226 <https://bugs.llvm.org//show_bug.cgi?id=49226>`_. + (`D86142 <https://reviews.llvm.org/D86142>`_) COFF Improvements ----------------- @@ -58,10 +122,26 @@ MinGW Improvements (`D93950 <https://reviews.llvm.org/D93950>`_) -MachO Improvements +Mach-O Improvements ------------------ -* Item 1. +We've gotten the new implementation of LLD for Mach-O to the point where it is +able to link large x86_64 programs, and we'd love to get some alpha testing on +it. The new Darwin back-end can be invoked as follows: + +.. code-block:: + clang -fuse-ld=lld.darwinnew /path/to/file.c + +To reach this point, we implemented numerous features, and it's easier to list +the major features we *haven't* yet completed: + +* LTO support +* Stack unwinding for exceptions +* Support for arm64, arm, and i386 architectures + +If you stumble upon an issue and it doesn't fall into one of these categories, +please file a bug report! + WebAssembly Improvements ------------------------ diff --git a/llvm/include/llvm-c/Orc.h b/llvm/include/llvm-c/Orc.h index 183107c148a6..9beef44c89dd 100644 --- a/llvm/include/llvm-c/Orc.h +++ b/llvm/include/llvm-c/Orc.h @@ -339,8 +339,7 @@ LLVMErrorRef LLVMOrcResourceTrackerRemove(LLVMOrcResourceTrackerRef RT); * ownership has not been passed to a JITDylib (e.g. because some error * prevented the client from calling LLVMOrcJITDylibAddGenerator). */ -void LLVMOrcDisposeDefinitionGenerator( - LLVMOrcDefinitionGeneratorRef DG); +void LLVMOrcDisposeDefinitionGenerator(LLVMOrcDefinitionGeneratorRef DG); /** * Dispose of a MaterializationUnit. @@ -388,7 +387,9 @@ LLVMOrcExecutionSessionCreateJITDylib(LLVMOrcExecutionSessionRef ES, * Returns the JITDylib with the given name, or NULL if no such JITDylib * exists. */ -LLVMOrcJITDylibRef LLVMOrcExecutionSessionGetJITDylibByName(const char *Name); +LLVMOrcJITDylibRef +LLVMOrcExecutionSessionGetJITDylibByName(LLVMOrcExecutionSessionRef ES, + const char *Name); /** * Return a reference to a newly created resource tracker associated with JD. diff --git a/llvm/include/llvm/CodeGen/FastISel.h b/llvm/include/llvm/CodeGen/FastISel.h index 81c1d6aad49a..26bf4ab2618c 100644 --- a/llvm/include/llvm/CodeGen/FastISel.h +++ b/llvm/include/llvm/CodeGen/FastISel.h @@ -490,7 +490,10 @@ protected: /// - \c Add has a constant operand. bool canFoldAddIntoGEP(const User *GEP, const Value *Add); - /// Test whether the given value has exactly one use. + /// Test whether the register associated with this value has exactly one use, + /// in which case that single use is killing. Note that multiple IR values + /// may map onto the same register, in which case this is not the same as + /// checking that an IR value has one use. bool hasTrivialKill(const Value *V); /// Create a machine mem operand from the given instruction. diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h index 6bbe2d03f9e5..f8d97c2c07a6 100644 --- a/llvm/include/llvm/CodeGen/MachineInstr.h +++ b/llvm/include/llvm/CodeGen/MachineInstr.h @@ -1156,6 +1156,10 @@ public: return getOpcode() == TargetOpcode::CFI_INSTRUCTION; } + bool isPseudoProbe() const { + return getOpcode() == TargetOpcode::PSEUDO_PROBE; + } + // True if the instruction represents a position in the function. bool isPosition() const { return isLabel() || isCFIInstruction(); } @@ -1165,6 +1169,9 @@ public: bool isDebugInstr() const { return isDebugValue() || isDebugLabel() || isDebugRef(); } + bool isDebugOrPseudoInstr() const { + return isDebugInstr() || isPseudoProbe(); + } bool isDebugOffsetImm() const { return getDebugOffset().isImm(); } diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index 844046167975..75d360bf4237 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -375,7 +375,7 @@ __OMP_RTL(__kmpc_init_allocator, false, /* omp_allocator_handle_t */ VoidPtr, __OMP_RTL(__kmpc_destroy_allocator, false, Void, /* Int */ Int32, /* omp_allocator_handle_t */ VoidPtr) -__OMP_RTL(__kmpc_push_target_tripcount, false, Void, IdentPtr, Int64, Int64) +__OMP_RTL(__kmpc_push_target_tripcount_mapper, false, Void, IdentPtr, Int64, Int64) __OMP_RTL(__tgt_target_mapper, false, Int32, IdentPtr, Int64, VoidPtr, Int32, VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, VoidPtrPtr) __OMP_RTL(__tgt_target_nowait_mapper, false, Int32, IdentPtr, Int64, VoidPtr, Int32, @@ -844,7 +844,7 @@ __OMP_RTL_ATTRS(__kmpc_free, AllocAttrs, AttributeSet(), {}) __OMP_RTL_ATTRS(__kmpc_init_allocator, DefaultAttrs, ReturnPtrAttrs, {}) __OMP_RTL_ATTRS(__kmpc_destroy_allocator, AllocAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_push_target_tripcount, SetterAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_push_target_tripcount_mapper, SetterAttrs, AttributeSet(), {}) __OMP_RTL_ATTRS(__tgt_target_mapper, ForkAttrs, AttributeSet(), {}) __OMP_RTL_ATTRS(__tgt_target_nowait_mapper, ForkAttrs, AttributeSet(), {}) __OMP_RTL_ATTRS(__tgt_target_teams_mapper, ForkAttrs, AttributeSet(), {}) diff --git a/llvm/include/llvm/IR/InstrTypes.h b/llvm/include/llvm/IR/InstrTypes.h index f42ef48de6b3..955ac8e537fe 100644 --- a/llvm/include/llvm/IR/InstrTypes.h +++ b/llvm/include/llvm/IR/InstrTypes.h @@ -1757,9 +1757,6 @@ public: return doesNotAccessMemory() || hasFnAttr(Attribute::ReadOnly); } - /// Returns true if this function is guaranteed to return. - bool willReturn() const { return hasFnAttr(Attribute::WillReturn); } - void setOnlyReadsMemory() { addAttribute(AttributeList::FunctionIndex, Attribute::ReadOnly); } diff --git a/llvm/include/llvm/IR/Instruction.h b/llvm/include/llvm/IR/Instruction.h index d2a55f89fac9..b99dc62bbb9d 100644 --- a/llvm/include/llvm/IR/Instruction.h +++ b/llvm/include/llvm/IR/Instruction.h @@ -633,6 +633,10 @@ public: /// generated program. bool isSafeToRemove() const; + /// Return true if the instruction will return (unwinding is considered as + /// a form of returning control flow here). + bool willReturn() const; + /// Return true if the instruction is a variety of EH-block. bool isEHPad() const { switch (getOpcode()) { @@ -650,6 +654,9 @@ public: /// llvm.lifetime.end marker. bool isLifetimeStartOrEnd() const; + /// Return true if the instruction is a DbgInfoIntrinsic or PseudoProbeInst. + bool isDebugOrPseudoInst() const; + /// Return a pointer to the next non-debug instruction in the same basic /// block as 'this', or nullptr if no such instruction exists. Skip any pseudo /// operations if \c SkipPseudoOp is true. diff --git a/llvm/include/llvm/IR/Metadata.h b/llvm/include/llvm/IR/Metadata.h index 0b87416befe9..9a4480b75a30 100644 --- a/llvm/include/llvm/IR/Metadata.h +++ b/llvm/include/llvm/IR/Metadata.h @@ -667,6 +667,12 @@ struct AAMDNodes { /// The tag specifying the noalias scope. MDNode *NoAlias = nullptr; + // Shift tbaa Metadata node to start off bytes later + static MDNode *ShiftTBAA(MDNode *M, size_t off); + + // Shift tbaa.struct Metadata node to start off bytes later + static MDNode *ShiftTBAAStruct(MDNode *M, size_t off); + /// Given two sets of AAMDNodes that apply to the same pointer, /// give the best AAMDNodes that are compatible with both (i.e. a set of /// nodes whose allowable aliasing conclusions are a subset of those @@ -680,6 +686,18 @@ struct AAMDNodes { Result.NoAlias = Other.NoAlias == NoAlias ? NoAlias : nullptr; return Result; } + + /// Create a new AAMDNode that describes this AAMDNode after applying a + /// constant offset to the start of the pointer + AAMDNodes shift(size_t Offset) { + AAMDNodes Result; + Result.TBAA = TBAA ? ShiftTBAA(TBAA, Offset) : nullptr; + Result.TBAAStruct = + TBAAStruct ? ShiftTBAAStruct(TBAAStruct, Offset) : nullptr; + Result.Scope = Scope; + Result.NoAlias = NoAlias; + return Result; + } }; // Specialize DenseMapInfo for AAMDNodes. diff --git a/llvm/include/llvm/IR/Operator.h b/llvm/include/llvm/IR/Operator.h index acfacbd6c74e..945f7e46e142 100644 --- a/llvm/include/llvm/IR/Operator.h +++ b/llvm/include/llvm/IR/Operator.h @@ -568,6 +568,11 @@ public: bool accumulateConstantOffset( const DataLayout &DL, APInt &Offset, function_ref<bool(Value &, APInt &)> ExternalAnalysis = nullptr) const; + + static bool accumulateConstantOffset( + Type *SourceType, ArrayRef<const Value *> Index, const DataLayout &DL, + APInt &Offset, + function_ref<bool(Value &, APInt &)> ExternalAnalysis = nullptr); }; class PtrToIntOperator diff --git a/llvm/include/llvm/ProfileData/ProfileCommon.h b/llvm/include/llvm/ProfileData/ProfileCommon.h index 6bb5825339ae..55b94b2e690d 100644 --- a/llvm/include/llvm/ProfileData/ProfileCommon.h +++ b/llvm/include/llvm/ProfileData/ProfileCommon.h @@ -17,6 +17,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/IR/ProfileSummary.h" #include "llvm/ProfileData/InstrProf.h" +#include "llvm/ProfileData/SampleProf.h" #include "llvm/Support/Error.h" #include <algorithm> #include <cstdint> @@ -89,6 +90,8 @@ public: void addRecord(const sampleprof::FunctionSamples &FS, bool isCallsiteSample = false); + std::unique_ptr<ProfileSummary> computeSummaryForProfiles( + const StringMap<sampleprof::FunctionSamples> &Profiles); std::unique_ptr<ProfileSummary> getSummary(); }; diff --git a/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h b/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h index 526e141838c4..da0bdae0eaee 100644 --- a/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h +++ b/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h @@ -18,6 +18,7 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Analysis/CallGraph.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/Instructions.h" #include "llvm/ProfileData/SampleProf.h" @@ -90,6 +91,8 @@ private: // calling context and the context is identified by path from root to the node. class SampleContextTracker { public: + using ContextSamplesTy = SmallSet<FunctionSamples *, 16>; + SampleContextTracker(StringMap<FunctionSamples> &Profiles); // Query context profile for a specific callee with given name at a given // call-site. The full context is identified by location of call instruction. @@ -103,6 +106,9 @@ public: FunctionSamples *getContextSamplesFor(const DILocation *DIL); // Query context profile for a given sample contxt of a function. FunctionSamples *getContextSamplesFor(const SampleContext &Context); + // Get all context profile for given function. + ContextSamplesTy &getAllContextSamplesFor(const Function &Func); + ContextSamplesTy &getAllContextSamplesFor(StringRef Name); // Query base profile for a given function. A base profile is a merged view // of all context profiles for contexts that are not inlined. FunctionSamples *getBaseSamplesFor(const Function &Func, @@ -113,6 +119,9 @@ public: // This makes sure that inlined context profile will be excluded in // function's base profile. void markContextSamplesInlined(const FunctionSamples *InlinedSamples); + void promoteMergeContextSamplesTree(const Instruction &Inst, + StringRef CalleeName); + void addCallGraphEdges(CallGraph &CG, StringMap<Function *> &SymbolMap); // Dump the internal context profile trie. void dump(); @@ -126,8 +135,6 @@ private: ContextTrieNode *getTopLevelContextNode(StringRef FName); ContextTrieNode &addTopLevelContextNode(StringRef FName); ContextTrieNode &promoteMergeContextSamplesTree(ContextTrieNode &NodeToPromo); - void promoteMergeContextSamplesTree(const Instruction &Inst, - StringRef CalleeName); void mergeContextNode(ContextTrieNode &FromNode, ContextTrieNode &ToNode, StringRef ContextStrToRemove); ContextTrieNode &promoteMergeContextSamplesTree(ContextTrieNode &FromNode, @@ -135,7 +142,7 @@ private: StringRef ContextStrToRemove); // Map from function name to context profiles (excluding base profile) - StringMap<SmallSet<FunctionSamples *, 16>> FuncToCtxtProfileSet; + StringMap<ContextSamplesTy> FuncToCtxtProfileSet; // Root node for context trie tree ContextTrieNode RootContext; diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h b/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h index cab893b50d19..0fd79d8ff7f3 100644 --- a/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h +++ b/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h @@ -62,7 +62,7 @@ public: private: // Allow a little bias due the rounding to integral factors. - constexpr static float DistributionFactorVariance = 0.02; + constexpr static float DistributionFactorVariance = 0.02f; // Distribution factors from last pass. FuncProbeFactorMap FunctionProbeFactors; diff --git a/llvm/include/llvm/Transforms/Utils/Cloning.h b/llvm/include/llvm/Transforms/Utils/Cloning.h index 56aaa5d48e2a..aa960c625630 100644 --- a/llvm/include/llvm/Transforms/Utils/Cloning.h +++ b/llvm/include/llvm/Transforms/Utils/Cloning.h @@ -274,6 +274,13 @@ void updateProfileCallee( void identifyNoAliasScopesToClone( ArrayRef<BasicBlock *> BBs, SmallVectorImpl<MDNode *> &NoAliasDeclScopes); +/// Find the 'llvm.experimental.noalias.scope.decl' intrinsics in the specified +/// instruction range and extract their scope. These are candidates for +/// duplication when cloning. +void identifyNoAliasScopesToClone( + BasicBlock::iterator Start, BasicBlock::iterator End, + SmallVectorImpl<MDNode *> &NoAliasDeclScopes); + /// Duplicate the specified list of noalias decl scopes. /// The 'Ext' string is added as an extension to the name. /// Afterwards, the ClonedScopes contains the mapping of the original scope diff --git a/llvm/lib/Analysis/DemandedBits.cpp b/llvm/lib/Analysis/DemandedBits.cpp index 461fd7239905..dd11b0b02bf8 100644 --- a/llvm/lib/Analysis/DemandedBits.cpp +++ b/llvm/lib/Analysis/DemandedBits.cpp @@ -80,7 +80,7 @@ void DemandedBitsWrapperPass::print(raw_ostream &OS, const Module *M) const { static bool isAlwaysLive(Instruction *I) { return I->isTerminator() || isa<DbgInfoIntrinsic>(I) || I->isEHPad() || - I->mayHaveSideEffects(); + I->mayHaveSideEffects() || !I->willReturn(); } void DemandedBits::determineLiveOperandBits( diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp index 7f311d8f9a2b..94a24ccf2155 100644 --- a/llvm/lib/Analysis/IVDescriptors.cpp +++ b/llvm/lib/Analysis/IVDescriptors.cpp @@ -243,11 +243,14 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind, if (RecurrenceType->isFloatingPointTy()) { if (!isFloatingPointRecurrenceKind(Kind)) return false; - } else { + } else if (RecurrenceType->isIntegerTy()) { if (!isIntegerRecurrenceKind(Kind)) return false; if (isArithmeticRecurrenceKind(Kind)) Start = lookThroughAnd(Phi, RecurrenceType, VisitedInsts, CastInsts); + } else { + // Pointer min/max may exist, but it is not supported as a reduction op. + return false; } Worklist.push_back(Start); diff --git a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp index 7d97fc5da9b0..268acb682cf1 100644 --- a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp +++ b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp @@ -737,3 +737,84 @@ bool TypeBasedAAWrapperPass::doFinalization(Module &M) { void TypeBasedAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); } + +MDNode *AAMDNodes::ShiftTBAA(MDNode *MD, size_t Offset) { + // Fast path if there's no offset + if (Offset == 0) + return MD; + // Fast path if there's no path tbaa node (and thus scalar) + if (!isStructPathTBAA(MD)) + return MD; + + TBAAStructTagNode Tag(MD); + SmallVector<Metadata *, 5> Sub; + Sub.push_back(MD->getOperand(0)); + Sub.push_back(MD->getOperand(1)); + ConstantInt *InnerOffset = mdconst::extract<ConstantInt>(MD->getOperand(2)); + + if (Tag.isNewFormat()) { + ConstantInt *InnerSize = mdconst::extract<ConstantInt>(MD->getOperand(3)); + + if (InnerOffset->getZExtValue() + InnerSize->getZExtValue() <= Offset) { + return nullptr; + } + + uint64_t NewSize = InnerSize->getZExtValue(); + uint64_t NewOffset = InnerOffset->getZExtValue() - Offset; + if (InnerOffset->getZExtValue() < Offset) { + NewOffset = 0; + NewSize -= Offset - InnerOffset->getZExtValue(); + } + + Sub.push_back(ConstantAsMetadata::get( + ConstantInt::get(InnerOffset->getType(), NewOffset))); + + Sub.push_back(ConstantAsMetadata::get( + ConstantInt::get(InnerSize->getType(), NewSize))); + + // immutable type + if (MD->getNumOperands() >= 5) + Sub.push_back(MD->getOperand(4)); + } else { + if (InnerOffset->getZExtValue() < Offset) + return nullptr; + + Sub.push_back(ConstantAsMetadata::get(ConstantInt::get( + InnerOffset->getType(), InnerOffset->getZExtValue() - Offset))); + + // immutable type + if (MD->getNumOperands() >= 4) + Sub.push_back(MD->getOperand(3)); + } + return MDNode::get(MD->getContext(), Sub); +} + +MDNode *AAMDNodes::ShiftTBAAStruct(MDNode *MD, size_t Offset) { + // Fast path if there's no offset + if (Offset == 0) + return MD; + SmallVector<Metadata *, 3> Sub; + for (size_t i = 0, size = MD->getNumOperands(); i < size; i += 3) { + ConstantInt *InnerOffset = mdconst::extract<ConstantInt>(MD->getOperand(i)); + ConstantInt *InnerSize = + mdconst::extract<ConstantInt>(MD->getOperand(i + 1)); + // Don't include any triples that aren't in bounds + if (InnerOffset->getZExtValue() + InnerSize->getZExtValue() <= Offset) + continue; + + uint64_t NewSize = InnerSize->getZExtValue(); + uint64_t NewOffset = InnerOffset->getZExtValue() - Offset; + if (InnerOffset->getZExtValue() < Offset) { + NewOffset = 0; + NewSize -= Offset - InnerOffset->getZExtValue(); + } + + // Shift the offset of the triple + Sub.push_back(ConstantAsMetadata::get( + ConstantInt::get(InnerOffset->getType(), NewOffset))); + Sub.push_back(ConstantAsMetadata::get( + ConstantInt::get(InnerSize->getType(), NewSize))); + Sub.push_back(MD->getOperand(i + 2)); + } + return MDNode::get(MD->getContext(), Sub); +}
\ No newline at end of file diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 5600a3b33750..e174c5efe424 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -5018,36 +5018,14 @@ bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) { // arbitrary length of time, but programs aren't allowed to rely on that. // If there is no successor, then execution can't transfer to it. - if (const auto *CRI = dyn_cast<CleanupReturnInst>(I)) - return !CRI->unwindsToCaller(); - if (const auto *CatchSwitch = dyn_cast<CatchSwitchInst>(I)) - return !CatchSwitch->unwindsToCaller(); - if (isa<ResumeInst>(I)) - return false; if (isa<ReturnInst>(I)) return false; if (isa<UnreachableInst>(I)) return false; - // Calls can throw, or contain an infinite loop, or kill the process. - if (const auto *CB = dyn_cast<CallBase>(I)) { - // Call sites that throw have implicit non-local control flow. - if (!CB->doesNotThrow()) - return false; - - // A function which doens't throw and has "willreturn" attribute will - // always return. - if (CB->hasFnAttr(Attribute::WillReturn)) - return true; - - // FIXME: Temporarily assume that all side-effect free intrinsics will - // return. Remove this workaround once all intrinsics are appropriately - // annotated. - return isa<IntrinsicInst>(CB) && CB->onlyReadsMemory(); - } - - // Other instructions return normally. - return true; + // An instruction that returns without throwing must transfer control flow + // to a successor. + return !I->mayThrow() && I->willReturn(); } bool llvm::isGuaranteedToTransferExecutionToSuccessor(const BasicBlock *BB) { diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index e7f40523efaf..3178ee16af2b 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -1063,6 +1063,11 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, Observer.changedInstr(MI); return Legalized; case TargetOpcode::G_PHI: { + // FIXME: add support for when SizeOp0 isn't an exact multiple of + // NarrowSize. + if (SizeOp0 % NarrowSize != 0) + return UnableToLegalize; + unsigned NumParts = SizeOp0 / NarrowSize; SmallVector<Register, 2> DstRegs(NumParts); SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2); diff --git a/llvm/lib/CodeGen/LiveRangeShrink.cpp b/llvm/lib/CodeGen/LiveRangeShrink.cpp index 26439a656917..7fa14fd902ef 100644 --- a/llvm/lib/CodeGen/LiveRangeShrink.cpp +++ b/llvm/lib/CodeGen/LiveRangeShrink.cpp @@ -156,7 +156,8 @@ bool LiveRangeShrink::runOnMachineFunction(MachineFunction &MF) { // If MI has side effects, it should become a barrier for code motion. // IOM is rebuild from the next instruction to prevent later // instructions from being moved before this MI. - if (MI.hasUnmodeledSideEffects() && Next != MBB.end()) { + if (MI.hasUnmodeledSideEffects() && !MI.isPseudoProbe() && + Next != MBB.end()) { BuildInstOrderMap(Next, IOM); SawStore = false; } diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp index 59d98054e3a2..b6cfd7dcbfbc 100644 --- a/llvm/lib/CodeGen/MachineInstr.cpp +++ b/llvm/lib/CodeGen/MachineInstr.cpp @@ -1462,7 +1462,8 @@ bool MachineInstr::hasUnmodeledSideEffects() const { } bool MachineInstr::isLoadFoldBarrier() const { - return mayStore() || isCall() || hasUnmodeledSideEffects(); + return mayStore() || isCall() || + (hasUnmodeledSideEffects() && !isPseudoProbe()); } /// allDefsAreDead - Return true if all the defs of this instruction are dead. diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 89670d708264..6a6f83827f72 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -6517,8 +6517,11 @@ static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift, // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate // in direction shift1 by Neg. The range [0, EltSize) means that we only need // to consider shift amounts with defined behavior. +// +// The IsRotate flag should be set when the LHS of both shifts is the same. +// Otherwise if matching a general funnel shift, it should be clear. static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize, - SelectionDAG &DAG) { + SelectionDAG &DAG, bool IsRotate) { // If EltSize is a power of 2 then: // // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1) @@ -6550,8 +6553,11 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize, // always invokes undefined behavior for 32-bit X. // // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise. + // + // NOTE: We can only do this when matching an AND and not a general + // funnel shift. unsigned MaskLoBits = 0; - if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) { + if (IsRotate && Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) { if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) { KnownBits Known = DAG.computeKnownBits(Neg.getOperand(0)); unsigned Bits = Log2_64(EltSize); @@ -6641,7 +6647,8 @@ SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos, // (srl x, (*ext y))) -> // (rotr x, y) or (rotl x, (sub 32, y)) EVT VT = Shifted.getValueType(); - if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG)) { + if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG, + /*IsRotate*/ true)) { bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT); return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted, HasPos ? Pos : Neg); @@ -6670,7 +6677,7 @@ SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, // fold (or (shl x0, (*ext (sub 32, y))), // (srl x1, (*ext y))) -> // (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y)) - if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG)) { + if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1)) { bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT); return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1, HasPos ? Pos : Neg); diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index 62f7f3d98ba6..0ff77d4ba1ab 100644 --- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -261,12 +261,16 @@ bool FastISel::hasTrivialKill(const Value *V) { if (GEP->hasAllZeroIndices() && !hasTrivialKill(GEP->getOperand(0))) return false; + // Casts and extractvalues may be trivially coalesced by fast-isel. + if (I->getOpcode() == Instruction::BitCast || + I->getOpcode() == Instruction::PtrToInt || + I->getOpcode() == Instruction::IntToPtr || + I->getOpcode() == Instruction::ExtractValue) + return false; + // Only instructions with a single use in the same basic block are considered // to have trivial kills. return I->hasOneUse() && - !(I->getOpcode() == Instruction::BitCast || - I->getOpcode() == Instruction::PtrToInt || - I->getOpcode() == Instruction::IntToPtr) && cast<Instruction>(*I->user_begin())->getParent() == I->getParent(); } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 6638ff6a6358..a6bd774934ac 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -9660,8 +9660,9 @@ findArgumentCopyElisionCandidates(const DataLayout &DL, // We will look through cast uses, so ignore them completely. if (I.isCast()) continue; - // Ignore debug info intrinsics, they don't escape or store to allocas. - if (isa<DbgInfoIntrinsic>(I)) + // Ignore debug info and pseudo op intrinsics, they don't escape or store + // to allocas. + if (I.isDebugOrPseudoInst()) continue; // This is an unknown instruction. Assume it escapes or writes to all // static alloca operands. diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 5760132e44a0..b0ad86899d25 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -2012,7 +2012,7 @@ bool TargetLowering::SimplifyDemandedBits( const APInt *ShAmtC = TLO.DAG.getValidShiftAmountConstant(Src, DemandedElts); - if (!ShAmtC) + if (!ShAmtC || ShAmtC->uge(BitWidth)) break; uint64_t ShVal = ShAmtC->getZExtValue(); @@ -5935,6 +5935,11 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, SDLoc DL(Op); + // Because getNegatedExpression can delete nodes we need a handle to keep + // temporary nodes alive in case the recursion manages to create an identical + // node. + std::list<HandleSDNode> Handles; + switch (Opcode) { case ISD::ConstantFP: { // Don't invert constant FP values after legalization unless the target says @@ -6003,11 +6008,18 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, NegatibleCost CostX = NegatibleCost::Expensive; SDValue NegX = getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth); + // Prevent this node from being deleted by the next call. + if (NegX) + Handles.emplace_back(NegX); + // fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X) NegatibleCost CostY = NegatibleCost::Expensive; SDValue NegY = getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth); + // We're done with the handles. + Handles.clear(); + // Negate the X if its cost is less or equal than Y. if (NegX && (CostX <= CostY)) { Cost = CostX; @@ -6052,11 +6064,18 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, NegatibleCost CostX = NegatibleCost::Expensive; SDValue NegX = getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth); + // Prevent this node from being deleted by the next call. + if (NegX) + Handles.emplace_back(NegX); + // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y)) NegatibleCost CostY = NegatibleCost::Expensive; SDValue NegY = getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth); + // We're done with the handles. + Handles.clear(); + // Negate the X if its cost is less or equal than Y. if (NegX && (CostX <= CostY)) { Cost = CostX; @@ -6094,15 +6113,25 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, if (!NegZ) break; + // Prevent this node from being deleted by the next two calls. + Handles.emplace_back(NegZ); + // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z)) NegatibleCost CostX = NegatibleCost::Expensive; SDValue NegX = getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth); + // Prevent this node from being deleted by the next call. + if (NegX) + Handles.emplace_back(NegX); + // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z)) NegatibleCost CostY = NegatibleCost::Expensive; SDValue NegY = getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth); + // We're done with the handles. + Handles.clear(); + // Negate the X if its cost is less or equal than Y. if (NegX && (CostX <= CostY)) { Cost = std::min(CostX, CostZ); diff --git a/llvm/lib/CodeGen/StackProtector.cpp b/llvm/lib/CodeGen/StackProtector.cpp index 0411faabbcc3..8d91afb6e99d 100644 --- a/llvm/lib/CodeGen/StackProtector.cpp +++ b/llvm/lib/CodeGen/StackProtector.cpp @@ -192,7 +192,7 @@ bool StackProtector::HasAddressTaken(const Instruction *AI, // Ignore intrinsics that do not become real instructions. // TODO: Narrow this to intrinsics that have store-like effects. const auto *CI = cast<CallInst>(I); - if (!isa<DbgInfoIntrinsic>(CI) && !CI->isLifetimeStartOrEnd()) + if (!CI->isDebugOrPseudoInst() && !CI->isLifetimeStartOrEnd()) return true; break; } diff --git a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp index ecee4aed7f88..2a9132bd2fe0 100644 --- a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -801,8 +801,8 @@ bool TwoAddressInstructionPass::rescheduleMIBelowKill( MachineBasicBlock::iterator KillPos = KillMI; ++KillPos; for (MachineInstr &OtherMI : make_range(End, KillPos)) { - // Debug instructions cannot be counted against the limit. - if (OtherMI.isDebugInstr()) + // Debug or pseudo instructions cannot be counted against the limit. + if (OtherMI.isDebugOrPseudoInstr()) continue; if (NumVisited > 10) // FIXME: Arbitrary limit to reduce compile time cost. return false; @@ -974,8 +974,8 @@ bool TwoAddressInstructionPass::rescheduleKillAboveMI( unsigned NumVisited = 0; for (MachineInstr &OtherMI : make_range(mi, MachineBasicBlock::iterator(KillMI))) { - // Debug instructions cannot be counted against the limit. - if (OtherMI.isDebugInstr()) + // Debug or pseudo instructions cannot be counted against the limit. + if (OtherMI.isDebugOrPseudoInstr()) continue; if (NumVisited > 10) // FIXME: Arbitrary limit to reduce compile time cost. return false; diff --git a/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp b/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp index dfdd2c6c669f..834d4cc8f514 100644 --- a/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp +++ b/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp @@ -393,7 +393,7 @@ void LLVMOrcDisposeJITTargetMachineBuilder( delete unwrap(JTMB); } -void lLVMOrcDisposeObjectLayer(LLVMOrcObjectLayerRef ObjLayer) { +void LLVMOrcDisposeObjectLayer(LLVMOrcObjectLayerRef ObjLayer) { delete unwrap(ObjLayer); } diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index 23e7af6287b6..7d83cf5dcf1d 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -937,6 +937,12 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { Intrinsic::getDeclaration(F->getParent(), Intrinsic::prefetch, Tys); return true; } + } else if (Name.startswith("ptr.annotation.") && F->arg_size() == 4) { + rename(F); + NewFn = Intrinsic::getDeclaration(F->getParent(), + Intrinsic::ptr_annotation, + F->arg_begin()->getType()); + return true; } break; @@ -947,6 +953,16 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { } break; + case 'v': { + if (Name == "var.annotation" && F->arg_size() == 4) { + rename(F); + NewFn = Intrinsic::getDeclaration(F->getParent(), + Intrinsic::var_annotation); + return true; + } + break; + } + case 'x': if (UpgradeX86IntrinsicFunction(F, Name, NewFn)) return true; @@ -3730,6 +3746,32 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { CI->eraseFromParent(); return; + case Intrinsic::ptr_annotation: + // Upgrade from versions that lacked the annotation attribute argument. + assert(CI->getNumArgOperands() == 4 && + "Before LLVM 12.0 this intrinsic took four arguments"); + // Create a new call with an added null annotation attribute argument. + NewCall = Builder.CreateCall( + NewFn, + {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), + CI->getArgOperand(3), Constant::getNullValue(Builder.getInt8PtrTy())}); + NewCall->takeName(CI); + CI->replaceAllUsesWith(NewCall); + CI->eraseFromParent(); + return; + + case Intrinsic::var_annotation: + // Upgrade from versions that lacked the annotation attribute argument. + assert(CI->getNumArgOperands() == 4 && + "Before LLVM 12.0 this intrinsic took four arguments"); + // Create a new call with an added null annotation attribute argument. + NewCall = Builder.CreateCall( + NewFn, + {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), + CI->getArgOperand(3), Constant::getNullValue(Builder.getInt8PtrTy())}); + CI->eraseFromParent(); + return; + case Intrinsic::x86_xop_vfrcz_ss: case Intrinsic::x86_xop_vfrcz_sd: NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)}); diff --git a/llvm/lib/IR/Instruction.cpp b/llvm/lib/IR/Instruction.cpp index 1e3fcd672a43..8e52dd3ddc71 100644 --- a/llvm/lib/IR/Instruction.cpp +++ b/llvm/lib/IR/Instruction.cpp @@ -633,6 +633,16 @@ bool Instruction::isSafeToRemove() const { !this->isTerminator(); } +bool Instruction::willReturn() const { + if (const auto *CB = dyn_cast<CallBase>(this)) + // FIXME: Temporarily assume that all side-effect free intrinsics will + // return. Remove this workaround once all intrinsics are appropriately + // annotated. + return CB->hasFnAttr(Attribute::WillReturn) || + (isa<IntrinsicInst>(CB) && CB->onlyReadsMemory()); + return true; +} + bool Instruction::isLifetimeStartOrEnd() const { auto II = dyn_cast<IntrinsicInst>(this); if (!II) @@ -641,6 +651,10 @@ bool Instruction::isLifetimeStartOrEnd() const { return ID == Intrinsic::lifetime_start || ID == Intrinsic::lifetime_end; } +bool Instruction::isDebugOrPseudoInst() const { + return isa<DbgInfoIntrinsic>(this) || isa<PseudoProbeInst>(this); +} + const Instruction * Instruction::getNextNonDebugInstruction(bool SkipPseudoOp) const { for (const Instruction *I = getNextNode(); I; I = I->getNextNode()) diff --git a/llvm/lib/IR/Operator.cpp b/llvm/lib/IR/Operator.cpp index 0f70fc37dee2..69181f35827b 100644 --- a/llvm/lib/IR/Operator.cpp +++ b/llvm/lib/IR/Operator.cpp @@ -61,10 +61,17 @@ Align GEPOperator::getMaxPreservedAlignment(const DataLayout &DL) const { bool GEPOperator::accumulateConstantOffset( const DataLayout &DL, APInt &Offset, function_ref<bool(Value &, APInt &)> ExternalAnalysis) const { - assert(Offset.getBitWidth() == - DL.getIndexSizeInBits(getPointerAddressSpace()) && - "The offset bit width does not match DL specification."); + assert(Offset.getBitWidth() == + DL.getIndexSizeInBits(getPointerAddressSpace()) && + "The offset bit width does not match DL specification."); + SmallVector<const Value *> Index(value_op_begin() + 1, value_op_end()); + return GEPOperator::accumulateConstantOffset(getSourceElementType(), Index, + DL, Offset, ExternalAnalysis); +} +bool GEPOperator::accumulateConstantOffset( + Type *SourceType, ArrayRef<const Value *> Index, const DataLayout &DL, + APInt &Offset, function_ref<bool(Value &, APInt &)> ExternalAnalysis) { bool UsedExternalAnalysis = false; auto AccumulateOffset = [&](APInt Index, uint64_t Size) -> bool { Index = Index.sextOrTrunc(Offset.getBitWidth()); @@ -85,9 +92,10 @@ bool GEPOperator::accumulateConstantOffset( } return true; }; - - for (gep_type_iterator GTI = gep_type_begin(this), GTE = gep_type_end(this); - GTI != GTE; ++GTI) { + auto begin = generic_gep_type_iterator<decltype(Index.begin())>::begin( + SourceType, Index.begin()); + auto end = generic_gep_type_iterator<decltype(Index.end())>::end(Index.end()); + for (auto GTI = begin, GTE = end; GTI != GTE; ++GTI) { // Scalable vectors are multiplied by a runtime constant. bool ScalableType = false; if (isa<ScalableVectorType>(GTI.getIndexedType())) diff --git a/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp b/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp index a8cc308b4e3a..cdbcde50d33a 100644 --- a/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp +++ b/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp @@ -794,7 +794,6 @@ LineCoverageStats::LineCoverageStats( ExecutionCount = WrappedSegment->Count; if (!MinRegionCount) return; - ExecutionCount = 0; for (const auto *LS : LineSegments) if (isStartOfRegion(LS)) ExecutionCount = std::max(ExecutionCount, LS->Count); diff --git a/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp b/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp index d2603097c550..0e03aa50173d 100644 --- a/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp +++ b/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp @@ -18,9 +18,14 @@ #include "llvm/ProfileData/ProfileCommon.h" #include "llvm/ProfileData/SampleProf.h" #include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" using namespace llvm; +cl::opt<bool> UseContextLessSummary( + "profile-summary-contextless", cl::Hidden, cl::init(false), cl::ZeroOrMore, + cl::desc("Merge context profiles before calculating thresholds.")); + // A set of cutoff values. Each value, when divided by ProfileSummary::Scale // (which is 1000000) is a desired percentile of total counts. static const uint32_t DefaultCutoffsData[] = { @@ -111,6 +116,35 @@ std::unique_ptr<ProfileSummary> SampleProfileSummaryBuilder::getSummary() { MaxFunctionCount, NumCounts, NumFunctions); } +std::unique_ptr<ProfileSummary> +SampleProfileSummaryBuilder::computeSummaryForProfiles( + const StringMap<sampleprof::FunctionSamples> &Profiles) { + assert(NumFunctions == 0 && + "This can only be called on an empty summary builder"); + StringMap<sampleprof::FunctionSamples> ContextLessProfiles; + const StringMap<sampleprof::FunctionSamples> *ProfilesToUse = &Profiles; + // For CSSPGO, context-sensitive profile effectively split a function profile + // into many copies each representing the CFG profile of a particular calling + // context. That makes the count distribution looks more flat as we now have + // more function profiles each with lower counts, which in turn leads to lower + // hot thresholds. To compensate for that, by defauly we merge context + // profiles before coumputing profile summary. + if (UseContextLessSummary || (sampleprof::FunctionSamples::ProfileIsCS && + !UseContextLessSummary.getNumOccurrences())) { + for (const auto &I : Profiles) { + ContextLessProfiles[I.second.getName()].merge(I.second); + } + ProfilesToUse = &ContextLessProfiles; + } + + for (const auto &I : *ProfilesToUse) { + const sampleprof::FunctionSamples &Profile = I.second; + addRecord(Profile); + } + + return getSummary(); +} + std::unique_ptr<ProfileSummary> InstrProfSummaryBuilder::getSummary() { computeDetailedSummary(); return std::make_unique<ProfileSummary>( diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp index 370ffc8e2885..38cbca844c87 100644 --- a/llvm/lib/ProfileData/SampleProfReader.cpp +++ b/llvm/lib/ProfileData/SampleProfReader.cpp @@ -1610,9 +1610,5 @@ SampleProfileReader::create(std::unique_ptr<MemoryBuffer> &B, LLVMContext &C, // profile. Binary format has the profile summary in its header. void SampleProfileReader::computeSummary() { SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs); - for (const auto &I : Profiles) { - const FunctionSamples &Profile = I.second; - Builder.addRecord(Profile); - } - Summary = Builder.getSummary(); + Summary = Builder.computeSummaryForProfiles(Profiles); } diff --git a/llvm/lib/ProfileData/SampleProfWriter.cpp b/llvm/lib/ProfileData/SampleProfWriter.cpp index d3bc05e06fdf..8017f2a82804 100644 --- a/llvm/lib/ProfileData/SampleProfWriter.cpp +++ b/llvm/lib/ProfileData/SampleProfWriter.cpp @@ -360,10 +360,7 @@ std::error_code SampleProfileWriterCompactBinary::write( /// it needs to be parsed by the SampleProfileReaderText class. std::error_code SampleProfileWriterText::writeSample(const FunctionSamples &S) { auto &OS = *OutputStream; - if (FunctionSamples::ProfileIsCS) - OS << "[" << S.getNameWithContext() << "]:" << S.getTotalSamples(); - else - OS << S.getName() << ":" << S.getTotalSamples(); + OS << S.getNameWithContext(true) << ":" << S.getTotalSamples(); if (Indent == 0) OS << ":" << S.getHeadSamples(); OS << "\n"; @@ -752,9 +749,5 @@ SampleProfileWriter::create(std::unique_ptr<raw_ostream> &OS, void SampleProfileWriter::computeSummary( const StringMap<FunctionSamples> &ProfileMap) { SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs); - for (const auto &I : ProfileMap) { - const FunctionSamples &Profile = I.second; - Builder.addRecord(Profile); - } - Summary = Builder.getSummary(); + Summary = Builder.computeSummaryForProfiles(ProfileMap); } diff --git a/llvm/lib/Support/Windows/Path.inc b/llvm/lib/Support/Windows/Path.inc index dc9bcf868381..adcbd1b5f8f3 100644 --- a/llvm/lib/Support/Windows/Path.inc +++ b/llvm/lib/Support/Windows/Path.inc @@ -402,8 +402,22 @@ std::error_code is_local(int FD, bool &Result) { } static std::error_code setDeleteDisposition(HANDLE Handle, bool Delete) { - // First, check if the file is on a network (non-local) drive. If so, don't - // set DeleteFile to true, since it prevents opening the file for writes. + // Clear the FILE_DISPOSITION_INFO flag first, before checking if it's a + // network file. On Windows 7 the function realPathFromHandle() below fails + // if the FILE_DISPOSITION_INFO flag was already set to 'DeleteFile = true' by + // a prior call. + FILE_DISPOSITION_INFO Disposition; + Disposition.DeleteFile = false; + if (!SetFileInformationByHandle(Handle, FileDispositionInfo, &Disposition, + sizeof(Disposition))) + return mapWindowsError(::GetLastError()); + if (!Delete) + return std::error_code(); + + // Check if the file is on a network (non-local) drive. If so, don't + // continue when DeleteFile is true, since it prevents opening the file for + // writes. Note -- this will leak temporary files on disk, but only when the + // target file is on a network drive. SmallVector<wchar_t, 128> FinalPath; if (std::error_code EC = realPathFromHandle(Handle, FinalPath)) return EC; @@ -415,9 +429,9 @@ static std::error_code setDeleteDisposition(HANDLE Handle, bool Delete) { if (!IsLocal) return std::error_code(); - // The file is on a local drive, set the DeleteFile to true. - FILE_DISPOSITION_INFO Disposition; - Disposition.DeleteFile = Delete; + // The file is on a local drive, we can safely set FILE_DISPOSITION_INFO's + // flag. + Disposition.DeleteFile = true; if (!SetFileInformationByHandle(Handle, FileDispositionInfo, &Disposition, sizeof(Disposition))) return mapWindowsError(::GetLastError()); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 1be09186dc0a..1451151f4dc5 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1017,11 +1017,12 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, // Vector reductions for (MVT VT : { MVT::v4f16, MVT::v2f32, MVT::v8f16, MVT::v4f32, MVT::v2f64 }) { - setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom); - setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom); + if (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()) { + setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom); + setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom); - if (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()) setOperationAction(ISD::VECREDUCE_FADD, VT, Legal); + } } for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v16i8, MVT::v8i16, MVT::v4i32 }) { diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 693b0adaede4..2604218da160 100644 --- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -5896,7 +5896,13 @@ bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) { User->getMachineOpcode() != PPC::SELECT_I8) return false; + SDNode *Op1 = User->getOperand(1).getNode(); SDNode *Op2 = User->getOperand(2).getNode(); + // If we have a degenerate select with two equal operands, swapping will + // not do anything, and we may run into an infinite loop. + if (Op1 == Op2) + return false; + if (!Op2->isMachineOpcode()) return false; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td index 86fbc73d81d5..b3fc76aee161 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td @@ -504,19 +504,19 @@ def VSOXEI16_V : VIndexedStore<MOPSTIndexedOrder, LSWidth16, "vsoxei16.v">; def VSOXEI32_V : VIndexedStore<MOPSTIndexedOrder, LSWidth32, "vsoxei32.v">; def VSOXEI64_V : VIndexedStore<MOPSTIndexedOrder, LSWidth64, "vsoxei64.v">; -defm VL1R : VWholeLoad<1, "vl1r">; -defm VL2R : VWholeLoad<2, "vl2r">; -defm VL4R : VWholeLoad<4, "vl4r">; -defm VL8R : VWholeLoad<8, "vl8r">; +defm VL1R : VWholeLoad<0, "vl1r">; +defm VL2R : VWholeLoad<1, "vl2r">; +defm VL4R : VWholeLoad<3, "vl4r">; +defm VL8R : VWholeLoad<7, "vl8r">; def : InstAlias<"vl1r.v $vd, (${rs1})", (VL1RE8_V VR:$vd, GPR:$rs1)>; def : InstAlias<"vl2r.v $vd, (${rs1})", (VL2RE8_V VR:$vd, GPR:$rs1)>; def : InstAlias<"vl4r.v $vd, (${rs1})", (VL4RE8_V VR:$vd, GPR:$rs1)>; def : InstAlias<"vl8r.v $vd, (${rs1})", (VL8RE8_V VR:$vd, GPR:$rs1)>; -def VS1R_V : VWholeStore<1, "vs1r.v">; -def VS2R_V : VWholeStore<2, "vs2r.v">; -def VS4R_V : VWholeStore<4, "vs4r.v">; -def VS8R_V : VWholeStore<8, "vs8r.v">; +def VS1R_V : VWholeStore<0, "vs1r.v">; +def VS2R_V : VWholeStore<1, "vs2r.v">; +def VS4R_V : VWholeStore<3, "vs4r.v">; +def VS8R_V : VWholeStore<7, "vs8r.v">; // Vector Single-Width Integer Add and Subtract defm VADD_V : VALU_IV_V_X_I<"vadd", 0b000000>; diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp index caf158102230..a1a16a19f5e5 100644 --- a/llvm/lib/Target/X86/X86FastISel.cpp +++ b/llvm/lib/Target/X86/X86FastISel.cpp @@ -284,6 +284,14 @@ bool X86FastISel::foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I, return false; } + // Make sure no potentially eflags clobbering phi moves can be inserted in + // between. + auto HasPhis = [](const BasicBlock *Succ) { + return !llvm::empty(Succ->phis()); + }; + if (I->isTerminator() && llvm::any_of(successors(I), HasPhis)) + return false; + CC = TmpCC; return true; } diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 7cf555748c46..a185a2007b72 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -3778,7 +3778,7 @@ let Predicates = [HasAVX, NoVLX_Or_NoBWI] in { VEX_4V, VEX_WIG; defm VPACKUSDW : sse4_pack<0x2B, "vpackusdw", v8i16, v4i32, X86Packus, VR128, i128mem, SchedWriteShuffle.XMM, load, 0>, - VEX_4V; + VEX_4V, VEX_WIG; } let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { @@ -3794,7 +3794,7 @@ let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { VEX_4V, VEX_L, VEX_WIG; defm VPACKUSDWY : sse4_pack<0x2B, "vpackusdw", v16i16, v8i32, X86Packus, VR256, i256mem, SchedWriteShuffle.YMM, load, 0>, - VEX_4V, VEX_L; + VEX_4V, VEX_L, VEX_WIG; } let Constraints = "$src1 = $dst" in { @@ -4756,7 +4756,7 @@ let isCommutable = 0 in { SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG; defm VPHSUBD : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v4i32, v4i32, VR128, load, i128mem, - SchedWritePHAdd.XMM, 0>, VEX_4V; + SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG; defm VPSIGNB : SS3I_binop_rm_int<0x08, "vpsignb", int_x86_ssse3_psign_b_128, SchedWriteVecALU.XMM, load, 0>, VEX_4V, VEX_WIG; @@ -4802,7 +4802,7 @@ let isCommutable = 0 in { SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, VEX_WIG; defm VPHSUBDY : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v8i32, v8i32, VR256, load, i256mem, - SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L; + SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, VEX_WIG; defm VPSIGNB : SS3I_binop_rm_int_y<0x08, "vpsignb", int_x86_avx2_psign_b, SchedWriteVecALU.YMM>, VEX_4V, VEX_L, VEX_WIG; defm VPSIGNW : SS3I_binop_rm_int_y<0x09, "vpsignw", int_x86_avx2_psign_w, @@ -6503,7 +6503,7 @@ multiclass pcmpistrm_SS42AI<string asm> { let Defs = [XMM0, EFLAGS], hasSideEffects = 0 in { let Predicates = [HasAVX] in - defm VPCMPISTRM : pcmpistrm_SS42AI<"vpcmpistrm">, VEX; + defm VPCMPISTRM : pcmpistrm_SS42AI<"vpcmpistrm">, VEX, VEX_WIG; defm PCMPISTRM : pcmpistrm_SS42AI<"pcmpistrm"> ; } @@ -6521,7 +6521,7 @@ multiclass SS42AI_pcmpestrm<string asm> { let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], hasSideEffects = 0 in { let Predicates = [HasAVX] in - defm VPCMPESTRM : SS42AI_pcmpestrm<"vpcmpestrm">, VEX; + defm VPCMPESTRM : SS42AI_pcmpestrm<"vpcmpestrm">, VEX, VEX_WIG; defm PCMPESTRM : SS42AI_pcmpestrm<"pcmpestrm">; } @@ -6539,7 +6539,7 @@ multiclass SS42AI_pcmpistri<string asm> { let Defs = [ECX, EFLAGS], hasSideEffects = 0 in { let Predicates = [HasAVX] in - defm VPCMPISTRI : SS42AI_pcmpistri<"vpcmpistri">, VEX; + defm VPCMPISTRI : SS42AI_pcmpistri<"vpcmpistri">, VEX, VEX_WIG; defm PCMPISTRI : SS42AI_pcmpistri<"pcmpistri">; } @@ -6557,7 +6557,7 @@ multiclass SS42AI_pcmpestri<string asm> { let Defs = [ECX, EFLAGS], Uses = [EAX, EDX], hasSideEffects = 0 in { let Predicates = [HasAVX] in - defm VPCMPESTRI : SS42AI_pcmpestri<"vpcmpestri">, VEX; + defm VPCMPESTRI : SS42AI_pcmpestri<"vpcmpestri">, VEX, VEX_WIG; defm PCMPESTRI : SS42AI_pcmpestri<"pcmpestri">; } diff --git a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp index 30a1f81ad0e1..6730824e860a 100644 --- a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp @@ -149,6 +149,13 @@ static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody, if (isNoModRef(MRI)) continue; + // A pseudo probe call shouldn't change any function attribute since it + // doesn't translate to a real instruction. It comes with a memory access + // tag to prevent itself being removed by optimizations and not block + // other instructions being optimized. + if (isa<PseudoProbeInst>(I)) + continue; + if (!AliasAnalysis::onlyAccessesArgPointees(MRB)) { // The call could access any memory. If that includes writes, note it. if (isModSet(MRI)) @@ -1445,8 +1452,7 @@ static bool functionWillReturn(const Function &F) { // If there are no loops, then the function is willreturn if all calls in // it are willreturn. return all_of(instructions(F), [](const Instruction &I) { - const auto *CB = dyn_cast<CallBase>(&I); - return !CB || CB->hasFnAttr(Attribute::WillReturn); + return I.willReturn(); }); } diff --git a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp index 41d7f363e1a4..158fa0771c3b 100644 --- a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp +++ b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp @@ -263,6 +263,17 @@ SampleContextTracker::getContextSamplesFor(const SampleContext &Context) { return Node->getFunctionSamples(); } +SampleContextTracker::ContextSamplesTy & +SampleContextTracker::getAllContextSamplesFor(const Function &Func) { + StringRef CanonName = FunctionSamples::getCanonicalFnName(Func); + return FuncToCtxtProfileSet[CanonName]; +} + +SampleContextTracker::ContextSamplesTy & +SampleContextTracker::getAllContextSamplesFor(StringRef Name) { + return FuncToCtxtProfileSet[Name]; +} + FunctionSamples *SampleContextTracker::getBaseSamplesFor(const Function &Func, bool MergeContext) { StringRef CanonName = FunctionSamples::getCanonicalFnName(Func); @@ -550,4 +561,25 @@ ContextTrieNode &SampleContextTracker::promoteMergeContextSamplesTree( return *ToNode; } +// Replace call graph edges with dynamic call edges from the profile. +void SampleContextTracker::addCallGraphEdges(CallGraph &CG, + StringMap<Function *> &SymbolMap) { + // Add profile call edges to the call graph. + std::queue<ContextTrieNode *> NodeQueue; + NodeQueue.push(&RootContext); + while (!NodeQueue.empty()) { + ContextTrieNode *Node = NodeQueue.front(); + NodeQueue.pop(); + Function *F = SymbolMap.lookup(Node->getFuncName()); + for (auto &I : Node->getAllChildContext()) { + ContextTrieNode *ChildNode = &I.second; + NodeQueue.push(ChildNode); + if (F && !F->isDeclaration()) { + Function *Callee = SymbolMap.lookup(ChildNode->getFuncName()); + if (Callee && !Callee->isDeclaration()) + CG[F]->addCalledFunction(nullptr, CG[Callee]); + } + } + } +} } // namespace llvm diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp index b2a9127773c3..a6a419bfe742 100644 --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -177,6 +177,16 @@ static cl::opt<bool> ProfileTopDownLoad( "order of call graph during sample profile loading. It only " "works for new pass manager. ")); +static cl::opt<bool> UseProfileIndirectCallEdges( + "use-profile-indirect-call-edges", cl::init(true), cl::Hidden, + cl::desc("Considering indirect call samples from profile when top-down " + "processing functions. Only CSSPGO is supported.")); + +static cl::opt<bool> UseProfileTopDownOrder( + "use-profile-top-down-order", cl::init(false), cl::Hidden, + cl::desc("Process functions in one SCC in a top-down order " + "based on the input profile.")); + static cl::opt<bool> ProfileSizeInline( "sample-profile-inline-size", cl::Hidden, cl::init(false), cl::desc("Inline cold call sites in profile loader if it's beneficial " @@ -458,6 +468,8 @@ protected: uint64_t visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge); void buildEdges(Function &F); std::vector<Function *> buildFunctionOrder(Module &M, CallGraph *CG); + void addCallGraphEdges(CallGraph &CG, const FunctionSamples &Samples); + void replaceCallGraphEdges(CallGraph &CG, StringMap<Function *> &SymbolMap); bool propagateThroughEdges(Function &F, bool UpdateBlockCount); void computeDominanceAndLoopInfo(Function &F); void clearFunctionData(); @@ -2278,6 +2290,45 @@ INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass, "sample-profile", "Sample Profile loader", false, false) +// Add inlined profile call edges to the call graph. +void SampleProfileLoader::addCallGraphEdges(CallGraph &CG, + const FunctionSamples &Samples) { + Function *Caller = SymbolMap.lookup(Samples.getFuncName()); + if (!Caller || Caller->isDeclaration()) + return; + + // Skip non-inlined call edges which are not important since top down inlining + // for non-CS profile is to get more precise profile matching, not to enable + // more inlining. + + for (const auto &CallsiteSamples : Samples.getCallsiteSamples()) { + for (const auto &InlinedSamples : CallsiteSamples.second) { + Function *Callee = SymbolMap.lookup(InlinedSamples.first); + if (Callee && !Callee->isDeclaration()) + CG[Caller]->addCalledFunction(nullptr, CG[Callee]); + addCallGraphEdges(CG, InlinedSamples.second); + } + } +} + +// Replace call graph edges with dynamic call edges from the profile. +void SampleProfileLoader::replaceCallGraphEdges( + CallGraph &CG, StringMap<Function *> &SymbolMap) { + // Remove static call edges from the call graph except for the ones from the + // root which make the call graph connected. + for (const auto &Node : CG) + if (Node.second.get() != CG.getExternalCallingNode()) + Node.second->removeAllCalledFunctions(); + + // Add profile call edges to the call graph. + if (ProfileIsCS) { + ContextTracker->addCallGraphEdges(CG, SymbolMap); + } else { + for (const auto &Samples : Reader->getProfiles()) + addCallGraphEdges(CG, Samples.second); + } +} + std::vector<Function *> SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) { std::vector<Function *> FunctionOrderList; @@ -2300,16 +2351,97 @@ SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) { } assert(&CG->getModule() == &M); + + // Add indirect call edges from profile to augment the static call graph. + // Functions will be processed in a top-down order defined by the static call + // graph. Adjusting the order by considering indirect call edges from the + // profile (which don't exist in the static call graph) can enable the + // inlining of indirect call targets by processing the caller before them. + // TODO: enable this for non-CS profile and fix the counts returning logic to + // have a full support for indirect calls. + if (UseProfileIndirectCallEdges && ProfileIsCS) { + for (auto &Entry : *CG) { + const auto *F = Entry.first; + if (!F || F->isDeclaration() || !F->hasFnAttribute("use-sample-profile")) + continue; + auto &AllContexts = ContextTracker->getAllContextSamplesFor(F->getName()); + if (AllContexts.empty()) + continue; + + for (const auto &BB : *F) { + for (const auto &I : BB.getInstList()) { + const auto *CB = dyn_cast<CallBase>(&I); + if (!CB || !CB->isIndirectCall()) + continue; + const DebugLoc &DLoc = I.getDebugLoc(); + if (!DLoc) + continue; + auto CallSite = FunctionSamples::getCallSiteIdentifier(DLoc); + for (FunctionSamples *Samples : AllContexts) { + if (auto CallTargets = Samples->findCallTargetMapAt(CallSite)) { + for (const auto &Target : CallTargets.get()) { + Function *Callee = SymbolMap.lookup(Target.first()); + if (Callee && !Callee->isDeclaration()) + Entry.second->addCalledFunction(nullptr, (*CG)[Callee]); + } + } + } + } + } + } + } + + // Compute a top-down order the profile which is used to sort functions in + // one SCC later. The static processing order computed for an SCC may not + // reflect the call contexts in the context-sensitive profile, thus may cause + // potential inlining to be overlooked. The function order in one SCC is being + // adjusted to a top-down order based on the profile to favor more inlining. + DenseMap<Function *, uint64_t> ProfileOrderMap; + if (UseProfileTopDownOrder || + (ProfileIsCS && !UseProfileTopDownOrder.getNumOccurrences())) { + // Create a static call graph. The call edges are not important since they + // will be replaced by dynamic edges from the profile. + CallGraph ProfileCG(M); + replaceCallGraphEdges(ProfileCG, SymbolMap); + scc_iterator<CallGraph *> CGI = scc_begin(&ProfileCG); + uint64_t I = 0; + while (!CGI.isAtEnd()) { + for (CallGraphNode *Node : *CGI) { + if (auto *F = Node->getFunction()) + ProfileOrderMap[F] = ++I; + } + ++CGI; + } + } + scc_iterator<CallGraph *> CGI = scc_begin(CG); while (!CGI.isAtEnd()) { - for (CallGraphNode *node : *CGI) { - auto F = node->getFunction(); + uint64_t Start = FunctionOrderList.size(); + for (CallGraphNode *Node : *CGI) { + auto *F = Node->getFunction(); if (F && !F->isDeclaration() && F->hasFnAttribute("use-sample-profile")) FunctionOrderList.push_back(F); } + + // Sort nodes in SCC based on the profile top-down order. + if (!ProfileOrderMap.empty()) { + std::stable_sort(FunctionOrderList.begin() + Start, + FunctionOrderList.end(), + [&ProfileOrderMap](Function *Left, Function *Right) { + return ProfileOrderMap[Left] < ProfileOrderMap[Right]; + }); + } + ++CGI; } + LLVM_DEBUG({ + dbgs() << "Function processing order:\n"; + for (auto F : reverse(FunctionOrderList)) { + dbgs() << F->getName() << "\n"; + } + }); + std::reverse(FunctionOrderList.begin(), FunctionOrderList.end()); return FunctionOrderList; } @@ -2461,6 +2593,7 @@ bool SampleProfileLoaderLegacyPass::runOnModule(Module &M) { } bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM) { + LLVM_DEBUG(dbgs() << "\n\nProcessing Function " << F.getName() << "\n"); DILocation2SampleMap.clear(); // By default the entry count is initialized to -1, which will be treated // conservatively by getEntryCount as the same as unknown (None). This is diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp index 0b53007bb6dc..07e68c44416d 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -1270,6 +1270,7 @@ Instruction *InstCombinerImpl::visitZExt(ZExtInst &CI) { ICmpInst *LHS = dyn_cast<ICmpInst>(SrcI->getOperand(0)); ICmpInst *RHS = dyn_cast<ICmpInst>(SrcI->getOperand(1)); if (LHS && RHS && LHS->hasOneUse() && RHS->hasOneUse() && + LHS->getOperand(0)->getType() == RHS->getOperand(0)->getType() && (transformZExtICmp(LHS, CI, false) || transformZExtICmp(RHS, CI, false))) { // zext (or icmp, icmp) -> or (zext icmp), (zext icmp) diff --git a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp index d687ec654438..b211b0813611 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -592,8 +592,14 @@ static bool isSafeAndProfitableToSinkLoad(LoadInst *L) { BasicBlock::iterator BBI = L->getIterator(), E = L->getParent()->end(); for (++BBI; BBI != E; ++BBI) - if (BBI->mayWriteToMemory()) + if (BBI->mayWriteToMemory()) { + // Calls that only access inaccessible memory do not block sinking the + // load. + if (auto *CB = dyn_cast<CallBase>(BBI)) + if (CB->onlyAccessesInaccessibleMemory()) + continue; return false; + } // Check for non-address taken alloca. If not address-taken already, it isn't // profitable to do this xform. diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index c265516213aa..16efe863779a 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -345,10 +345,14 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, return false; // Get the constant out of the ICmp, if there is one. + // Only try this when exactly 1 operand is a constant (if both operands + // are constant, the icmp should eventually simplify). Otherwise, we may + // invert the transform that reduces set bits and infinite-loop. + Value *X; const APInt *CmpC; ICmpInst::Predicate Pred; - if (!match(I->getOperand(0), m_c_ICmp(Pred, m_APInt(CmpC), m_Value())) || - CmpC->getBitWidth() != SelC->getBitWidth()) + if (!match(I->getOperand(0), m_ICmp(Pred, m_Value(X), m_APInt(CmpC))) || + isa<Constant>(X) || CmpC->getBitWidth() != SelC->getBitWidth()) return ShrinkDemandedConstant(I, OpNo, DemandedMask); // If the constant is already the same as the ICmp, leave it as-is. diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 518e909e8ab4..828fd49524ec 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -3878,9 +3878,10 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL, } } - // Skip processing debug intrinsics in InstCombine. Processing these call instructions - // consumes non-trivial amount of time and provides no value for the optimization. - if (!isa<DbgInfoIntrinsic>(Inst)) { + // Skip processing debug and pseudo intrinsics in InstCombine. Processing + // these call instructions consumes non-trivial amount of time and + // provides no value for the optimization. + if (!Inst->isDebugOrPseudoInst()) { InstrsForInstCombineWorklist.push_back(Inst); SeenAliasScopes.analyse(Inst); } diff --git a/llvm/lib/Transforms/Scalar/ADCE.cpp b/llvm/lib/Transforms/Scalar/ADCE.cpp index 2b649732a799..ce4e5e575fbf 100644 --- a/llvm/lib/Transforms/Scalar/ADCE.cpp +++ b/llvm/lib/Transforms/Scalar/ADCE.cpp @@ -325,7 +325,7 @@ void AggressiveDeadCodeElimination::initialize() { bool AggressiveDeadCodeElimination::isAlwaysLive(Instruction &I) { // TODO -- use llvm::isInstructionTriviallyDead - if (I.isEHPad() || I.mayHaveSideEffects()) { + if (I.isEHPad() || I.mayHaveSideEffects() || !I.willReturn()) { // Skip any value profile instrumentation calls if they are // instrumenting constants. if (isInstrumentsConstant(I)) diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp index 96aef90c1c1a..10b08b4e2224 100644 --- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp +++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp @@ -2076,6 +2076,15 @@ JumpThreadingPass::cloneInstructions(BasicBlock::iterator BI, ValueMapping[PN] = NewPN; } + // Clone noalias scope declarations in the threaded block. When threading a + // loop exit, we would otherwise end up with two idential scope declarations + // visible at the same time. + SmallVector<MDNode *> NoAliasScopes; + DenseMap<MDNode *, MDNode *> ClonedScopes; + LLVMContext &Context = PredBB->getContext(); + identifyNoAliasScopesToClone(BI, BE, NoAliasScopes); + cloneNoAliasScopes(NoAliasScopes, ClonedScopes, "thread", Context); + // Clone the non-phi instructions of the source basic block into NewBB, // keeping track of the mapping and using it to remap operands in the cloned // instructions. @@ -2084,6 +2093,7 @@ JumpThreadingPass::cloneInstructions(BasicBlock::iterator BI, New->setName(BI->getName()); NewBB->getInstList().push_back(New); ValueMapping[&*BI] = New; + adaptNoAliasScopes(New, ClonedScopes, Context); // Remap operands to patch up intra-block references. for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i) diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp index d111a6ba4241..af510f1a84bf 100644 --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -2524,7 +2524,7 @@ private: NewAI.getAlign(), LI.isVolatile(), LI.getName()); if (AATags) - NewLI->setAAMetadata(AATags); + NewLI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); if (LI.isVolatile()) NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID()); if (NewLI->isAtomic()) @@ -2563,7 +2563,7 @@ private: IRB.CreateAlignedLoad(TargetTy, getNewAllocaSlicePtr(IRB, LTy), getSliceAlign(), LI.isVolatile(), LI.getName()); if (AATags) - NewLI->setAAMetadata(AATags); + NewLI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); if (LI.isVolatile()) NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID()); @@ -2626,7 +2626,7 @@ private: } StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign()); if (AATags) - Store->setAAMetadata(AATags); + Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); Pass.DeadInsts.push_back(&SI); LLVM_DEBUG(dbgs() << " to: " << *Store << "\n"); @@ -2650,7 +2650,7 @@ private: Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access, LLVMContext::MD_access_group}); if (AATags) - Store->setAAMetadata(AATags); + Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); Pass.DeadInsts.push_back(&SI); LLVM_DEBUG(dbgs() << " to: " << *Store << "\n"); return true; @@ -2720,7 +2720,7 @@ private: NewSI->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access, LLVMContext::MD_access_group}); if (AATags) - NewSI->setAAMetadata(AATags); + NewSI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); if (SI.isVolatile()) NewSI->setAtomic(SI.getOrdering(), SI.getSyncScopeID()); if (NewSI->isAtomic()) @@ -2816,7 +2816,7 @@ private: getNewAllocaSlicePtr(IRB, OldPtr->getType()), II.getValue(), Size, MaybeAlign(getSliceAlign()), II.isVolatile()); if (AATags) - New->setAAMetadata(AATags); + New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); LLVM_DEBUG(dbgs() << " to: " << *New << "\n"); return false; } @@ -2885,7 +2885,7 @@ private: StoreInst *New = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign(), II.isVolatile()); if (AATags) - New->setAAMetadata(AATags); + New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); LLVM_DEBUG(dbgs() << " to: " << *New << "\n"); return !II.isVolatile(); } @@ -3006,7 +3006,7 @@ private: CallInst *New = IRB.CreateMemCpy(DestPtr, DestAlign, SrcPtr, SrcAlign, Size, II.isVolatile()); if (AATags) - New->setAAMetadata(AATags); + New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); LLVM_DEBUG(dbgs() << " to: " << *New << "\n"); return false; } @@ -3060,7 +3060,7 @@ private: LoadInst *Load = IRB.CreateAlignedLoad(OtherTy, SrcPtr, SrcAlign, II.isVolatile(), "copyload"); if (AATags) - Load->setAAMetadata(AATags); + Load->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); Src = Load; } @@ -3080,7 +3080,7 @@ private: StoreInst *Store = cast<StoreInst>( IRB.CreateAlignedStore(Src, DstPtr, DstAlign, II.isVolatile())); if (AATags) - Store->setAAMetadata(AATags); + Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); LLVM_DEBUG(dbgs() << " to: " << *Store << "\n"); return !II.isVolatile(); } @@ -3381,8 +3381,13 @@ private: IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep"); LoadInst *Load = IRB.CreateAlignedLoad(Ty, GEP, Alignment, Name + ".load"); - if (AATags) - Load->setAAMetadata(AATags); + + APInt Offset( + DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0); + if (AATags && + GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset)) + Load->setAAMetadata(AATags.shift(Offset.getZExtValue())); + Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert"); LLVM_DEBUG(dbgs() << " to: " << *Load << "\n"); } @@ -3428,8 +3433,13 @@ private: IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep"); StoreInst *Store = IRB.CreateAlignedStore(ExtractValue, InBoundsGEP, Alignment); - if (AATags) - Store->setAAMetadata(AATags); + + APInt Offset( + DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0); + if (AATags && + GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset)) + Store->setAAMetadata(AATags.shift(Offset.getZExtValue())); + LLVM_DEBUG(dbgs() << " to: " << *Store << "\n"); } }; diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp index 51a49574e55d..6ab061510a60 100644 --- a/llvm/lib/Transforms/Utils/CloneFunction.cpp +++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp @@ -989,3 +989,11 @@ void llvm::identifyNoAliasScopesToClone( if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I)) NoAliasDeclScopes.push_back(Decl->getScopeList()); } + +void llvm::identifyNoAliasScopesToClone( + BasicBlock::iterator Start, BasicBlock::iterator End, + SmallVectorImpl<MDNode *> &NoAliasDeclScopes) { + for (Instruction &I : make_range(Start, End)) + if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I)) + NoAliasDeclScopes.push_back(Decl->getScopeList()); +} diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index 477ea458c763..ae26058c210c 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -420,13 +420,8 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I, return true; } - if (auto *CB = dyn_cast<CallBase>(I)) { - // Treat calls that may not return as alive. - // TODO: Remove the intrinsic escape hatch once all intrinsics set - // willreturn properly. - if (!CB->willReturn() && !isa<IntrinsicInst>(I)) - return false; - } + if (!I->willReturn()) + return false; if (!I->mayHaveSideEffects()) return true; @@ -923,6 +918,7 @@ static void gatherIncomingValuesToPhi(PHINode *PN, /// \param IncomingValues A map from block to value. static void replaceUndefValuesInPhi(PHINode *PN, const IncomingValueMap &IncomingValues) { + SmallVector<unsigned> TrueUndefOps; for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { Value *V = PN->getIncomingValue(i); @@ -930,10 +926,31 @@ static void replaceUndefValuesInPhi(PHINode *PN, BasicBlock *BB = PN->getIncomingBlock(i); IncomingValueMap::const_iterator It = IncomingValues.find(BB); - if (It == IncomingValues.end()) continue; + // Keep track of undef/poison incoming values. Those must match, so we fix + // them up below if needed. + // Note: this is conservatively correct, but we could try harder and group + // the undef values per incoming basic block. + if (It == IncomingValues.end()) { + TrueUndefOps.push_back(i); + continue; + } + + // There is a defined value for this incoming block, so map this undef + // incoming value to the defined value. PN->setIncomingValue(i, It->second); } + + // If there are both undef and poison values incoming, then convert those + // values to undef. It is invalid to have different values for the same + // incoming block. + unsigned PoisonCount = count_if(TrueUndefOps, [&](unsigned i) { + return isa<PoisonValue>(PN->getIncomingValue(i)); + }); + if (PoisonCount != 0 && PoisonCount != TrueUndefOps.size()) { + for (unsigned i : TrueUndefOps) + PN->setIncomingValue(i, UndefValue::get(PN->getType())); + } } /// Replace a value flowing from a block to a phi with diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 7cfe17618cde..de9560df9785 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1628,6 +1628,11 @@ static bool canSinkInstructions( I->getType()->isTokenTy()) return false; + // Do not try to sink an instruction in an infinite loop - it can cause + // this algorithm to infinite loop. + if (I->getParent()->getSingleSuccessor() == I->getParent()) + return false; + // Conservatively return false if I is an inline-asm instruction. Sinking // and merging inline-asm instructions can potentially create arguments // that cannot satisfy the inline-asm constraints. @@ -1714,13 +1719,13 @@ static bool canSinkInstructions( return true; } -// Assuming canSinkLastInstruction(Blocks) has returned true, sink the last +// Assuming canSinkInstructions(Blocks) has returned true, sink the last // instruction of every block in Blocks to their common successor, commoning // into one instruction. static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) { auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(0); - // canSinkLastInstruction returning true guarantees that every block has at + // canSinkInstructions returning true guarantees that every block has at // least one non-terminator instruction. SmallVector<Instruction*,4> Insts; for (auto *BB : Blocks) { @@ -1733,9 +1738,9 @@ static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) { } // The only checking we need to do now is that all users of all instructions - // are the same PHI node. canSinkLastInstruction should have checked this but - // it is slightly over-aggressive - it gets confused by commutative instructions - // so double-check it here. + // are the same PHI node. canSinkInstructions should have checked this but + // it is slightly over-aggressive - it gets confused by commutative + // instructions so double-check it here. Instruction *I0 = Insts.front(); if (!I0->user_empty()) { auto *PNUse = dyn_cast<PHINode>(*I0->user_begin()); @@ -1746,11 +1751,11 @@ static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) { return false; } - // We don't need to do any more checking here; canSinkLastInstruction should + // We don't need to do any more checking here; canSinkInstructions should // have done it all for us. SmallVector<Value*, 4> NewOperands; for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) { - // This check is different to that in canSinkLastInstruction. There, we + // This check is different to that in canSinkInstructions. There, we // cared about the global view once simplifycfg (and instcombine) have // completed - it takes into account PHIs that become trivially // simplifiable. However here we need a more local view; if an operand diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h index 1795470fa58c..19797e6f7858 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -142,6 +142,10 @@ public: return createInstruction(Instruction::BinaryOps::Or, {LHS, RHS}); } + VPValue *createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal) { + return createNaryOp(Instruction::Select, {Cond, TrueVal, FalseVal}); + } + //===--------------------------------------------------------------------===// // RAII helpers. //===--------------------------------------------------------------------===// diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 47635dbdda02..b456a97aa4ec 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -372,19 +372,11 @@ static Type *getMemInstValueType(Value *I) { /// A helper function that returns true if the given type is irregular. The /// type is irregular if its allocated size doesn't equal the store size of an -/// element of the corresponding vector type at the given vectorization factor. -static bool hasIrregularType(Type *Ty, const DataLayout &DL, ElementCount VF) { - // Determine if an array of VF elements of type Ty is "bitcast compatible" - // with a <VF x Ty> vector. - if (VF.isVector()) { - auto *VectorTy = VectorType::get(Ty, VF); - return TypeSize::get(VF.getKnownMinValue() * - DL.getTypeAllocSize(Ty).getFixedValue(), - VF.isScalable()) != DL.getTypeStoreSize(VectorTy); - } - - // If the vectorization factor is one, we just check if an array of type Ty - // requires padding between elements. +/// element of the corresponding vector type. +static bool hasIrregularType(Type *Ty, const DataLayout &DL) { + // Determine if an array of N elements of type Ty is "bitcast compatible" + // with a <N x Ty> vector. + // This is only true if there is no padding between the array elements. return DL.getTypeAllocSizeInBits(Ty) != DL.getTypeSizeInBits(Ty); } @@ -5212,7 +5204,7 @@ bool LoopVectorizationCostModel::interleavedAccessCanBeWidened( // requires padding and will be scalarized. auto &DL = I->getModule()->getDataLayout(); auto *ScalarTy = getMemInstValueType(I); - if (hasIrregularType(ScalarTy, DL, VF)) + if (hasIrregularType(ScalarTy, DL)) return false; // Check if masking is required. @@ -5259,7 +5251,7 @@ bool LoopVectorizationCostModel::memoryInstructionCanBeWidened( // requires padding and will be scalarized. auto &DL = I->getModule()->getDataLayout(); auto *ScalarTy = LI ? LI->getType() : SI->getValueOperand()->getType(); - if (hasIrregularType(ScalarTy, DL, VF)) + if (hasIrregularType(ScalarTy, DL)) return false; return true; @@ -8195,8 +8187,15 @@ VPValue *VPRecipeBuilder::createEdgeMask(BasicBlock *Src, BasicBlock *Dst, if (BI->getSuccessor(0) != Dst) EdgeMask = Builder.createNot(EdgeMask); - if (SrcMask) // Otherwise block in-mask is all-one, no need to AND. - EdgeMask = Builder.createAnd(EdgeMask, SrcMask); + if (SrcMask) { // Otherwise block in-mask is all-one, no need to AND. + // The condition is 'SrcMask && EdgeMask', which is equivalent to + // 'select i1 SrcMask, i1 EdgeMask, i1 false'. + // The select version does not introduce new UB if SrcMask is false and + // EdgeMask is poison. Using 'and' here introduces undefined behavior. + VPValue *False = Plan->getOrAddVPValue( + ConstantInt::getFalse(BI->getCondition()->getType())); + EdgeMask = Builder.createSelect(SrcMask, EdgeMask, False); + } return EdgeMaskCache[Edge] = EdgeMask; } diff --git a/llvm/tools/llvm-dwp/llvm-dwp.cpp b/llvm/tools/llvm-dwp/llvm-dwp.cpp index 9aed3526b0aa..d495bd3d4cab 100644 --- a/llvm/tools/llvm-dwp/llvm-dwp.cpp +++ b/llvm/tools/llvm-dwp/llvm-dwp.cpp @@ -526,8 +526,8 @@ getDWOFilenames(StringRef ExecFilename) { std::string DWOCompDir = dwarf::toString(Die.find(dwarf::DW_AT_comp_dir), ""); if (!DWOCompDir.empty()) { - SmallString<16> DWOPath; - sys::path::append(DWOPath, DWOCompDir, DWOName); + SmallString<16> DWOPath(std::move(DWOName)); + sys::fs::make_absolute(DWOCompDir, DWOPath); DWOPaths.emplace_back(DWOPath.data(), DWOPath.size()); } else { DWOPaths.push_back(std::move(DWOName)); diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp index 3134f989603a..17128e95727f 100644 --- a/llvm/tools/llvm-objdump/llvm-objdump.cpp +++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -947,8 +947,8 @@ protected: std::unordered_map<std::string, std::vector<StringRef>> LineCache; // Keep track of missing sources. StringSet<> MissingSources; - // Only emit 'no debug info' warning once. - bool WarnedNoDebugInfo; + // Only emit 'invalid debug info' warning once. + bool WarnedInvalidDebugInfo = false; private: bool cacheSource(const DILineInfo& LineInfoFile); @@ -962,8 +962,7 @@ private: public: SourcePrinter() = default; - SourcePrinter(const ObjectFile *Obj, StringRef DefaultArch) - : Obj(Obj), WarnedNoDebugInfo(false) { + SourcePrinter(const ObjectFile *Obj, StringRef DefaultArch) : Obj(Obj) { symbolize::LLVMSymbolizer::Options SymbolizerOpts; SymbolizerOpts.PrintFunctions = DILineInfoSpecifier::FunctionNameKind::LinkageName; @@ -1018,22 +1017,17 @@ void SourcePrinter::printSourceLine(formatted_raw_ostream &OS, return; DILineInfo LineInfo = DILineInfo(); - auto ExpectedLineInfo = Symbolizer->symbolizeCode(*Obj, Address); + Expected<DILineInfo> ExpectedLineInfo = + Symbolizer->symbolizeCode(*Obj, Address); std::string ErrorMessage; - if (!ExpectedLineInfo) - ErrorMessage = toString(ExpectedLineInfo.takeError()); - else + if (ExpectedLineInfo) { LineInfo = *ExpectedLineInfo; - - if (LineInfo.FileName == DILineInfo::BadString) { - if (!WarnedNoDebugInfo) { - std::string Warning = - "failed to parse debug information for " + ObjectFilename.str(); - if (!ErrorMessage.empty()) - Warning += ": " + ErrorMessage; - reportWarning(Warning, ObjectFilename); - WarnedNoDebugInfo = true; - } + } else if (!WarnedInvalidDebugInfo) { + WarnedInvalidDebugInfo = true; + // TODO Untested. + reportWarning("failed to parse debug information: " + + toString(ExpectedLineInfo.takeError()), + ObjectFilename); } if (!Prefix.empty() && sys::path::is_absolute_gnu(LineInfo.FileName)) { diff --git a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp index 9c68acee0ae2..8734c2d74045 100644 --- a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp +++ b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp @@ -181,7 +181,12 @@ static void symbolizeInput(const opt::InputArgList &Args, uint64_t AdjustVMA, // the topmost function, which suits our needs better. auto ResOrErr = Symbolizer.symbolizeInlinedCode( ModuleName, {Offset, object::SectionedAddress::UndefSection}); - Printer << (error(ResOrErr) ? DILineInfo() : ResOrErr.get().getFrame(0)); + if (!ResOrErr || ResOrErr->getNumberOfFrames() == 0) { + error(ResOrErr); + Printer << DILineInfo(); + } else { + Printer << ResOrErr->getFrame(0); + } } else { auto ResOrErr = Symbolizer.symbolizeCode( ModuleName, {Offset, object::SectionedAddress::UndefSection}); diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp index a6e32bd008e1..b981f8740dbe 100644 --- a/openmp/runtime/src/kmp_runtime.cpp +++ b/openmp/runtime/src/kmp_runtime.cpp @@ -920,6 +920,12 @@ static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team, if (TCR_PTR(__kmp_threads[0]) == NULL) { --capacity; } + // If it is not for initializing the hidden helper team, we need to take + // __kmp_hidden_helper_threads_num out of the capacity because it is included + // in __kmp_threads_capacity. + if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) { + capacity -= __kmp_hidden_helper_threads_num; + } if (__kmp_nth + new_nthreads - (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) > capacity) { @@ -3632,6 +3638,13 @@ int __kmp_register_root(int initial_thread) { --capacity; } + // If it is not for initializing the hidden helper team, we need to take + // __kmp_hidden_helper_threads_num out of the capacity because it is included + // in __kmp_threads_capacity. + if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) { + capacity -= __kmp_hidden_helper_threads_num; + } + /* see if there are too many threads */ if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1)) { if (__kmp_tp_cached) { @@ -3664,7 +3677,7 @@ int __kmp_register_root(int initial_thread) { /* find an available thread slot */ // Don't reassign the zero slot since we need that to only be used by // initial thread. Slots for hidden helper threads should also be skipped. - if (initial_thread && __kmp_threads[0] == NULL) { + if (initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) { gtid = 0; } else { for (gtid = __kmp_hidden_helper_threads_num + 1; diff --git a/openmp/runtime/src/kmp_settings.cpp b/openmp/runtime/src/kmp_settings.cpp index b477edbbfb42..50f6a05faaf5 100644 --- a/openmp/runtime/src/kmp_settings.cpp +++ b/openmp/runtime/src/kmp_settings.cpp @@ -504,9 +504,10 @@ int __kmp_initial_threads_capacity(int req_nproc) { nth = (4 * __kmp_xproc); // If hidden helper task is enabled, we initialize the thread capacity with - // extra - // __kmp_hidden_helper_threads_num. - nth += __kmp_hidden_helper_threads_num; + // extra __kmp_hidden_helper_threads_num. + if (__kmp_enable_hidden_helper) { + nth += __kmp_hidden_helper_threads_num; + } if (nth > __kmp_max_nth) nth = __kmp_max_nth; diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp index 3d7021128dbd..4bcd11946694 100644 --- a/openmp/runtime/src/kmp_tasking.cpp +++ b/openmp/runtime/src/kmp_tasking.cpp @@ -326,7 +326,8 @@ static kmp_int32 __kmp_push_task(kmp_int32 gtid, kmp_task_t *task) { kmp_info_t *thread = __kmp_threads[gtid]; kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task); - if (taskdata->td_flags.hidden_helper) { + // We don't need to map to shadow gtid if it is already hidden helper thread + if (taskdata->td_flags.hidden_helper && !KMP_HIDDEN_HELPER_THREAD(gtid)) { gtid = KMP_GTID_TO_SHADOW_GTID(gtid); thread = __kmp_threads[gtid]; } |