diff options
Diffstat (limited to 'llvm/lib/Frontend')
| -rw-r--r-- | llvm/lib/Frontend/OpenMP/OMPConstants.cpp | 87 | ||||
| -rw-r--r-- | llvm/lib/Frontend/OpenMP/OMPContext.cpp | 527 | ||||
| -rw-r--r-- | llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 780 | 
3 files changed, 1165 insertions, 229 deletions
| diff --git a/llvm/lib/Frontend/OpenMP/OMPConstants.cpp b/llvm/lib/Frontend/OpenMP/OMPConstants.cpp deleted file mode 100644 index ec0733903e99..000000000000 --- a/llvm/lib/Frontend/OpenMP/OMPConstants.cpp +++ /dev/null @@ -1,87 +0,0 @@ -//===- OMPConstants.cpp - Helpers related to OpenMP code generation ---===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -//===----------------------------------------------------------------------===// - -#include "llvm/Frontend/OpenMP/OMPConstants.h" - -#include "llvm/ADT/StringRef.h" -#include "llvm/ADT/StringSwitch.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Type.h" - -using namespace llvm; -using namespace omp; -using namespace types; - -Directive llvm::omp::getOpenMPDirectiveKind(StringRef Str) { -  return llvm::StringSwitch<Directive>(Str) -#define OMP_DIRECTIVE(Enum, Str) .Case(Str, Enum) -#include "llvm/Frontend/OpenMP/OMPKinds.def" -      .Default(OMPD_unknown); -} - -StringRef llvm::omp::getOpenMPDirectiveName(Directive Kind) { -  switch (Kind) { -#define OMP_DIRECTIVE(Enum, Str)                                               \ -  case Enum:                                                                   \ -    return Str; -#include "llvm/Frontend/OpenMP/OMPKinds.def" -  } -  llvm_unreachable("Invalid OpenMP directive kind"); -} - -/// Declarations for LLVM-IR types (simple, function and structure) are -/// generated below. Their names are defined and used in OpenMPKinds.def. Here -/// we provide the declarations, the initializeTypes function will provide the -/// values. -/// -///{ - -#define OMP_TYPE(VarName, InitValue) Type *llvm::omp::types::VarName = nullptr; -#define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...)                  
\ -  FunctionType *llvm::omp::types::VarName = nullptr;                           \ -  PointerType *llvm::omp::types::VarName##Ptr = nullptr; -#define OMP_STRUCT_TYPE(VarName, StrName, ...)                                 \ -  StructType *llvm::omp::types::VarName = nullptr;                             \ -  PointerType *llvm::omp::types::VarName##Ptr = nullptr; -#include "llvm/Frontend/OpenMP/OMPKinds.def" - -///} - -void llvm::omp::types::initializeTypes(Module &M) { -  if (Void) -    return; - -  LLVMContext &Ctx = M.getContext(); -  // Create all simple and struct types exposed by the runtime and remember -  // the llvm::PointerTypes of them for easy access later. -  StructType *T; -#define OMP_TYPE(VarName, InitValue) VarName = InitValue; -#define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...)                  \ -  VarName = FunctionType::get(ReturnType, {__VA_ARGS__}, IsVarArg);            \ -  VarName##Ptr = PointerType::getUnqual(VarName); -#define OMP_STRUCT_TYPE(VarName, StructName, ...)                              \ -  T = M.getTypeByName(StructName);                                             \ -  if (!T)                                                                      \ -    T = StructType::create(Ctx, {__VA_ARGS__}, StructName);                    \ -  VarName = T;                                                                 \ -  VarName##Ptr = PointerType::getUnqual(T); -#include "llvm/Frontend/OpenMP/OMPKinds.def" -} - -void llvm::omp::types::uninitializeTypes() { -#define OMP_TYPE(VarName, InitValue) VarName = nullptr; -#define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...)                  \ -  VarName = nullptr;                                                           \ -  VarName##Ptr = nullptr; -#define OMP_STRUCT_TYPE(VarName, StrName, ...)                                 
\ -  VarName = nullptr;                                                           \ -  VarName##Ptr = nullptr; -#include "llvm/Frontend/OpenMP/OMPKinds.def" -} diff --git a/llvm/lib/Frontend/OpenMP/OMPContext.cpp b/llvm/lib/Frontend/OpenMP/OMPContext.cpp new file mode 100644 index 000000000000..c44e858ab5ed --- /dev/null +++ b/llvm/lib/Frontend/OpenMP/OMPContext.cpp @@ -0,0 +1,527 @@ +//===- OMPContext.cpp ------ Collection of helpers for OpenMP contexts ----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements helper functions and classes to deal with OpenMP +/// contexts as used by `[begin/end] declare variant` and `metadirective`. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/Frontend/OpenMP/OMPContext.h" +#include "llvm/ADT/SetOperations.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +#define DEBUG_TYPE "openmp-ir-builder" + +using namespace llvm; +using namespace omp; + +OMPContext::OMPContext(bool IsDeviceCompilation, Triple TargetTriple) { +  // Add the appropriate device kind trait based on the triple and the +  // IsDeviceCompilation flag. +  ActiveTraits.set(unsigned(IsDeviceCompilation +                                ? 
TraitProperty::device_kind_nohost +                                : TraitProperty::device_kind_host)); +  switch (TargetTriple.getArch()) { +  case Triple::arm: +  case Triple::armeb: +  case Triple::aarch64: +  case Triple::aarch64_be: +  case Triple::aarch64_32: +  case Triple::mips: +  case Triple::mipsel: +  case Triple::mips64: +  case Triple::mips64el: +  case Triple::ppc: +  case Triple::ppc64: +  case Triple::ppc64le: +  case Triple::x86: +  case Triple::x86_64: +    ActiveTraits.set(unsigned(TraitProperty::device_kind_cpu)); +    break; +  case Triple::amdgcn: +  case Triple::nvptx: +  case Triple::nvptx64: +    ActiveTraits.set(unsigned(TraitProperty::device_kind_gpu)); +    break; +  default: +    break; +  } + +  // Add the appropriate device architecture trait based on the triple. +#define OMP_TRAIT_PROPERTY(Enum, TraitSetEnum, TraitSelectorEnum, Str)         \ +  if (TraitSelector::TraitSelectorEnum == TraitSelector::device_arch)          \ +    if (TargetTriple.getArch() == TargetTriple.getArchTypeForLLVMName(Str))    \ +      ActiveTraits.set(unsigned(TraitProperty::Enum)); +#include "llvm/Frontend/OpenMP/OMPKinds.def" + +  // TODO: What exactly do we want to see as device ISA trait? +  //       The discussion on the list did not seem to have come to an agreed +  //       upon solution. + +  // LLVM is the "OpenMP vendor" but we could also interpret vendor as the +  // target vendor. +  ActiveTraits.set(unsigned(TraitProperty::implementation_vendor_llvm)); + +  // The user condition true is accepted but not false. +  ActiveTraits.set(unsigned(TraitProperty::user_condition_true)); + +  // This is for sure some device. 
+  ActiveTraits.set(unsigned(TraitProperty::device_kind_any)); + +  LLVM_DEBUG({ +    dbgs() << "[" << DEBUG_TYPE +           << "] New OpenMP context with the following properties:\n"; +    for (unsigned Bit : ActiveTraits.set_bits()) { +      TraitProperty Property = TraitProperty(Bit); +      dbgs() << "\t " << getOpenMPContextTraitPropertyFullName(Property) +             << "\n"; +    } +  }); +} + +/// Return true if \p C0 is a subset of \p C1. Note that both arrays are +/// expected to be sorted. +template <typename T> static bool isSubset(ArrayRef<T> C0, ArrayRef<T> C1) { +#ifdef EXPENSIVE_CHECKS +  assert(llvm::is_sorted(C0) && llvm::is_sorted(C1) && +         "Expected sorted arrays!"); +#endif +  if (C0.size() > C1.size()) +    return false; +  auto It0 = C0.begin(), End0 = C0.end(); +  auto It1 = C1.begin(), End1 = C1.end(); +  while (It0 != End0) { +    if (It1 == End1) +      return false; +    if (*It0 == *It1) { +      ++It0; +      ++It1; +      continue; +    } +    ++It1; // Skip C1 entries that are not in C0. +  } +  return true; +} + +/// Return true if \p C0 is a strict subset of \p C1. Note that both arrays are +/// expected to be sorted. +template <typename T> +static bool isStrictSubset(ArrayRef<T> C0, ArrayRef<T> C1) { +  if (C0.size() >= C1.size()) +    return false; +  return isSubset<T>(C0, C1); +} + +static bool isStrictSubset(const VariantMatchInfo &VMI0, +                           const VariantMatchInfo &VMI1) { +  // If all required traits are a strict subset and the ordered vectors storing +  // the construct traits, we say it is a strict subset. Note that the latter +  // relation is not required to be strict. 
+  if (VMI0.RequiredTraits.count() >= VMI1.RequiredTraits.count()) +    return false; +  for (unsigned Bit : VMI0.RequiredTraits.set_bits()) +    if (!VMI1.RequiredTraits.test(Bit)) +      return false; +  if (!isSubset<TraitProperty>(VMI0.ConstructTraits, VMI1.ConstructTraits)) +    return false; +  return true; +} + +static int isVariantApplicableInContextHelper( +    const VariantMatchInfo &VMI, const OMPContext &Ctx, +    SmallVectorImpl<unsigned> *ConstructMatches, bool DeviceSetOnly) { + +  // The match kind determines if we need to match all traits, any of the +  // traits, or none of the traits for it to be an applicable context. +  enum MatchKind { MK_ALL, MK_ANY, MK_NONE }; + +  MatchKind MK = MK_ALL; +  // Determine the match kind the user wants, "all" is the default and provided +  // to the user only for completeness. +  if (VMI.RequiredTraits.test( +          unsigned(TraitProperty::implementation_extension_match_any))) +    MK = MK_ANY; +  if (VMI.RequiredTraits.test( +          unsigned(TraitProperty::implementation_extension_match_none))) +    MK = MK_NONE; + +  // Helper to deal with a single property that was (not) found in the OpenMP +  // context based on the match kind selected by the user via +  // `implementation={extensions(match_[all,any,none])}' +  auto HandleTrait = [MK](TraitProperty Property, +                          bool WasFound) -> Optional<bool> /* Result */ { +    // For kind "any" a single match is enough but we ignore non-matched +    // properties. +    if (MK == MK_ANY) { +      if (WasFound) +        return true; +      return None; +    } + +    // In "all" or "none" mode we accept a matching or non-matching property +    // respectively and move on. We are not done yet! +    if ((WasFound && MK == MK_ALL) || (!WasFound && MK == MK_NONE)) +      return None; + +    // We missed a property, provide some debug output and indicate failure. 
+    LLVM_DEBUG({ +      if (MK == MK_ALL) +        dbgs() << "[" << DEBUG_TYPE << "] Property " +               << getOpenMPContextTraitPropertyName(Property) +               << " was not in the OpenMP context but match kind is all.\n"; +      if (MK == MK_NONE) +        dbgs() << "[" << DEBUG_TYPE << "] Property " +               << getOpenMPContextTraitPropertyName(Property) +               << " was in the OpenMP context but match kind is none.\n"; +    }); +    return false; +  }; + +  for (unsigned Bit : VMI.RequiredTraits.set_bits()) { +    TraitProperty Property = TraitProperty(Bit); +    if (DeviceSetOnly && +        getOpenMPContextTraitSetForProperty(Property) != TraitSet::device) +      continue; + +    // So far all extensions are handled elsewhere, we skip them here as they +    // are not part of the OpenMP context. +    if (getOpenMPContextTraitSelectorForProperty(Property) == +        TraitSelector::implementation_extension) +      continue; + +    bool IsActiveTrait = Ctx.ActiveTraits.test(unsigned(Property)); +    Optional<bool> Result = HandleTrait(Property, IsActiveTrait); +    if (Result.hasValue()) +      return Result.getValue(); +  } + +  if (!DeviceSetOnly) { +    // We could use isSubset here but we also want to record the match +    // locations. +    unsigned ConstructIdx = 0, NoConstructTraits = Ctx.ConstructTraits.size(); +    for (TraitProperty Property : VMI.ConstructTraits) { +      assert(getOpenMPContextTraitSetForProperty(Property) == +                 TraitSet::construct && +             "Variant context is ill-formed!"); + +      // Verify the nesting. 
+      bool FoundInOrder = false; +      while (!FoundInOrder && ConstructIdx != NoConstructTraits) +        FoundInOrder = (Ctx.ConstructTraits[ConstructIdx++] == Property); +      if (ConstructMatches) +        ConstructMatches->push_back(ConstructIdx - 1); + +      Optional<bool> Result = HandleTrait(Property, FoundInOrder); +      if (Result.hasValue()) +        return Result.getValue(); + +      if (!FoundInOrder) { +        LLVM_DEBUG(dbgs() << "[" << DEBUG_TYPE << "] Construct property " +                          << getOpenMPContextTraitPropertyName(Property) +                          << " was not nested properly.\n"); +        return false; +      } + +      // TODO: Verify SIMD +    } + +    assert(isSubset<TraitProperty>(VMI.ConstructTraits, Ctx.ConstructTraits) && +           "Broken invariant!"); +  } + +  if (MK == MK_ANY) { +    LLVM_DEBUG(dbgs() << "[" << DEBUG_TYPE +                      << "] None of the properties was in the OpenMP context " +                         "but match kind is any.\n"); +    return false; +  } + +  return true; +} + +bool llvm::omp::isVariantApplicableInContext(const VariantMatchInfo &VMI, +                                             const OMPContext &Ctx, +                                             bool DeviceSetOnly) { +  return isVariantApplicableInContextHelper( +      VMI, Ctx, /* ConstructMatches */ nullptr, DeviceSetOnly); +} + +static APInt getVariantMatchScore(const VariantMatchInfo &VMI, +                                  const OMPContext &Ctx, +                                  SmallVectorImpl<unsigned> &ConstructMatches) { +  APInt Score(64, 1); + +  unsigned NoConstructTraits = VMI.ConstructTraits.size(); +  for (unsigned Bit : VMI.RequiredTraits.set_bits()) { +    TraitProperty Property = TraitProperty(Bit); +    // If there is a user score attached, use it. 
+    if (VMI.ScoreMap.count(Property)) { +      const APInt &UserScore = VMI.ScoreMap.lookup(Property); +      assert(UserScore.uge(0) && "Expect non-negative user scores!"); +      Score += UserScore.getZExtValue(); +      continue; +    } + +    switch (getOpenMPContextTraitSetForProperty(Property)) { +    case TraitSet::construct: +      // We handle the construct traits later via the VMI.ConstructTraits +      // container. +      continue; +    case TraitSet::implementation: +      // No effect on the score (implementation defined). +      continue; +    case TraitSet::user: +      // No effect on the score. +      continue; +    case TraitSet::device: +      // Handled separately below. +      break; +    case TraitSet::invalid: +      llvm_unreachable("Unknown trait set is not to be used!"); +    } + +    // device={kind(any)} is "as if" no kind selector was specified. +    if (Property == TraitProperty::device_kind_any) +      continue; + +    switch (getOpenMPContextTraitSelectorForProperty(Property)) { +    case TraitSelector::device_kind: +      Score += (1ULL << (NoConstructTraits + 0)); +      continue; +    case TraitSelector::device_arch: +      Score += (1ULL << (NoConstructTraits + 1)); +      continue; +    case TraitSelector::device_isa: +      Score += (1ULL << (NoConstructTraits + 2)); +      continue; +    default: +      continue; +    } +  } + +  unsigned ConstructIdx = 0; +  assert(NoConstructTraits == ConstructMatches.size() && +         "Mismatch in the construct traits!"); +  for (TraitProperty Property : VMI.ConstructTraits) { +    assert(getOpenMPContextTraitSetForProperty(Property) == +               TraitSet::construct && +           "Ill-formed variant match info!"); +    (void)Property; +    // ConstructMatches is the position p - 1 and we need 2^(p-1). 
+    Score += (1ULL << ConstructMatches[ConstructIdx++]); +  } + +  LLVM_DEBUG(dbgs() << "[" << DEBUG_TYPE << "] Variant has a score of " << Score +                    << "\n"); +  return Score; +} + +int llvm::omp::getBestVariantMatchForContext( +    const SmallVectorImpl<VariantMatchInfo> &VMIs, const OMPContext &Ctx) { + +  APInt BestScore(64, 0); +  int BestVMIIdx = -1; +  const VariantMatchInfo *BestVMI = nullptr; + +  for (unsigned u = 0, e = VMIs.size(); u < e; ++u) { +    const VariantMatchInfo &VMI = VMIs[u]; + +    SmallVector<unsigned, 8> ConstructMatches; +    // If the variant is not applicable its not the best. +    if (!isVariantApplicableInContextHelper(VMI, Ctx, &ConstructMatches, +                                            /* DeviceSetOnly */ false)) +      continue; +    // Check if its clearly not the best. +    APInt Score = getVariantMatchScore(VMI, Ctx, ConstructMatches); +    if (Score.ult(BestScore)) +      continue; +    // Equal score need subset checks. +    if (Score.eq(BestScore)) { +      // Strict subset are never best. +      if (isStrictSubset(VMI, *BestVMI)) +        continue; +      // Same score and the current best is no strict subset so we keep it. +      if (!isStrictSubset(*BestVMI, VMI)) +        continue; +    } +    // New best found. 
+    BestVMI = &VMI; +    BestVMIIdx = u; +    BestScore = Score; +  } + +  return BestVMIIdx; +} + +TraitSet llvm::omp::getOpenMPContextTraitSetKind(StringRef S) { +  return StringSwitch<TraitSet>(S) +#define OMP_TRAIT_SET(Enum, Str) .Case(Str, TraitSet::Enum) +#include "llvm/Frontend/OpenMP/OMPKinds.def" +      .Default(TraitSet::invalid); +} + +TraitSet +llvm::omp::getOpenMPContextTraitSetForSelector(TraitSelector Selector) { +  switch (Selector) { +#define OMP_TRAIT_SELECTOR(Enum, TraitSetEnum, Str, ReqProp)                   \ +  case TraitSelector::Enum:                                                    \ +    return TraitSet::TraitSetEnum; +#include "llvm/Frontend/OpenMP/OMPKinds.def" +  } +  llvm_unreachable("Unknown trait selector!"); +} +TraitSet +llvm::omp::getOpenMPContextTraitSetForProperty(TraitProperty Property) { +  switch (Property) { +#define OMP_TRAIT_PROPERTY(Enum, TraitSetEnum, TraitSelectorEnum, Str)         \ +  case TraitProperty::Enum:                                                    \ +    return TraitSet::TraitSetEnum; +#include "llvm/Frontend/OpenMP/OMPKinds.def" +  } +  llvm_unreachable("Unknown trait set!"); +} +StringRef llvm::omp::getOpenMPContextTraitSetName(TraitSet Kind) { +  switch (Kind) { +#define OMP_TRAIT_SET(Enum, Str)                                               \ +  case TraitSet::Enum:                                                         \ +    return Str; +#include "llvm/Frontend/OpenMP/OMPKinds.def" +  } +  llvm_unreachable("Unknown trait set!"); +} + +TraitSelector llvm::omp::getOpenMPContextTraitSelectorKind(StringRef S) { +  return StringSwitch<TraitSelector>(S) +#define OMP_TRAIT_SELECTOR(Enum, TraitSetEnum, Str, ReqProp)                   \ +  .Case(Str, TraitSelector::Enum) +#include "llvm/Frontend/OpenMP/OMPKinds.def" +      .Default(TraitSelector::invalid); +} +TraitSelector +llvm::omp::getOpenMPContextTraitSelectorForProperty(TraitProperty Property) { +  switch (Property) { +#define 
OMP_TRAIT_PROPERTY(Enum, TraitSetEnum, TraitSelectorEnum, Str)         \ +  case TraitProperty::Enum:                                                    \ +    return TraitSelector::TraitSelectorEnum; +#include "llvm/Frontend/OpenMP/OMPKinds.def" +  } +  llvm_unreachable("Unknown trait set!"); +} +StringRef llvm::omp::getOpenMPContextTraitSelectorName(TraitSelector Kind) { +  switch (Kind) { +#define OMP_TRAIT_SELECTOR(Enum, TraitSetEnum, Str, ReqProp)                   \ +  case TraitSelector::Enum:                                                    \ +    return Str; +#include "llvm/Frontend/OpenMP/OMPKinds.def" +  } +  llvm_unreachable("Unknown trait selector!"); +} + +TraitProperty llvm::omp::getOpenMPContextTraitPropertyKind(TraitSet Set, +                                                           StringRef S) { +#define OMP_TRAIT_PROPERTY(Enum, TraitSetEnum, TraitSelectorEnum, Str)         \ +  if (Set == TraitSet::TraitSetEnum && Str == S)                               \ +    return TraitProperty::Enum; +#include "llvm/Frontend/OpenMP/OMPKinds.def" +  return TraitProperty::invalid; +} +TraitProperty +llvm::omp::getOpenMPContextTraitPropertyForSelector(TraitSelector Selector) { +  return StringSwitch<TraitProperty>( +             getOpenMPContextTraitSelectorName(Selector)) +#define OMP_TRAIT_PROPERTY(Enum, TraitSetEnum, TraitSelectorEnum, Str)         \ +  .Case(Str, Selector == TraitSelector::TraitSelectorEnum                      \ +                 ? 
TraitProperty::Enum                                         \ +                 : TraitProperty::invalid) +#include "llvm/Frontend/OpenMP/OMPKinds.def" +      .Default(TraitProperty::invalid); +} +StringRef llvm::omp::getOpenMPContextTraitPropertyName(TraitProperty Kind) { +  switch (Kind) { +#define OMP_TRAIT_PROPERTY(Enum, TraitSetEnum, TraitSelectorEnum, Str)         \ +  case TraitProperty::Enum:                                                    \ +    return Str; +#include "llvm/Frontend/OpenMP/OMPKinds.def" +  } +  llvm_unreachable("Unknown trait property!"); +} +StringRef llvm::omp::getOpenMPContextTraitPropertyFullName(TraitProperty Kind) { +  switch (Kind) { +#define OMP_TRAIT_PROPERTY(Enum, TraitSetEnum, TraitSelectorEnum, Str)         \ +  case TraitProperty::Enum:                                                    \ +    return "(" #TraitSetEnum "," #TraitSelectorEnum "," Str ")"; +#include "llvm/Frontend/OpenMP/OMPKinds.def" +  } +  llvm_unreachable("Unknown trait property!"); +} + +bool llvm::omp::isValidTraitSelectorForTraitSet(TraitSelector Selector, +                                                TraitSet Set, +                                                bool &AllowsTraitScore, +                                                bool &RequiresProperty) { +  AllowsTraitScore = Set != TraitSet::construct && Set != TraitSet::device; +  switch (Selector) { +#define OMP_TRAIT_SELECTOR(Enum, TraitSetEnum, Str, ReqProp)                   \ +  case TraitSelector::Enum:                                                    \ +    RequiresProperty = ReqProp;                                                \ +    return Set == TraitSet::TraitSetEnum; +#include "llvm/Frontend/OpenMP/OMPKinds.def" +  } +  llvm_unreachable("Unknown trait selector!"); +} + +bool llvm::omp::isValidTraitPropertyForTraitSetAndSelector( +    TraitProperty Property, TraitSelector Selector, TraitSet Set) { +  switch (Property) { +#define OMP_TRAIT_PROPERTY(Enum, TraitSetEnum, 
TraitSelectorEnum, Str)         \ +  case TraitProperty::Enum:                                                    \ +    return Set == TraitSet::TraitSetEnum &&                                    \ +           Selector == TraitSelector::TraitSelectorEnum; +#include "llvm/Frontend/OpenMP/OMPKinds.def" +  } +  llvm_unreachable("Unknown trait property!"); +} + +std::string llvm::omp::listOpenMPContextTraitSets() { +  std::string S; +#define OMP_TRAIT_SET(Enum, Str)                                               \ +  if (StringRef(Str) != "invalid")                                             \ +    S.append("'").append(Str).append("'").append(" "); +#include "llvm/Frontend/OpenMP/OMPKinds.def" +  S = S.empty() ? "<none>" : S.substr(0, S.size() - 1); // Drop trailing ' '. +  return S; +} + +std::string llvm::omp::listOpenMPContextTraitSelectors(TraitSet Set) { +  std::string S; +#define OMP_TRAIT_SELECTOR(Enum, TraitSetEnum, Str, ReqProp)                   \ +  if (TraitSet::TraitSetEnum == Set && StringRef(Str) != "Invalid")            \ +    S.append("'").append(Str).append("'").append(" "); +#include "llvm/Frontend/OpenMP/OMPKinds.def" +  S = S.empty() ? "<none>" : S.substr(0, S.size() - 1); // Drop trailing ' '. +  return S; +} + +std::string +llvm::omp::listOpenMPContextTraitProperties(TraitSet Set, +                                            TraitSelector Selector) { +  std::string S; +#define OMP_TRAIT_PROPERTY(Enum, TraitSetEnum, TraitSelectorEnum, Str)         \ +  if (TraitSet::TraitSetEnum == Set &&                                         \ +      TraitSelector::TraitSelectorEnum == Selector &&                          \ +      StringRef(Str) != "invalid")                                             \ +    S.append("'").append(Str).append("'").append(" "); +#include "llvm/Frontend/OpenMP/OMPKinds.def" +  if (S.empty()) +    return "<none>"; +  S.pop_back(); +  return S; +} diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 739c2998baa8..9468a3aa3c8d 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ 
b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -18,8 +18,8 @@  #include "llvm/ADT/StringSwitch.h"  #include "llvm/IR/CFG.h"  #include "llvm/IR/DebugInfo.h" -#include "llvm/IR/MDBuilder.h"  #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/MDBuilder.h"  #include "llvm/Support/CommandLine.h"  #include "llvm/Support/Error.h"  #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -31,7 +31,6 @@  using namespace llvm;  using namespace omp; -using namespace types;  static cl::opt<bool>      OptimisticAttributes("openmp-ir-builder-optimistic-attributes", cl::Hidden, @@ -59,13 +58,17 @@ void OpenMPIRBuilder::addAttributes(omp::RuntimeFunction FnID, Function &Fn) {    }  } -Function *OpenMPIRBuilder::getOrCreateRuntimeFunction(RuntimeFunction FnID) { +FunctionCallee +OpenMPIRBuilder::getOrCreateRuntimeFunction(Module &M, RuntimeFunction FnID) { +  FunctionType *FnTy = nullptr;    Function *Fn = nullptr;    // Try to find the declation in the module first.    switch (FnID) {  #define OMP_RTL(Enum, Str, IsVarArg, ReturnType, ...)                          \    case Enum:                                                                   \ +    FnTy = FunctionType::get(ReturnType, ArrayRef<Type *>{__VA_ARGS__},        \ +                             IsVarArg);                                        \      Fn = M.getFunction(Str);                                                   \      break;  #include "llvm/Frontend/OpenMP/OMPKinds.def" @@ -74,25 +77,113 @@ Function *OpenMPIRBuilder::getOrCreateRuntimeFunction(RuntimeFunction FnID) {    if (!Fn) {      // Create a new declaration if we need one.      switch (FnID) { -#define OMP_RTL(Enum, Str, IsVarArg, ReturnType, ...)                          \ +#define OMP_RTL(Enum, Str, ...)                                                
\    case Enum:                                                                   \ -    Fn = Function::Create(FunctionType::get(ReturnType,                        \ -                                            ArrayRef<Type *>{__VA_ARGS__},     \ -                                            IsVarArg),                         \ -                          GlobalValue::ExternalLinkage, Str, M);               \ +    Fn = Function::Create(FnTy, GlobalValue::ExternalLinkage, Str, M);         \      break;  #include "llvm/Frontend/OpenMP/OMPKinds.def"      } +    // Add information if the runtime function takes a callback function +    if (FnID == OMPRTL___kmpc_fork_call || FnID == OMPRTL___kmpc_fork_teams) { +      if (!Fn->hasMetadata(LLVMContext::MD_callback)) { +        LLVMContext &Ctx = Fn->getContext(); +        MDBuilder MDB(Ctx); +        // Annotate the callback behavior of the runtime function: +        //  - The callback callee is argument number 2 (microtask). +        //  - The first two arguments of the callback callee are unknown (-1). +        //  - All variadic arguments to the runtime function are passed to the +        //    callback callee. 
+        Fn->addMetadata( +            LLVMContext::MD_callback, +            *MDNode::get(Ctx, {MDB.createCallbackEncoding( +                                  2, {-1, -1}, /* VarArgsArePassed */ true)})); +      } +    } + +    LLVM_DEBUG(dbgs() << "Created OpenMP runtime function " << Fn->getName() +                      << " with type " << *Fn->getFunctionType() << "\n");      addAttributes(FnID, *Fn); + +  } else { +    LLVM_DEBUG(dbgs() << "Found OpenMP runtime function " << Fn->getName() +                      << " with type " << *Fn->getFunctionType() << "\n");    }    assert(Fn && "Failed to create OpenMP runtime function"); + +  // Cast the function to the expected type if necessary +  Constant *C = ConstantExpr::getBitCast(Fn, FnTy->getPointerTo()); +  return {FnTy, C}; +} + +Function *OpenMPIRBuilder::getOrCreateRuntimeFunctionPtr(RuntimeFunction FnID) { +  FunctionCallee RTLFn = getOrCreateRuntimeFunction(M, FnID); +  auto *Fn = dyn_cast<llvm::Function>(RTLFn.getCallee()); +  assert(Fn && "Failed to create OpenMP runtime function pointer");    return Fn;  }  void OpenMPIRBuilder::initialize() { initializeTypes(M); } +void OpenMPIRBuilder::finalize() { +  SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet; +  SmallVector<BasicBlock *, 32> Blocks; +  for (OutlineInfo &OI : OutlineInfos) { +    ParallelRegionBlockSet.clear(); +    Blocks.clear(); +    OI.collectBlocks(ParallelRegionBlockSet, Blocks); + +    Function *OuterFn = OI.EntryBB->getParent(); +    CodeExtractorAnalysisCache CEAC(*OuterFn); +    CodeExtractor Extractor(Blocks, /* DominatorTree */ nullptr, +                            /* AggregateArgs */ false, +                            /* BlockFrequencyInfo */ nullptr, +                            /* BranchProbabilityInfo */ nullptr, +                            /* AssumptionCache */ nullptr, +                            /* AllowVarArgs */ true, +                            /* AllowAlloca */ true, +                            /* Suffix */ 
".omp_par"); + +    LLVM_DEBUG(dbgs() << "Before     outlining: " << *OuterFn << "\n"); +    LLVM_DEBUG(dbgs() << "Entry " << OI.EntryBB->getName() +                      << " Exit: " << OI.ExitBB->getName() << "\n"); +    assert(Extractor.isEligible() && +           "Expected OpenMP outlining to be possible!"); + +    Function *OutlinedFn = Extractor.extractCodeRegion(CEAC); + +    LLVM_DEBUG(dbgs() << "After      outlining: " << *OuterFn << "\n"); +    LLVM_DEBUG(dbgs() << "   Outlined function: " << *OutlinedFn << "\n"); +    assert(OutlinedFn->getReturnType()->isVoidTy() && +           "OpenMP outlined functions should not return a value!"); + +    // For compability with the clang CG we move the outlined function after the +    // one with the parallel region. +    OutlinedFn->removeFromParent(); +    M.getFunctionList().insertAfter(OuterFn->getIterator(), OutlinedFn); + +    // Remove the artificial entry introduced by the extractor right away, we +    // made our own entry block after all. +    { +      BasicBlock &ArtificialEntry = OutlinedFn->getEntryBlock(); +      assert(ArtificialEntry.getUniqueSuccessor() == OI.EntryBB); +      assert(OI.EntryBB->getUniquePredecessor() == &ArtificialEntry); +      OI.EntryBB->moveBefore(&ArtificialEntry); +      ArtificialEntry.eraseFromParent(); +    } +    assert(&OutlinedFn->getEntryBlock() == OI.EntryBB); +    assert(OutlinedFn && OutlinedFn->getNumUses() == 1); + +    // Run a user callback, e.g. to add attributes. +    if (OI.PostOutlineCB) +      OI.PostOutlineCB(*OutlinedFn); +  } + +  // Allow finalize to be called multiple times. +  OutlineInfos.clear(); +} +  Value *OpenMPIRBuilder::getOrCreateIdent(Constant *SrcLocStr,                                           IdentFlag LocFlags) {    // Enable "C-mode". 
@@ -165,7 +256,7 @@ OpenMPIRBuilder::getOrCreateSrcLocStr(const LocationDescription &Loc) {  Value *OpenMPIRBuilder::getOrCreateThreadID(Value *Ident) {    return Builder.CreateCall( -      getOrCreateRuntimeFunction(OMPRTL___kmpc_global_thread_num), Ident, +      getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num), Ident,        "omp_global_thread_num");  } @@ -212,10 +303,11 @@ OpenMPIRBuilder::emitBarrierImpl(const LocationDescription &Loc, Directive Kind,    bool UseCancelBarrier =        !ForceSimpleCall && isLastFinalizationInfoCancellable(OMPD_parallel); -  Value *Result = Builder.CreateCall( -      getOrCreateRuntimeFunction(UseCancelBarrier ? OMPRTL___kmpc_cancel_barrier -                                                  : OMPRTL___kmpc_barrier), -      Args); +  Value *Result = +      Builder.CreateCall(getOrCreateRuntimeFunctionPtr( +                             UseCancelBarrier ? OMPRTL___kmpc_cancel_barrier +                                              : OMPRTL___kmpc_barrier), +                         Args);    if (UseCancelBarrier && CheckCancelFlag)      emitCancelationCheckImpl(Result, OMPD_parallel); @@ -253,7 +345,7 @@ OpenMPIRBuilder::CreateCancel(const LocationDescription &Loc,    Value *Ident = getOrCreateIdent(SrcLocStr);    Value *Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind};    Value *Result = Builder.CreateCall( -      getOrCreateRuntimeFunction(OMPRTL___kmpc_cancel), Args); +      getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancel), Args);    // The actual cancel logic is shared with others, e.g., cancel_barriers.    
emitCancelationCheckImpl(Result, CanceledDirective); @@ -318,7 +410,7 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(          Ident, ThreadID,          Builder.CreateIntCast(NumThreads, Int32, /*isSigned*/ false)};      Builder.CreateCall( -        getOrCreateRuntimeFunction(OMPRTL___kmpc_push_num_threads), Args); +        getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_num_threads), Args);    }    if (ProcBind != OMP_PROC_BIND_default) { @@ -326,8 +418,8 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(      Value *Args[] = {          Ident, ThreadID,          ConstantInt::get(Int32, unsigned(ProcBind), /*isSigned=*/true)}; -    Builder.CreateCall(getOrCreateRuntimeFunction(OMPRTL___kmpc_push_proc_bind), -                       Args); +    Builder.CreateCall( +        getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_proc_bind), Args);    }    BasicBlock *InsertBB = Builder.GetInsertBlock(); @@ -415,32 +507,135 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(    // PRegionExitBB          <- A common exit to simplify block collection.    // -  LLVM_DEBUG(dbgs() << "Before body codegen: " << *UI->getFunction() << "\n"); +  LLVM_DEBUG(dbgs() << "Before body codegen: " << *OuterFn << "\n");    // Let the caller create the body.    
assert(BodyGenCB && "Expected body generation callback!");    InsertPointTy CodeGenIP(PRegBodyBB, PRegBodyBB->begin());    BodyGenCB(AllocaIP, CodeGenIP, *PRegPreFiniBB); -  LLVM_DEBUG(dbgs() << "After  body codegen: " << *UI->getFunction() << "\n"); +  LLVM_DEBUG(dbgs() << "After  body codegen: " << *OuterFn << "\n"); + +  FunctionCallee RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_call); +  if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) { +    if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) { +      llvm::LLVMContext &Ctx = F->getContext(); +      MDBuilder MDB(Ctx); +      // Annotate the callback behavior of the __kmpc_fork_call: +      //  - The callback callee is argument number 2 (microtask). +      //  - The first two arguments of the callback callee are unknown (-1). +      //  - All variadic arguments to the __kmpc_fork_call are passed to the +      //    callback callee. +      F->addMetadata( +          llvm::LLVMContext::MD_callback, +          *llvm::MDNode::get( +              Ctx, {MDB.createCallbackEncoding(2, {-1, -1}, +                                               /* VarArgsArePassed */ true)})); +    } +  } + +  OutlineInfo OI; +  OI.PostOutlineCB = [=](Function &OutlinedFn) { +    // Add some known attributes. 
+    OutlinedFn.addParamAttr(0, Attribute::NoAlias); +    OutlinedFn.addParamAttr(1, Attribute::NoAlias); +    OutlinedFn.addFnAttr(Attribute::NoUnwind); +    OutlinedFn.addFnAttr(Attribute::NoRecurse); + +    assert(OutlinedFn.arg_size() >= 2 && +           "Expected at least tid and bounded tid as arguments"); +    unsigned NumCapturedVars = +        OutlinedFn.arg_size() - /* tid & bounded tid */ 2; + +    CallInst *CI = cast<CallInst>(OutlinedFn.user_back()); +    CI->getParent()->setName("omp_parallel"); +    Builder.SetInsertPoint(CI); + +    // Build call __kmpc_fork_call(Ident, n, microtask, var1, .., varn); +    Value *ForkCallArgs[] = { +        Ident, Builder.getInt32(NumCapturedVars), +        Builder.CreateBitCast(&OutlinedFn, ParallelTaskPtr)}; + +    SmallVector<Value *, 16> RealArgs; +    RealArgs.append(std::begin(ForkCallArgs), std::end(ForkCallArgs)); +    RealArgs.append(CI->arg_begin() + /* tid & bound tid */ 2, CI->arg_end()); + +    Builder.CreateCall(RTLFn, RealArgs); + +    LLVM_DEBUG(dbgs() << "With fork_call placed: " +                      << *Builder.GetInsertBlock()->getParent() << "\n"); + +    InsertPointTy ExitIP(PRegExitBB, PRegExitBB->end()); + +    // Initialize the local TID stack location with the argument value. +    Builder.SetInsertPoint(PrivTID); +    Function::arg_iterator OutlinedAI = OutlinedFn.arg_begin(); +    Builder.CreateStore(Builder.CreateLoad(OutlinedAI), PrivTIDAddr); + +    // If no "if" clause was present we do not need the call created during +    // outlining, otherwise we reuse it in the serialized parallel region. +    if (!ElseTI) { +      CI->eraseFromParent(); +    } else { + +      // If an "if" clause was present we are now generating the serialized +      // version into the "else" branch. 
+      Builder.SetInsertPoint(ElseTI); + +      // Build calls __kmpc_serialized_parallel(&Ident, GTid); +      Value *SerializedParallelCallArgs[] = {Ident, ThreadID}; +      Builder.CreateCall( +          getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_serialized_parallel), +          SerializedParallelCallArgs); + +      // OutlinedFn(&gtid, &zero, CapturedStruct); +      CI->removeFromParent(); +      Builder.Insert(CI); + +      // __kmpc_end_serialized_parallel(&Ident, GTid); +      Value *EndArgs[] = {Ident, ThreadID}; +      Builder.CreateCall( +          getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_serialized_parallel), +          EndArgs); + +      LLVM_DEBUG(dbgs() << "With serialized parallel region: " +                        << *Builder.GetInsertBlock()->getParent() << "\n"); +    } + +    for (Instruction *I : ToBeDeleted) +      I->eraseFromParent(); +  }; + +  // Adjust the finalization stack, verify the adjustment, and call the +  // finalize function a last time to finalize values between the pre-fini +  // block and the exit block if we left the parallel "the normal way". +  auto FiniInfo = FinalizationStack.pop_back_val(); +  (void)FiniInfo; +  assert(FiniInfo.DK == OMPD_parallel && +         "Unexpected finalization stack state!"); + +  Instruction *PRegPreFiniTI = PRegPreFiniBB->getTerminator(); + +  InsertPointTy PreFiniIP(PRegPreFiniBB, PRegPreFiniTI->getIterator()); +  FiniCB(PreFiniIP); + +  OI.EntryBB = PRegEntryBB; +  OI.ExitBB = PRegExitBB;    SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet; -  SmallVector<BasicBlock *, 32> ParallelRegionBlocks, Worklist; -  ParallelRegionBlockSet.insert(PRegEntryBB); -  ParallelRegionBlockSet.insert(PRegExitBB); +  SmallVector<BasicBlock *, 32> Blocks; +  OI.collectBlocks(ParallelRegionBlockSet, Blocks); -  // Collect all blocks in-between PRegEntryBB and PRegExitBB. 
-  Worklist.push_back(PRegEntryBB); -  while (!Worklist.empty()) { -    BasicBlock *BB = Worklist.pop_back_val(); -    ParallelRegionBlocks.push_back(BB); -    for (BasicBlock *SuccBB : successors(BB)) -      if (ParallelRegionBlockSet.insert(SuccBB).second) -        Worklist.push_back(SuccBB); -  } +  // Ensure a single exit node for the outlined region by creating one. +  // We might have multiple incoming edges to the exit now due to finalizations, +  // e.g., cancel calls that cause the control flow to leave the region. +  BasicBlock *PRegOutlinedExitBB = PRegExitBB; +  PRegExitBB = SplitBlock(PRegExitBB, &*PRegExitBB->getFirstInsertionPt()); +  PRegOutlinedExitBB->setName("omp.par.outlined.exit"); +  Blocks.push_back(PRegOutlinedExitBB);    CodeExtractorAnalysisCache CEAC(*OuterFn); -  CodeExtractor Extractor(ParallelRegionBlocks, /* DominatorTree */ nullptr, +  CodeExtractor Extractor(Blocks, /* DominatorTree */ nullptr,                            /* AggregateArgs */ false,                            /* BlockFrequencyInfo */ nullptr,                            /* BranchProbabilityInfo */ nullptr, @@ -455,10 +650,10 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(    Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);    Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands); -  LLVM_DEBUG(dbgs() << "Before privatization: " << *UI->getFunction() << "\n"); +  LLVM_DEBUG(dbgs() << "Before privatization: " << *OuterFn << "\n");    FunctionCallee TIDRTLFn = -      getOrCreateRuntimeFunction(OMPRTL___kmpc_global_thread_num); +      getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num);    auto PrivHelper = [&](Value &V) {      if (&V == TIDAddr || &V == ZeroAddr) @@ -491,142 +686,443 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(      LLVM_DEBUG(dbgs() << "Captured input: " << *Input << "\n");      PrivHelper(*Input);    } -  for (Value *Output : Outputs) { -    LLVM_DEBUG(dbgs() << "Captured output: " << 
*Output << "\n"); -    PrivHelper(*Output); -  } +  assert(Outputs.empty() && +         "OpenMP outlining should not produce live-out values!"); -  LLVM_DEBUG(dbgs() << "After  privatization: " << *UI->getFunction() << "\n"); +  LLVM_DEBUG(dbgs() << "After  privatization: " << *OuterFn << "\n");    LLVM_DEBUG({ -    for (auto *BB : ParallelRegionBlocks) +    for (auto *BB : Blocks)        dbgs() << " PBR: " << BB->getName() << "\n";    }); -  // Add some known attributes to the outlined function. -  Function *OutlinedFn = Extractor.extractCodeRegion(CEAC); -  OutlinedFn->addParamAttr(0, Attribute::NoAlias); -  OutlinedFn->addParamAttr(1, Attribute::NoAlias); -  OutlinedFn->addFnAttr(Attribute::NoUnwind); -  OutlinedFn->addFnAttr(Attribute::NoRecurse); - -  LLVM_DEBUG(dbgs() << "After      outlining: " << *UI->getFunction() << "\n"); -  LLVM_DEBUG(dbgs() << "   Outlined function: " << *OutlinedFn << "\n"); - -  // For compability with the clang CG we move the outlined function after the -  // one with the parallel region. -  OutlinedFn->removeFromParent(); -  M.getFunctionList().insertAfter(OuterFn->getIterator(), OutlinedFn); - -  // Remove the artificial entry introduced by the extractor right away, we -  // made our own entry block after all. -  { -    BasicBlock &ArtificialEntry = OutlinedFn->getEntryBlock(); -    assert(ArtificialEntry.getUniqueSuccessor() == PRegEntryBB); -    assert(PRegEntryBB->getUniquePredecessor() == &ArtificialEntry); -    PRegEntryBB->moveBefore(&ArtificialEntry); -    ArtificialEntry.eraseFromParent(); -  } -  LLVM_DEBUG(dbgs() << "PP Outlined function: " << *OutlinedFn << "\n"); -  assert(&OutlinedFn->getEntryBlock() == PRegEntryBB); +  // Register the outlined info. 
+  addOutlineInfo(std::move(OI)); + +  InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end()); +  UI->eraseFromParent(); -  assert(OutlinedFn && OutlinedFn->getNumUses() == 1); -  assert(OutlinedFn->arg_size() >= 2 && -         "Expected at least tid and bounded tid as arguments"); -  unsigned NumCapturedVars = OutlinedFn->arg_size() - /* tid & bounded tid */ 2; +  return AfterIP; +} -  CallInst *CI = cast<CallInst>(OutlinedFn->user_back()); -  CI->getParent()->setName("omp_parallel"); -  Builder.SetInsertPoint(CI); +void OpenMPIRBuilder::emitFlush(const LocationDescription &Loc) { +  // Build call void __kmpc_flush(ident_t *loc) +  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); +  Value *Args[] = {getOrCreateIdent(SrcLocStr)}; -  // Build call __kmpc_fork_call(Ident, n, microtask, var1, .., varn); -  Value *ForkCallArgs[] = {Ident, Builder.getInt32(NumCapturedVars), -                           Builder.CreateBitCast(OutlinedFn, ParallelTaskPtr)}; +  Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_flush), Args); +} -  SmallVector<Value *, 16> RealArgs; -  RealArgs.append(std::begin(ForkCallArgs), std::end(ForkCallArgs)); -  RealArgs.append(CI->arg_begin() + /* tid & bound tid */ 2, CI->arg_end()); +void OpenMPIRBuilder::CreateFlush(const LocationDescription &Loc) { +  if (!updateToLocation(Loc)) +    return; +  emitFlush(Loc); +} -  FunctionCallee RTLFn = getOrCreateRuntimeFunction(OMPRTL___kmpc_fork_call); -  if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) { -    if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) { -      llvm::LLVMContext &Ctx = F->getContext(); -      MDBuilder MDB(Ctx); -      // Annotate the callback behavior of the __kmpc_fork_call: -      //  - The callback callee is argument number 2 (microtask). -      //  - The first two arguments of the callback callee are unknown (-1). -      //  - All variadic arguments to the __kmpc_fork_call are passed to the -      //    callback callee. 
-      F->addMetadata( -          llvm::LLVMContext::MD_callback, -          *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding( -                                      2, {-1, -1}, -                                      /* VarArgsArePassed */ true)})); +void OpenMPIRBuilder::emitTaskwaitImpl(const LocationDescription &Loc) { +  // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 +  // global_tid); +  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); +  Value *Ident = getOrCreateIdent(SrcLocStr); +  Value *Args[] = {Ident, getOrCreateThreadID(Ident)}; + +  // Ignore return result until untied tasks are supported. +  Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskwait), +                     Args); +} + +void OpenMPIRBuilder::CreateTaskwait(const LocationDescription &Loc) { +  if (!updateToLocation(Loc)) +    return; +  emitTaskwaitImpl(Loc); +} + +void OpenMPIRBuilder::emitTaskyieldImpl(const LocationDescription &Loc) { +  // Build call __kmpc_omp_taskyield(loc, thread_id, 0); +  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); +  Value *Ident = getOrCreateIdent(SrcLocStr); +  Constant *I32Null = ConstantInt::getNullValue(Int32); +  Value *Args[] = {Ident, getOrCreateThreadID(Ident), I32Null}; + +  Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskyield), +                     Args); +} + +void OpenMPIRBuilder::CreateTaskyield(const LocationDescription &Loc) { +  if (!updateToLocation(Loc)) +    return; +  emitTaskyieldImpl(Loc); +} + +OpenMPIRBuilder::InsertPointTy +OpenMPIRBuilder::CreateMaster(const LocationDescription &Loc, +                              BodyGenCallbackTy BodyGenCB, +                              FinalizeCallbackTy FiniCB) { + +  if (!updateToLocation(Loc)) +    return Loc.IP; + +  Directive OMPD = Directive::OMPD_master; +  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); +  Value *Ident = getOrCreateIdent(SrcLocStr); +  Value *ThreadId = getOrCreateThreadID(Ident); +  Value *Args[] = 
{Ident, ThreadId}; + +  Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_master); +  Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args); + +  Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_master); +  Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args); + +  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB, +                              /*Conditional*/ true, /*hasFinalize*/ true); +} + +OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::CreateCritical( +    const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, +    FinalizeCallbackTy FiniCB, StringRef CriticalName, Value *HintInst) { + +  if (!updateToLocation(Loc)) +    return Loc.IP; + +  Directive OMPD = Directive::OMPD_critical; +  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); +  Value *Ident = getOrCreateIdent(SrcLocStr); +  Value *ThreadId = getOrCreateThreadID(Ident); +  Value *LockVar = getOMPCriticalRegionLock(CriticalName); +  Value *Args[] = {Ident, ThreadId, LockVar}; + +  SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), std::end(Args)); +  Function *RTFn = nullptr; +  if (HintInst) { +    // Add Hint to entry Args and create call +    EnterArgs.push_back(HintInst); +    RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical_with_hint); +  } else { +    RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical); +  } +  Instruction *EntryCall = Builder.CreateCall(RTFn, EnterArgs); + +  Function *ExitRTLFn = +      getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_critical); +  Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args); + +  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB, +                              /*Conditional*/ false, /*hasFinalize*/ true); +} + +OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::EmitOMPInlinedRegion( +    Directive OMPD, Instruction *EntryCall, Instruction *ExitCall, +    BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool 
Conditional, +    bool HasFinalize) { + +  if (HasFinalize) +    FinalizationStack.push_back({FiniCB, OMPD, /*IsCancellable*/ false}); + +  // Create inlined region's entry and body blocks, in preparation +  // for conditional creation +  BasicBlock *EntryBB = Builder.GetInsertBlock(); +  Instruction *SplitPos = EntryBB->getTerminator(); +  if (!isa_and_nonnull<BranchInst>(SplitPos)) +    SplitPos = new UnreachableInst(Builder.getContext(), EntryBB); +  BasicBlock *ExitBB = EntryBB->splitBasicBlock(SplitPos, "omp_region.end"); +  BasicBlock *FiniBB = +      EntryBB->splitBasicBlock(EntryBB->getTerminator(), "omp_region.finalize"); + +  Builder.SetInsertPoint(EntryBB->getTerminator()); +  emitCommonDirectiveEntry(OMPD, EntryCall, ExitBB, Conditional); + +  // generate body +  BodyGenCB(/* AllocaIP */ InsertPointTy(), +            /* CodeGenIP */ Builder.saveIP(), *FiniBB); + +  // If we didn't emit a branch to FiniBB during body generation, it means +  // FiniBB is unreachable (e.g. while(1);). stop generating all the +  // unreachable blocks, and remove anything we are not going to use. +  auto SkipEmittingRegion = FiniBB->hasNPredecessors(0); +  if (SkipEmittingRegion) { +    FiniBB->eraseFromParent(); +    ExitCall->eraseFromParent(); +    // Discard finalization if we have it. +    if (HasFinalize) { +      assert(!FinalizationStack.empty() && +             "Unexpected finalization stack state!"); +      FinalizationStack.pop_back();      } +  } else { +    // emit exit call and do any needed finalization. 
+    auto FinIP = InsertPointTy(FiniBB, FiniBB->getFirstInsertionPt()); +    assert(FiniBB->getTerminator()->getNumSuccessors() == 1 && +           FiniBB->getTerminator()->getSuccessor(0) == ExitBB && +           "Unexpected control flow graph state!!"); +    emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize); +    assert(FiniBB->getUniquePredecessor()->getUniqueSuccessor() == FiniBB && +           "Unexpected Control Flow State!"); +    MergeBlockIntoPredecessor(FiniBB);    } -  Builder.CreateCall(RTLFn, RealArgs); +  // If we are skipping the region of a non conditional, remove the exit +  // block, and clear the builder's insertion point. +  assert(SplitPos->getParent() == ExitBB && +         "Unexpected Insertion point location!"); +  if (!Conditional && SkipEmittingRegion) { +    ExitBB->eraseFromParent(); +    Builder.ClearInsertionPoint(); +  } else { +    auto merged = MergeBlockIntoPredecessor(ExitBB); +    BasicBlock *ExitPredBB = SplitPos->getParent(); +    auto InsertBB = merged ? ExitPredBB : ExitBB; +    if (!isa_and_nonnull<BranchInst>(SplitPos)) +      SplitPos->eraseFromParent(); +    Builder.SetInsertPoint(InsertBB); +  } -  LLVM_DEBUG(dbgs() << "With fork_call placed: " -                    << *Builder.GetInsertBlock()->getParent() << "\n"); +  return Builder.saveIP(); +} -  InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end()); -  InsertPointTy ExitIP(PRegExitBB, PRegExitBB->end()); +OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveEntry( +    Directive OMPD, Value *EntryCall, BasicBlock *ExitBB, bool Conditional) { + +  // if nothing to do, Return current insertion point. 
+  if (!Conditional) +    return Builder.saveIP(); + +  BasicBlock *EntryBB = Builder.GetInsertBlock(); +  Value *CallBool = Builder.CreateIsNotNull(EntryCall); +  auto *ThenBB = BasicBlock::Create(M.getContext(), "omp_region.body"); +  auto *UI = new UnreachableInst(Builder.getContext(), ThenBB); + +  // Emit thenBB and set the Builder's insertion point there for +  // body generation next. Place the block after the current block. +  Function *CurFn = EntryBB->getParent(); +  CurFn->getBasicBlockList().insertAfter(EntryBB->getIterator(), ThenBB); + +  // Move Entry branch to end of ThenBB, and replace with conditional +  // branch (If-stmt) +  Instruction *EntryBBTI = EntryBB->getTerminator(); +  Builder.CreateCondBr(CallBool, ThenBB, ExitBB); +  EntryBBTI->removeFromParent(); +  Builder.SetInsertPoint(UI); +  Builder.Insert(EntryBBTI);    UI->eraseFromParent(); +  Builder.SetInsertPoint(ThenBB->getTerminator()); + +  // return an insertion point to ExitBB. +  return IRBuilder<>::InsertPoint(ExitBB, ExitBB->getFirstInsertionPt()); +} + +OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveExit( +    omp::Directive OMPD, InsertPointTy FinIP, Instruction *ExitCall, +    bool HasFinalize) { + +  Builder.restoreIP(FinIP); + +  // If there is finalization to do, emit it before the exit call +  if (HasFinalize) { +    assert(!FinalizationStack.empty() && +           "Unexpected finalization stack state!"); + +    FinalizationInfo Fi = FinalizationStack.pop_back_val(); +    assert(Fi.DK == OMPD && "Unexpected Directive for Finalization call!"); + +    Fi.FiniCB(FinIP); + +    BasicBlock *FiniBB = FinIP.getBlock(); +    Instruction *FiniBBTI = FiniBB->getTerminator(); -  // Initialize the local TID stack location with the argument value. 
-  Builder.SetInsertPoint(PrivTID); -  Function::arg_iterator OutlinedAI = OutlinedFn->arg_begin(); -  Builder.CreateStore(Builder.CreateLoad(OutlinedAI), PrivTIDAddr); +    // set Builder IP for call creation +    Builder.SetInsertPoint(FiniBBTI); +  } + +  // place the Exitcall as last instruction before Finalization block terminator +  ExitCall->removeFromParent(); +  Builder.Insert(ExitCall); + +  return IRBuilder<>::InsertPoint(ExitCall->getParent(), +                                  ExitCall->getIterator()); +} -  // If no "if" clause was present we do not need the call created during -  // outlining, otherwise we reuse it in the serialized parallel region. -  if (!ElseTI) { -    CI->eraseFromParent(); +OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::CreateCopyinClauseBlocks( +    InsertPointTy IP, Value *MasterAddr, Value *PrivateAddr, +    llvm::IntegerType *IntPtrTy, bool BranchtoEnd) { +  if (!IP.isSet()) +    return IP; + +  IRBuilder<>::InsertPointGuard IPG(Builder); + +  // creates the following CFG structure +  //	   OMP_Entry : (MasterAddr != PrivateAddr)? +  //       F     T +  //       |      \ +  //       |     copin.not.master +  //       |      / +  //       v     / +  //   copyin.not.master.end +  //		     | +  //         v +  //   OMP.Entry.Next + +  BasicBlock *OMP_Entry = IP.getBlock(); +  Function *CurFn = OMP_Entry->getParent(); +  BasicBlock *CopyBegin = +      BasicBlock::Create(M.getContext(), "copyin.not.master", CurFn); +  BasicBlock *CopyEnd = nullptr; + +  // If entry block is terminated, split to preserve the branch to following +  // basic block (i.e. OMP.Entry.Next), otherwise, leave everything as is. 
+  if (isa_and_nonnull<BranchInst>(OMP_Entry->getTerminator())) { +    CopyEnd = OMP_Entry->splitBasicBlock(OMP_Entry->getTerminator(), +                                         "copyin.not.master.end"); +    OMP_Entry->getTerminator()->eraseFromParent();    } else { +    CopyEnd = +        BasicBlock::Create(M.getContext(), "copyin.not.master.end", CurFn); +  } -    // If an "if" clause was present we are now generating the serialized -    // version into the "else" branch. -    Builder.SetInsertPoint(ElseTI); +  Builder.SetInsertPoint(OMP_Entry); +  Value *MasterPtr = Builder.CreatePtrToInt(MasterAddr, IntPtrTy); +  Value *PrivatePtr = Builder.CreatePtrToInt(PrivateAddr, IntPtrTy); +  Value *cmp = Builder.CreateICmpNE(MasterPtr, PrivatePtr); +  Builder.CreateCondBr(cmp, CopyBegin, CopyEnd); -    // Build calls __kmpc_serialized_parallel(&Ident, GTid); -    Value *SerializedParallelCallArgs[] = {Ident, ThreadID}; -    Builder.CreateCall( -        getOrCreateRuntimeFunction(OMPRTL___kmpc_serialized_parallel), -        SerializedParallelCallArgs); +  Builder.SetInsertPoint(CopyBegin); +  if (BranchtoEnd) +    Builder.SetInsertPoint(Builder.CreateBr(CopyEnd)); -    // OutlinedFn(&gtid, &zero, CapturedStruct); -    CI->removeFromParent(); -    Builder.Insert(CI); +  return Builder.saveIP(); +} -    // __kmpc_end_serialized_parallel(&Ident, GTid); -    Value *EndArgs[] = {Ident, ThreadID}; -    Builder.CreateCall( -        getOrCreateRuntimeFunction(OMPRTL___kmpc_end_serialized_parallel), -        EndArgs); +CallInst *OpenMPIRBuilder::CreateOMPAlloc(const LocationDescription &Loc, +                                          Value *Size, Value *Allocator, +                                          std::string Name) { +  IRBuilder<>::InsertPointGuard IPG(Builder); +  Builder.restoreIP(Loc.IP); -    LLVM_DEBUG(dbgs() << "With serialized parallel region: " -                      << *Builder.GetInsertBlock()->getParent() << "\n"); +  Constant *SrcLocStr = 
getOrCreateSrcLocStr(Loc); +  Value *Ident = getOrCreateIdent(SrcLocStr); +  Value *ThreadId = getOrCreateThreadID(Ident); +  Value *Args[] = {ThreadId, Size, Allocator}; + +  Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_alloc); + +  return Builder.CreateCall(Fn, Args, Name); +} + +CallInst *OpenMPIRBuilder::CreateOMPFree(const LocationDescription &Loc, +                                         Value *Addr, Value *Allocator, +                                         std::string Name) { +  IRBuilder<>::InsertPointGuard IPG(Builder); +  Builder.restoreIP(Loc.IP); + +  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); +  Value *Ident = getOrCreateIdent(SrcLocStr); +  Value *ThreadId = getOrCreateThreadID(Ident); +  Value *Args[] = {ThreadId, Addr, Allocator}; +  Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_free); +  return Builder.CreateCall(Fn, Args, Name); +} + +CallInst *OpenMPIRBuilder::CreateCachedThreadPrivate( +    const LocationDescription &Loc, llvm::Value *Pointer, +    llvm::ConstantInt *Size, const llvm::Twine &Name) { +  IRBuilder<>::InsertPointGuard IPG(Builder); +  Builder.restoreIP(Loc.IP); + +  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); +  Value *Ident = getOrCreateIdent(SrcLocStr); +  Value *ThreadId = getOrCreateThreadID(Ident); +  Constant *ThreadPrivateCache = +      getOrCreateOMPInternalVariable(Int8PtrPtr, Name); +  llvm::Value *Args[] = {Ident, ThreadId, Pointer, Size, ThreadPrivateCache}; + +  Function *Fn = +  		getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_threadprivate_cached); + +  return Builder.CreateCall(Fn, Args); +} + +std::string OpenMPIRBuilder::getNameWithSeparators(ArrayRef<StringRef> Parts, +                                                   StringRef FirstSeparator, +                                                   StringRef Separator) { +  SmallString<128> Buffer; +  llvm::raw_svector_ostream OS(Buffer); +  StringRef Sep = FirstSeparator; +  for (StringRef Part : Parts) { +    OS << Sep << 
Part; +    Sep = Separator;    } +  return OS.str().str(); +} -  // Adjust the finalization stack, verify the adjustment, and call the -  // finalize function a last time to finalize values between the pre-fini block -  // and the exit block if we left the parallel "the normal way". -  auto FiniInfo = FinalizationStack.pop_back_val(); -  (void)FiniInfo; -  assert(FiniInfo.DK == OMPD_parallel && -         "Unexpected finalization stack state!"); +Constant *OpenMPIRBuilder::getOrCreateOMPInternalVariable( +    llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) { +  // TODO: Replace the twine arg with stringref to get rid of the conversion +  // logic. However This is taken from current implementation in clang as is. +  // Since this method is used in many places exclusively for OMP internal use +  // we will keep it as is for temporarily until we move all users to the +  // builder and then, if possible, fix it everywhere in one go. +  SmallString<256> Buffer; +  llvm::raw_svector_ostream Out(Buffer); +  Out << Name; +  StringRef RuntimeName = Out.str(); +  auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first; +  if (Elem.second) { +    assert(Elem.second->getType()->getPointerElementType() == Ty && +           "OMP internal variable has different type than requested"); +  } else { +    // TODO: investigate the appropriate linkage type used for the global +    // variable for possibly changing that to internal or private, or maybe +    // create different versions of the function for different OMP internal +    // variables. 
+    Elem.second = new llvm::GlobalVariable( +        M, Ty, /*IsConstant*/ false, llvm::GlobalValue::CommonLinkage, +        llvm::Constant::getNullValue(Ty), Elem.first(), +        /*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal, +        AddressSpace); +  } -  Instruction *PreFiniTI = PRegPreFiniBB->getTerminator(); -  assert(PreFiniTI->getNumSuccessors() == 1 && -         PreFiniTI->getSuccessor(0)->size() == 1 && -         isa<ReturnInst>(PreFiniTI->getSuccessor(0)->getTerminator()) && -         "Unexpected CFG structure!"); +  return Elem.second; +} -  InsertPointTy PreFiniIP(PRegPreFiniBB, PreFiniTI->getIterator()); -  FiniCB(PreFiniIP); +Value *OpenMPIRBuilder::getOMPCriticalRegionLock(StringRef CriticalName) { +  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str(); +  std::string Name = getNameWithSeparators({Prefix, "var"}, ".", "."); +  return getOrCreateOMPInternalVariable(KmpCriticalNameTy, Name); +} -  for (Instruction *I : ToBeDeleted) -    I->eraseFromParent(); +// Create all simple and struct types exposed by the runtime and remember +// the llvm::PointerTypes of them for easy access later. +void OpenMPIRBuilder::initializeTypes(Module &M) { +  LLVMContext &Ctx = M.getContext(); +  StructType *T; +#define OMP_TYPE(VarName, InitValue) VarName = InitValue; +#define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize)                             \ +  VarName##Ty = ArrayType::get(ElemTy, ArraySize);                             \ +  VarName##PtrTy = PointerType::getUnqual(VarName##Ty); +#define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...)                  \ +  VarName = FunctionType::get(ReturnType, {__VA_ARGS__}, IsVarArg);            \ +  VarName##Ptr = PointerType::getUnqual(VarName); +#define OMP_STRUCT_TYPE(VarName, StructName, ...)                              
\ +  T = M.getTypeByName(StructName);                                             \ +  if (!T)                                                                      \ +    T = StructType::create(Ctx, {__VA_ARGS__}, StructName);                    \ +  VarName = T;                                                                 \ +  VarName##Ptr = PointerType::getUnqual(T); +#include "llvm/Frontend/OpenMP/OMPKinds.def" +} -  return AfterIP; +void OpenMPIRBuilder::OutlineInfo::collectBlocks( +    SmallPtrSetImpl<BasicBlock *> &BlockSet, +    SmallVectorImpl<BasicBlock *> &BlockVector) { +  SmallVector<BasicBlock *, 32> Worklist; +  BlockSet.insert(EntryBB); +  BlockSet.insert(ExitBB); + +  Worklist.push_back(EntryBB); +  while (!Worklist.empty()) { +    BasicBlock *BB = Worklist.pop_back_val(); +    BlockVector.push_back(BB); +    for (BasicBlock *SuccBB : successors(BB)) +      if (BlockSet.insert(SuccBB).second) +        Worklist.push_back(SuccBB); +  }  } | 
