diff options
Diffstat (limited to 'llvm/lib/Frontend')
-rw-r--r-- | llvm/lib/Frontend/OpenMP/OMPConstants.cpp | 87 | ||||
-rw-r--r-- | llvm/lib/Frontend/OpenMP/OMPContext.cpp | 527 | ||||
-rw-r--r-- | llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 780 |
3 files changed, 1165 insertions, 229 deletions
diff --git a/llvm/lib/Frontend/OpenMP/OMPConstants.cpp b/llvm/lib/Frontend/OpenMP/OMPConstants.cpp deleted file mode 100644 index ec0733903e994..0000000000000 --- a/llvm/lib/Frontend/OpenMP/OMPConstants.cpp +++ /dev/null @@ -1,87 +0,0 @@ -//===- OMPConstants.cpp - Helpers related to OpenMP code generation ---===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -//===----------------------------------------------------------------------===// - -#include "llvm/Frontend/OpenMP/OMPConstants.h" - -#include "llvm/ADT/StringRef.h" -#include "llvm/ADT/StringSwitch.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Type.h" - -using namespace llvm; -using namespace omp; -using namespace types; - -Directive llvm::omp::getOpenMPDirectiveKind(StringRef Str) { - return llvm::StringSwitch<Directive>(Str) -#define OMP_DIRECTIVE(Enum, Str) .Case(Str, Enum) -#include "llvm/Frontend/OpenMP/OMPKinds.def" - .Default(OMPD_unknown); -} - -StringRef llvm::omp::getOpenMPDirectiveName(Directive Kind) { - switch (Kind) { -#define OMP_DIRECTIVE(Enum, Str) \ - case Enum: \ - return Str; -#include "llvm/Frontend/OpenMP/OMPKinds.def" - } - llvm_unreachable("Invalid OpenMP directive kind"); -} - -/// Declarations for LLVM-IR types (simple, function and structure) are -/// generated below. Their names are defined and used in OpenMPKinds.def. Here -/// we provide the declarations, the initializeTypes function will provide the -/// values. -/// -///{ - -#define OMP_TYPE(VarName, InitValue) Type *llvm::omp::types::VarName = nullptr; -#define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \ - FunctionType *llvm::omp::types::VarName = nullptr; \ - PointerType *llvm::omp::types::VarName##Ptr = nullptr; -#define OMP_STRUCT_TYPE(VarName, StrName, ...) 
\ - StructType *llvm::omp::types::VarName = nullptr; \ - PointerType *llvm::omp::types::VarName##Ptr = nullptr; -#include "llvm/Frontend/OpenMP/OMPKinds.def" - -///} - -void llvm::omp::types::initializeTypes(Module &M) { - if (Void) - return; - - LLVMContext &Ctx = M.getContext(); - // Create all simple and struct types exposed by the runtime and remember - // the llvm::PointerTypes of them for easy access later. - StructType *T; -#define OMP_TYPE(VarName, InitValue) VarName = InitValue; -#define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \ - VarName = FunctionType::get(ReturnType, {__VA_ARGS__}, IsVarArg); \ - VarName##Ptr = PointerType::getUnqual(VarName); -#define OMP_STRUCT_TYPE(VarName, StructName, ...) \ - T = M.getTypeByName(StructName); \ - if (!T) \ - T = StructType::create(Ctx, {__VA_ARGS__}, StructName); \ - VarName = T; \ - VarName##Ptr = PointerType::getUnqual(T); -#include "llvm/Frontend/OpenMP/OMPKinds.def" -} - -void llvm::omp::types::uninitializeTypes() { -#define OMP_TYPE(VarName, InitValue) VarName = nullptr; -#define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \ - VarName = nullptr; \ - VarName##Ptr = nullptr; -#define OMP_STRUCT_TYPE(VarName, StrName, ...) \ - VarName = nullptr; \ - VarName##Ptr = nullptr; -#include "llvm/Frontend/OpenMP/OMPKinds.def" -} diff --git a/llvm/lib/Frontend/OpenMP/OMPContext.cpp b/llvm/lib/Frontend/OpenMP/OMPContext.cpp new file mode 100644 index 0000000000000..c44e858ab5ed5 --- /dev/null +++ b/llvm/lib/Frontend/OpenMP/OMPContext.cpp @@ -0,0 +1,527 @@ +//===- OMPContext.cpp ------ Collection of helpers for OpenMP contexts ----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements helper functions and classes to deal with OpenMP
+/// contexts as used by `[begin/end] declare variant` and `metadirective`.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Frontend/OpenMP/OMPContext.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define DEBUG_TYPE "openmp-ir-builder"
+
+using namespace llvm;
+using namespace omp;
+
+/// Populate the set of active trait properties for a compilation.
+///
+/// \param IsDeviceCompilation selects the `nohost` device kind when true and
+///        the `host` device kind otherwise.
+/// \param TargetTriple        its architecture decides whether the `cpu` or
+///        `gpu` device kind is set (neither for architectures not listed
+///        below) and which device architecture property is activated.
+///
+/// Independent of the arguments, the `llvm` vendor, the `true` user condition
+/// and the `any` device kind are always marked active.
+OMPContext::OMPContext(bool IsDeviceCompilation, Triple TargetTriple) {
+  // Add the appropriate device kind trait based on the triple and the
+  // IsDeviceCompilation flag.
+  ActiveTraits.set(unsigned(IsDeviceCompilation
+                                ? TraitProperty::device_kind_nohost
+                                : TraitProperty::device_kind_host));
+  switch (TargetTriple.getArch()) {
+  case Triple::arm:
+  case Triple::armeb:
+  case Triple::aarch64:
+  case Triple::aarch64_be:
+  case Triple::aarch64_32:
+  case Triple::mips:
+  case Triple::mipsel:
+  case Triple::mips64:
+  case Triple::mips64el:
+  case Triple::ppc:
+  case Triple::ppc64:
+  case Triple::ppc64le:
+  case Triple::x86:
+  case Triple::x86_64:
+    ActiveTraits.set(unsigned(TraitProperty::device_kind_cpu));
+    break;
+  case Triple::amdgcn:
+  case Triple::nvptx:
+  case Triple::nvptx64:
+    ActiveTraits.set(unsigned(TraitProperty::device_kind_gpu));
+    break;
+  default:
+    break;
+  }
+
+  // Add the appropriate device architecture trait based on the triple.
+#define OMP_TRAIT_PROPERTY(Enum, TraitSetEnum, TraitSelectorEnum, Str)         \
+  if (TraitSelector::TraitSelectorEnum == TraitSelector::device_arch)          \
+    if (TargetTriple.getArch() == TargetTriple.getArchTypeForLLVMName(Str))    \
+      ActiveTraits.set(unsigned(TraitProperty::Enum));
+#include "llvm/Frontend/OpenMP/OMPKinds.def"
+
+  // TODO: What exactly do we want to see as device ISA trait?
+  //       The discussion on the list did not seem to have come to an agreed
+  //       upon solution.
+
+  // LLVM is the "OpenMP vendor" but we could also interpret vendor as the
+  // target vendor.
+  ActiveTraits.set(unsigned(TraitProperty::implementation_vendor_llvm));
+
+  // The user condition true is accepted but not false.
+  ActiveTraits.set(unsigned(TraitProperty::user_condition_true));
+
+  // This is for sure some device.
+  ActiveTraits.set(unsigned(TraitProperty::device_kind_any));
+
+  LLVM_DEBUG({
+    dbgs() << "[" << DEBUG_TYPE
+           << "] New OpenMP context with the following properties:\n";
+    for (unsigned Bit : ActiveTraits.set_bits()) {
+      TraitProperty Property = TraitProperty(Bit);
+      dbgs() << "\t " << getOpenMPContextTraitPropertyFullName(Property)
+             << "\n";
+    }
+  });
+}
+
+/// Return true if \p C0 is a subset of \p C1. Note that both arrays are
+/// expected to be sorted.
+///
+/// Both arrays are walked front to back in a single pass: equal elements
+/// consume one entry of each array, any other element of \p C1 is skipped.
+/// The result is false as soon as \p C1 is exhausted while elements of \p C0
+/// are still unmatched.
+template <typename T> static bool isSubset(ArrayRef<T> C0, ArrayRef<T> C1) {
+#ifdef EXPENSIVE_CHECKS
+  assert(llvm::is_sorted(C0) && llvm::is_sorted(C1) &&
+         "Expected sorted arrays!");
+#endif
+  if (C0.size() > C1.size())
+    return false;
+  auto It0 = C0.begin(), End0 = C0.end();
+  auto It1 = C1.begin(), End1 = C1.end();
+  while (It0 != End0) {
+    if (It1 == End1)
+      return false;
+    if (*It0 == *It1) {
+      ++It0;
+      ++It1;
+      continue;
+    }
+    // *It1 is not the element we are looking for, skip it. *It0 has to stay
+    // put until it is matched; advancing it instead would silently accept
+    // unmatched elements of C0 (e.g., {1} would count as a subset of {2}).
+    ++It1;
+  }
+  return true;
+}
+
+/// Return true if \p C0 is a strict subset of \p C1. Note that both arrays are
+/// expected to be sorted.
+template <typename T> +static bool isStrictSubset(ArrayRef<T> C0, ArrayRef<T> C1) { + if (C0.size() >= C1.size()) + return false; + return isSubset<T>(C0, C1); +} + +static bool isStrictSubset(const VariantMatchInfo &VMI0, + const VariantMatchInfo &VMI1) { + // If all required traits are a strict subset and the ordered vectors storing + // the construct traits, we say it is a strict subset. Note that the latter + // relation is not required to be strict. + if (VMI0.RequiredTraits.count() >= VMI1.RequiredTraits.count()) + return false; + for (unsigned Bit : VMI0.RequiredTraits.set_bits()) + if (!VMI1.RequiredTraits.test(Bit)) + return false; + if (!isSubset<TraitProperty>(VMI0.ConstructTraits, VMI1.ConstructTraits)) + return false; + return true; +} + +static int isVariantApplicableInContextHelper( + const VariantMatchInfo &VMI, const OMPContext &Ctx, + SmallVectorImpl<unsigned> *ConstructMatches, bool DeviceSetOnly) { + + // The match kind determines if we need to match all traits, any of the + // traits, or none of the traits for it to be an applicable context. + enum MatchKind { MK_ALL, MK_ANY, MK_NONE }; + + MatchKind MK = MK_ALL; + // Determine the match kind the user wants, "all" is the default and provided + // to the user only for completeness. + if (VMI.RequiredTraits.test( + unsigned(TraitProperty::implementation_extension_match_any))) + MK = MK_ANY; + if (VMI.RequiredTraits.test( + unsigned(TraitProperty::implementation_extension_match_none))) + MK = MK_NONE; + + // Helper to deal with a single property that was (not) found in the OpenMP + // context based on the match kind selected by the user via + // `implementation={extensions(match_[all,any,none])}' + auto HandleTrait = [MK](TraitProperty Property, + bool WasFound) -> Optional<bool> /* Result */ { + // For kind "any" a single match is enough but we ignore non-matched + // properties. 
+ if (MK == MK_ANY) { + if (WasFound) + return true; + return None; + } + + // In "all" or "none" mode we accept a matching or non-matching property + // respectively and move on. We are not done yet! + if ((WasFound && MK == MK_ALL) || (!WasFound && MK == MK_NONE)) + return None; + + // We missed a property, provide some debug output and indicate failure. + LLVM_DEBUG({ + if (MK == MK_ALL) + dbgs() << "[" << DEBUG_TYPE << "] Property " + << getOpenMPContextTraitPropertyName(Property) + << " was not in the OpenMP context but match kind is all.\n"; + if (MK == MK_NONE) + dbgs() << "[" << DEBUG_TYPE << "] Property " + << getOpenMPContextTraitPropertyName(Property) + << " was in the OpenMP context but match kind is none.\n"; + }); + return false; + }; + + for (unsigned Bit : VMI.RequiredTraits.set_bits()) { + TraitProperty Property = TraitProperty(Bit); + if (DeviceSetOnly && + getOpenMPContextTraitSetForProperty(Property) != TraitSet::device) + continue; + + // So far all extensions are handled elsewhere, we skip them here as they + // are not part of the OpenMP context. + if (getOpenMPContextTraitSelectorForProperty(Property) == + TraitSelector::implementation_extension) + continue; + + bool IsActiveTrait = Ctx.ActiveTraits.test(unsigned(Property)); + Optional<bool> Result = HandleTrait(Property, IsActiveTrait); + if (Result.hasValue()) + return Result.getValue(); + } + + if (!DeviceSetOnly) { + // We could use isSubset here but we also want to record the match + // locations. + unsigned ConstructIdx = 0, NoConstructTraits = Ctx.ConstructTraits.size(); + for (TraitProperty Property : VMI.ConstructTraits) { + assert(getOpenMPContextTraitSetForProperty(Property) == + TraitSet::construct && + "Variant context is ill-formed!"); + + // Verify the nesting. 
+ bool FoundInOrder = false; + while (!FoundInOrder && ConstructIdx != NoConstructTraits) + FoundInOrder = (Ctx.ConstructTraits[ConstructIdx++] == Property); + if (ConstructMatches) + ConstructMatches->push_back(ConstructIdx - 1); + + Optional<bool> Result = HandleTrait(Property, FoundInOrder); + if (Result.hasValue()) + return Result.getValue(); + + if (!FoundInOrder) { + LLVM_DEBUG(dbgs() << "[" << DEBUG_TYPE << "] Construct property " + << getOpenMPContextTraitPropertyName(Property) + << " was not nested properly.\n"); + return false; + } + + // TODO: Verify SIMD + } + + assert(isSubset<TraitProperty>(VMI.ConstructTraits, Ctx.ConstructTraits) && + "Broken invariant!"); + } + + if (MK == MK_ANY) { + LLVM_DEBUG(dbgs() << "[" << DEBUG_TYPE + << "] None of the properties was in the OpenMP context " + "but match kind is any.\n"); + return false; + } + + return true; +} + +bool llvm::omp::isVariantApplicableInContext(const VariantMatchInfo &VMI, + const OMPContext &Ctx, + bool DeviceSetOnly) { + return isVariantApplicableInContextHelper( + VMI, Ctx, /* ConstructMatches */ nullptr, DeviceSetOnly); +} + +static APInt getVariantMatchScore(const VariantMatchInfo &VMI, + const OMPContext &Ctx, + SmallVectorImpl<unsigned> &ConstructMatches) { + APInt Score(64, 1); + + unsigned NoConstructTraits = VMI.ConstructTraits.size(); + for (unsigned Bit : VMI.RequiredTraits.set_bits()) { + TraitProperty Property = TraitProperty(Bit); + // If there is a user score attached, use it. + if (VMI.ScoreMap.count(Property)) { + const APInt &UserScore = VMI.ScoreMap.lookup(Property); + assert(UserScore.uge(0) && "Expect non-negative user scores!"); + Score += UserScore.getZExtValue(); + continue; + } + + switch (getOpenMPContextTraitSetForProperty(Property)) { + case TraitSet::construct: + // We handle the construct traits later via the VMI.ConstructTraits + // container. + continue; + case TraitSet::implementation: + // No effect on the score (implementation defined). 
+ continue; + case TraitSet::user: + // No effect on the score. + continue; + case TraitSet::device: + // Handled separately below. + break; + case TraitSet::invalid: + llvm_unreachable("Unknown trait set is not to be used!"); + } + + // device={kind(any)} is "as if" no kind selector was specified. + if (Property == TraitProperty::device_kind_any) + continue; + + switch (getOpenMPContextTraitSelectorForProperty(Property)) { + case TraitSelector::device_kind: + Score += (1ULL << (NoConstructTraits + 0)); + continue; + case TraitSelector::device_arch: + Score += (1ULL << (NoConstructTraits + 1)); + continue; + case TraitSelector::device_isa: + Score += (1ULL << (NoConstructTraits + 2)); + continue; + default: + continue; + } + } + + unsigned ConstructIdx = 0; + assert(NoConstructTraits == ConstructMatches.size() && + "Mismatch in the construct traits!"); + for (TraitProperty Property : VMI.ConstructTraits) { + assert(getOpenMPContextTraitSetForProperty(Property) == + TraitSet::construct && + "Ill-formed variant match info!"); + (void)Property; + // ConstructMatches is the position p - 1 and we need 2^(p-1). + Score += (1ULL << ConstructMatches[ConstructIdx++]); + } + + LLVM_DEBUG(dbgs() << "[" << DEBUG_TYPE << "] Variant has a score of " << Score + << "\n"); + return Score; +} + +int llvm::omp::getBestVariantMatchForContext( + const SmallVectorImpl<VariantMatchInfo> &VMIs, const OMPContext &Ctx) { + + APInt BestScore(64, 0); + int BestVMIIdx = -1; + const VariantMatchInfo *BestVMI = nullptr; + + for (unsigned u = 0, e = VMIs.size(); u < e; ++u) { + const VariantMatchInfo &VMI = VMIs[u]; + + SmallVector<unsigned, 8> ConstructMatches; + // If the variant is not applicable its not the best. + if (!isVariantApplicableInContextHelper(VMI, Ctx, &ConstructMatches, + /* DeviceSetOnly */ false)) + continue; + // Check if its clearly not the best. 
+ APInt Score = getVariantMatchScore(VMI, Ctx, ConstructMatches); + if (Score.ult(BestScore)) + continue; + // Equal score need subset checks. + if (Score.eq(BestScore)) { + // Strict subset are never best. + if (isStrictSubset(VMI, *BestVMI)) + continue; + // Same score and the current best is no strict subset so we keep it. + if (!isStrictSubset(*BestVMI, VMI)) + continue; + } + // New best found. + BestVMI = &VMI; + BestVMIIdx = u; + BestScore = Score; + } + + return BestVMIIdx; +} + +TraitSet llvm::omp::getOpenMPContextTraitSetKind(StringRef S) { + return StringSwitch<TraitSet>(S) +#define OMP_TRAIT_SET(Enum, Str) .Case(Str, TraitSet::Enum) +#include "llvm/Frontend/OpenMP/OMPKinds.def" + .Default(TraitSet::invalid); +} + +TraitSet +llvm::omp::getOpenMPContextTraitSetForSelector(TraitSelector Selector) { + switch (Selector) { +#define OMP_TRAIT_SELECTOR(Enum, TraitSetEnum, Str, ReqProp) \ + case TraitSelector::Enum: \ + return TraitSet::TraitSetEnum; +#include "llvm/Frontend/OpenMP/OMPKinds.def" + } + llvm_unreachable("Unknown trait selector!"); +} +TraitSet +llvm::omp::getOpenMPContextTraitSetForProperty(TraitProperty Property) { + switch (Property) { +#define OMP_TRAIT_PROPERTY(Enum, TraitSetEnum, TraitSelectorEnum, Str) \ + case TraitProperty::Enum: \ + return TraitSet::TraitSetEnum; +#include "llvm/Frontend/OpenMP/OMPKinds.def" + } + llvm_unreachable("Unknown trait set!"); +} +StringRef llvm::omp::getOpenMPContextTraitSetName(TraitSet Kind) { + switch (Kind) { +#define OMP_TRAIT_SET(Enum, Str) \ + case TraitSet::Enum: \ + return Str; +#include "llvm/Frontend/OpenMP/OMPKinds.def" + } + llvm_unreachable("Unknown trait set!"); +} + +TraitSelector llvm::omp::getOpenMPContextTraitSelectorKind(StringRef S) { + return StringSwitch<TraitSelector>(S) +#define OMP_TRAIT_SELECTOR(Enum, TraitSetEnum, Str, ReqProp) \ + .Case(Str, TraitSelector::Enum) +#include "llvm/Frontend/OpenMP/OMPKinds.def" + .Default(TraitSelector::invalid); +} +TraitSelector 
+llvm::omp::getOpenMPContextTraitSelectorForProperty(TraitProperty Property) { + switch (Property) { +#define OMP_TRAIT_PROPERTY(Enum, TraitSetEnum, TraitSelectorEnum, Str) \ + case TraitProperty::Enum: \ + return TraitSelector::TraitSelectorEnum; +#include "llvm/Frontend/OpenMP/OMPKinds.def" + } + llvm_unreachable("Unknown trait set!"); +} +StringRef llvm::omp::getOpenMPContextTraitSelectorName(TraitSelector Kind) { + switch (Kind) { +#define OMP_TRAIT_SELECTOR(Enum, TraitSetEnum, Str, ReqProp) \ + case TraitSelector::Enum: \ + return Str; +#include "llvm/Frontend/OpenMP/OMPKinds.def" + } + llvm_unreachable("Unknown trait selector!"); +} + +TraitProperty llvm::omp::getOpenMPContextTraitPropertyKind(TraitSet Set, + StringRef S) { +#define OMP_TRAIT_PROPERTY(Enum, TraitSetEnum, TraitSelectorEnum, Str) \ + if (Set == TraitSet::TraitSetEnum && Str == S) \ + return TraitProperty::Enum; +#include "llvm/Frontend/OpenMP/OMPKinds.def" + return TraitProperty::invalid; +} +TraitProperty +llvm::omp::getOpenMPContextTraitPropertyForSelector(TraitSelector Selector) { + return StringSwitch<TraitProperty>( + getOpenMPContextTraitSelectorName(Selector)) +#define OMP_TRAIT_PROPERTY(Enum, TraitSetEnum, TraitSelectorEnum, Str) \ + .Case(Str, Selector == TraitSelector::TraitSelectorEnum \ + ? 
TraitProperty::Enum \ + : TraitProperty::invalid) +#include "llvm/Frontend/OpenMP/OMPKinds.def" + .Default(TraitProperty::invalid); +} +StringRef llvm::omp::getOpenMPContextTraitPropertyName(TraitProperty Kind) { + switch (Kind) { +#define OMP_TRAIT_PROPERTY(Enum, TraitSetEnum, TraitSelectorEnum, Str) \ + case TraitProperty::Enum: \ + return Str; +#include "llvm/Frontend/OpenMP/OMPKinds.def" + } + llvm_unreachable("Unknown trait property!"); +} +StringRef llvm::omp::getOpenMPContextTraitPropertyFullName(TraitProperty Kind) { + switch (Kind) { +#define OMP_TRAIT_PROPERTY(Enum, TraitSetEnum, TraitSelectorEnum, Str) \ + case TraitProperty::Enum: \ + return "(" #TraitSetEnum "," #TraitSelectorEnum "," Str ")"; +#include "llvm/Frontend/OpenMP/OMPKinds.def" + } + llvm_unreachable("Unknown trait property!"); +} + +bool llvm::omp::isValidTraitSelectorForTraitSet(TraitSelector Selector, + TraitSet Set, + bool &AllowsTraitScore, + bool &RequiresProperty) { + AllowsTraitScore = Set != TraitSet::construct && Set != TraitSet::device; + switch (Selector) { +#define OMP_TRAIT_SELECTOR(Enum, TraitSetEnum, Str, ReqProp) \ + case TraitSelector::Enum: \ + RequiresProperty = ReqProp; \ + return Set == TraitSet::TraitSetEnum; +#include "llvm/Frontend/OpenMP/OMPKinds.def" + } + llvm_unreachable("Unknown trait selector!"); +} + +bool llvm::omp::isValidTraitPropertyForTraitSetAndSelector( + TraitProperty Property, TraitSelector Selector, TraitSet Set) { + switch (Property) { +#define OMP_TRAIT_PROPERTY(Enum, TraitSetEnum, TraitSelectorEnum, Str) \ + case TraitProperty::Enum: \ + return Set == TraitSet::TraitSetEnum && \ + Selector == TraitSelector::TraitSelectorEnum; +#include "llvm/Frontend/OpenMP/OMPKinds.def" + } + llvm_unreachable("Unknown trait property!"); +} + +std::string llvm::omp::listOpenMPContextTraitSets() { + std::string S; +#define OMP_TRAIT_SET(Enum, Str) \ + if (StringRef(Str) != "invalid") \ + S.append("'").append(Str).append("'").append(" "); +#include 
"llvm/Frontend/OpenMP/OMPKinds.def" + S.pop_back(); + return S; +} + +std::string llvm::omp::listOpenMPContextTraitSelectors(TraitSet Set) { + std::string S; +#define OMP_TRAIT_SELECTOR(Enum, TraitSetEnum, Str, ReqProp) \ + if (TraitSet::TraitSetEnum == Set && StringRef(Str) != "Invalid") \ + S.append("'").append(Str).append("'").append(" "); +#include "llvm/Frontend/OpenMP/OMPKinds.def" + S.pop_back(); + return S; +} + +std::string +llvm::omp::listOpenMPContextTraitProperties(TraitSet Set, + TraitSelector Selector) { + std::string S; +#define OMP_TRAIT_PROPERTY(Enum, TraitSetEnum, TraitSelectorEnum, Str) \ + if (TraitSet::TraitSetEnum == Set && \ + TraitSelector::TraitSelectorEnum == Selector && \ + StringRef(Str) != "invalid") \ + S.append("'").append(Str).append("'").append(" "); +#include "llvm/Frontend/OpenMP/OMPKinds.def" + if (S.empty()) + return "<none>"; + S.pop_back(); + return S; +} diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 739c2998baa8f..9468a3aa3c8dd 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -18,8 +18,8 @@ #include "llvm/ADT/StringSwitch.h" #include "llvm/IR/CFG.h" #include "llvm/IR/DebugInfo.h" -#include "llvm/IR/MDBuilder.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/MDBuilder.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Error.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -31,7 +31,6 @@ using namespace llvm; using namespace omp; -using namespace types; static cl::opt<bool> OptimisticAttributes("openmp-ir-builder-optimistic-attributes", cl::Hidden, @@ -59,13 +58,17 @@ void OpenMPIRBuilder::addAttributes(omp::RuntimeFunction FnID, Function &Fn) { } } -Function *OpenMPIRBuilder::getOrCreateRuntimeFunction(RuntimeFunction FnID) { +FunctionCallee +OpenMPIRBuilder::getOrCreateRuntimeFunction(Module &M, RuntimeFunction FnID) { + FunctionType *FnTy = nullptr; Function *Fn = nullptr; // Try to find 
the declation in the module first. switch (FnID) { #define OMP_RTL(Enum, Str, IsVarArg, ReturnType, ...) \ case Enum: \ + FnTy = FunctionType::get(ReturnType, ArrayRef<Type *>{__VA_ARGS__}, \ + IsVarArg); \ Fn = M.getFunction(Str); \ break; #include "llvm/Frontend/OpenMP/OMPKinds.def" @@ -74,25 +77,113 @@ Function *OpenMPIRBuilder::getOrCreateRuntimeFunction(RuntimeFunction FnID) { if (!Fn) { // Create a new declaration if we need one. switch (FnID) { -#define OMP_RTL(Enum, Str, IsVarArg, ReturnType, ...) \ +#define OMP_RTL(Enum, Str, ...) \ case Enum: \ - Fn = Function::Create(FunctionType::get(ReturnType, \ - ArrayRef<Type *>{__VA_ARGS__}, \ - IsVarArg), \ - GlobalValue::ExternalLinkage, Str, M); \ + Fn = Function::Create(FnTy, GlobalValue::ExternalLinkage, Str, M); \ break; #include "llvm/Frontend/OpenMP/OMPKinds.def" } + // Add information if the runtime function takes a callback function + if (FnID == OMPRTL___kmpc_fork_call || FnID == OMPRTL___kmpc_fork_teams) { + if (!Fn->hasMetadata(LLVMContext::MD_callback)) { + LLVMContext &Ctx = Fn->getContext(); + MDBuilder MDB(Ctx); + // Annotate the callback behavior of the runtime function: + // - The callback callee is argument number 2 (microtask). + // - The first two arguments of the callback callee are unknown (-1). + // - All variadic arguments to the runtime function are passed to the + // callback callee. 
+ Fn->addMetadata( + LLVMContext::MD_callback, + *MDNode::get(Ctx, {MDB.createCallbackEncoding( + 2, {-1, -1}, /* VarArgsArePassed */ true)})); + } + } + + LLVM_DEBUG(dbgs() << "Created OpenMP runtime function " << Fn->getName() + << " with type " << *Fn->getFunctionType() << "\n"); addAttributes(FnID, *Fn); + + } else { + LLVM_DEBUG(dbgs() << "Found OpenMP runtime function " << Fn->getName() + << " with type " << *Fn->getFunctionType() << "\n"); } assert(Fn && "Failed to create OpenMP runtime function"); + + // Cast the function to the expected type if necessary + Constant *C = ConstantExpr::getBitCast(Fn, FnTy->getPointerTo()); + return {FnTy, C}; +} + +Function *OpenMPIRBuilder::getOrCreateRuntimeFunctionPtr(RuntimeFunction FnID) { + FunctionCallee RTLFn = getOrCreateRuntimeFunction(M, FnID); + auto *Fn = dyn_cast<llvm::Function>(RTLFn.getCallee()); + assert(Fn && "Failed to create OpenMP runtime function pointer"); return Fn; } void OpenMPIRBuilder::initialize() { initializeTypes(M); } +void OpenMPIRBuilder::finalize() { + SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet; + SmallVector<BasicBlock *, 32> Blocks; + for (OutlineInfo &OI : OutlineInfos) { + ParallelRegionBlockSet.clear(); + Blocks.clear(); + OI.collectBlocks(ParallelRegionBlockSet, Blocks); + + Function *OuterFn = OI.EntryBB->getParent(); + CodeExtractorAnalysisCache CEAC(*OuterFn); + CodeExtractor Extractor(Blocks, /* DominatorTree */ nullptr, + /* AggregateArgs */ false, + /* BlockFrequencyInfo */ nullptr, + /* BranchProbabilityInfo */ nullptr, + /* AssumptionCache */ nullptr, + /* AllowVarArgs */ true, + /* AllowAlloca */ true, + /* Suffix */ ".omp_par"); + + LLVM_DEBUG(dbgs() << "Before outlining: " << *OuterFn << "\n"); + LLVM_DEBUG(dbgs() << "Entry " << OI.EntryBB->getName() + << " Exit: " << OI.ExitBB->getName() << "\n"); + assert(Extractor.isEligible() && + "Expected OpenMP outlining to be possible!"); + + Function *OutlinedFn = Extractor.extractCodeRegion(CEAC); + + LLVM_DEBUG(dbgs() 
<< "After outlining: " << *OuterFn << "\n"); + LLVM_DEBUG(dbgs() << " Outlined function: " << *OutlinedFn << "\n"); + assert(OutlinedFn->getReturnType()->isVoidTy() && + "OpenMP outlined functions should not return a value!"); + + // For compability with the clang CG we move the outlined function after the + // one with the parallel region. + OutlinedFn->removeFromParent(); + M.getFunctionList().insertAfter(OuterFn->getIterator(), OutlinedFn); + + // Remove the artificial entry introduced by the extractor right away, we + // made our own entry block after all. + { + BasicBlock &ArtificialEntry = OutlinedFn->getEntryBlock(); + assert(ArtificialEntry.getUniqueSuccessor() == OI.EntryBB); + assert(OI.EntryBB->getUniquePredecessor() == &ArtificialEntry); + OI.EntryBB->moveBefore(&ArtificialEntry); + ArtificialEntry.eraseFromParent(); + } + assert(&OutlinedFn->getEntryBlock() == OI.EntryBB); + assert(OutlinedFn && OutlinedFn->getNumUses() == 1); + + // Run a user callback, e.g. to add attributes. + if (OI.PostOutlineCB) + OI.PostOutlineCB(*OutlinedFn); + } + + // Allow finalize to be called multiple times. + OutlineInfos.clear(); +} + Value *OpenMPIRBuilder::getOrCreateIdent(Constant *SrcLocStr, IdentFlag LocFlags) { // Enable "C-mode". @@ -165,7 +256,7 @@ OpenMPIRBuilder::getOrCreateSrcLocStr(const LocationDescription &Loc) { Value *OpenMPIRBuilder::getOrCreateThreadID(Value *Ident) { return Builder.CreateCall( - getOrCreateRuntimeFunction(OMPRTL___kmpc_global_thread_num), Ident, + getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num), Ident, "omp_global_thread_num"); } @@ -212,10 +303,11 @@ OpenMPIRBuilder::emitBarrierImpl(const LocationDescription &Loc, Directive Kind, bool UseCancelBarrier = !ForceSimpleCall && isLastFinalizationInfoCancellable(OMPD_parallel); - Value *Result = Builder.CreateCall( - getOrCreateRuntimeFunction(UseCancelBarrier ? 
OMPRTL___kmpc_cancel_barrier - : OMPRTL___kmpc_barrier), - Args); + Value *Result = + Builder.CreateCall(getOrCreateRuntimeFunctionPtr( + UseCancelBarrier ? OMPRTL___kmpc_cancel_barrier + : OMPRTL___kmpc_barrier), + Args); if (UseCancelBarrier && CheckCancelFlag) emitCancelationCheckImpl(Result, OMPD_parallel); @@ -253,7 +345,7 @@ OpenMPIRBuilder::CreateCancel(const LocationDescription &Loc, Value *Ident = getOrCreateIdent(SrcLocStr); Value *Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind}; Value *Result = Builder.CreateCall( - getOrCreateRuntimeFunction(OMPRTL___kmpc_cancel), Args); + getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancel), Args); // The actual cancel logic is shared with others, e.g., cancel_barriers. emitCancelationCheckImpl(Result, CanceledDirective); @@ -318,7 +410,7 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel( Ident, ThreadID, Builder.CreateIntCast(NumThreads, Int32, /*isSigned*/ false)}; Builder.CreateCall( - getOrCreateRuntimeFunction(OMPRTL___kmpc_push_num_threads), Args); + getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_num_threads), Args); } if (ProcBind != OMP_PROC_BIND_default) { @@ -326,8 +418,8 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel( Value *Args[] = { Ident, ThreadID, ConstantInt::get(Int32, unsigned(ProcBind), /*isSigned=*/true)}; - Builder.CreateCall(getOrCreateRuntimeFunction(OMPRTL___kmpc_push_proc_bind), - Args); + Builder.CreateCall( + getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_proc_bind), Args); } BasicBlock *InsertBB = Builder.GetInsertBlock(); @@ -415,32 +507,135 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel( // PRegionExitBB <- A common exit to simplify block collection. // - LLVM_DEBUG(dbgs() << "Before body codegen: " << *UI->getFunction() << "\n"); + LLVM_DEBUG(dbgs() << "Before body codegen: " << *OuterFn << "\n"); // Let the caller create the body. 
assert(BodyGenCB && "Expected body generation callback!"); InsertPointTy CodeGenIP(PRegBodyBB, PRegBodyBB->begin()); BodyGenCB(AllocaIP, CodeGenIP, *PRegPreFiniBB); - LLVM_DEBUG(dbgs() << "After body codegen: " << *UI->getFunction() << "\n"); + LLVM_DEBUG(dbgs() << "After body codegen: " << *OuterFn << "\n"); + + FunctionCallee RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_call); + if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) { + if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) { + llvm::LLVMContext &Ctx = F->getContext(); + MDBuilder MDB(Ctx); + // Annotate the callback behavior of the __kmpc_fork_call: + // - The callback callee is argument number 2 (microtask). + // - The first two arguments of the callback callee are unknown (-1). + // - All variadic arguments to the __kmpc_fork_call are passed to the + // callback callee. + F->addMetadata( + llvm::LLVMContext::MD_callback, + *llvm::MDNode::get( + Ctx, {MDB.createCallbackEncoding(2, {-1, -1}, + /* VarArgsArePassed */ true)})); + } + } + + OutlineInfo OI; + OI.PostOutlineCB = [=](Function &OutlinedFn) { + // Add some known attributes. 
+ OutlinedFn.addParamAttr(0, Attribute::NoAlias); + OutlinedFn.addParamAttr(1, Attribute::NoAlias); + OutlinedFn.addFnAttr(Attribute::NoUnwind); + OutlinedFn.addFnAttr(Attribute::NoRecurse); + + assert(OutlinedFn.arg_size() >= 2 && + "Expected at least tid and bounded tid as arguments"); + unsigned NumCapturedVars = + OutlinedFn.arg_size() - /* tid & bounded tid */ 2; + + CallInst *CI = cast<CallInst>(OutlinedFn.user_back()); + CI->getParent()->setName("omp_parallel"); + Builder.SetInsertPoint(CI); + + // Build call __kmpc_fork_call(Ident, n, microtask, var1, .., varn); + Value *ForkCallArgs[] = { + Ident, Builder.getInt32(NumCapturedVars), + Builder.CreateBitCast(&OutlinedFn, ParallelTaskPtr)}; + + SmallVector<Value *, 16> RealArgs; + RealArgs.append(std::begin(ForkCallArgs), std::end(ForkCallArgs)); + RealArgs.append(CI->arg_begin() + /* tid & bound tid */ 2, CI->arg_end()); + + Builder.CreateCall(RTLFn, RealArgs); + + LLVM_DEBUG(dbgs() << "With fork_call placed: " + << *Builder.GetInsertBlock()->getParent() << "\n"); + + InsertPointTy ExitIP(PRegExitBB, PRegExitBB->end()); + + // Initialize the local TID stack location with the argument value. + Builder.SetInsertPoint(PrivTID); + Function::arg_iterator OutlinedAI = OutlinedFn.arg_begin(); + Builder.CreateStore(Builder.CreateLoad(OutlinedAI), PrivTIDAddr); + + // If no "if" clause was present we do not need the call created during + // outlining, otherwise we reuse it in the serialized parallel region. + if (!ElseTI) { + CI->eraseFromParent(); + } else { + + // If an "if" clause was present we are now generating the serialized + // version into the "else" branch. 
+ Builder.SetInsertPoint(ElseTI); + + // Build calls __kmpc_serialized_parallel(&Ident, GTid); + Value *SerializedParallelCallArgs[] = {Ident, ThreadID}; + Builder.CreateCall( + getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_serialized_parallel), + SerializedParallelCallArgs); + + // OutlinedFn(&gtid, &zero, CapturedStruct); + CI->removeFromParent(); + Builder.Insert(CI); + + // __kmpc_end_serialized_parallel(&Ident, GTid); + Value *EndArgs[] = {Ident, ThreadID}; + Builder.CreateCall( + getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_serialized_parallel), + EndArgs); + + LLVM_DEBUG(dbgs() << "With serialized parallel region: " + << *Builder.GetInsertBlock()->getParent() << "\n"); + } + + for (Instruction *I : ToBeDeleted) + I->eraseFromParent(); + }; + + // Adjust the finalization stack, verify the adjustment, and call the + // finalize function a last time to finalize values between the pre-fini + // block and the exit block if we left the parallel "the normal way". + auto FiniInfo = FinalizationStack.pop_back_val(); + (void)FiniInfo; + assert(FiniInfo.DK == OMPD_parallel && + "Unexpected finalization stack state!"); + + Instruction *PRegPreFiniTI = PRegPreFiniBB->getTerminator(); + + InsertPointTy PreFiniIP(PRegPreFiniBB, PRegPreFiniTI->getIterator()); + FiniCB(PreFiniIP); + + OI.EntryBB = PRegEntryBB; + OI.ExitBB = PRegExitBB; SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet; - SmallVector<BasicBlock *, 32> ParallelRegionBlocks, Worklist; - ParallelRegionBlockSet.insert(PRegEntryBB); - ParallelRegionBlockSet.insert(PRegExitBB); + SmallVector<BasicBlock *, 32> Blocks; + OI.collectBlocks(ParallelRegionBlockSet, Blocks); - // Collect all blocks in-between PRegEntryBB and PRegExitBB. 
- Worklist.push_back(PRegEntryBB); - while (!Worklist.empty()) { - BasicBlock *BB = Worklist.pop_back_val(); - ParallelRegionBlocks.push_back(BB); - for (BasicBlock *SuccBB : successors(BB)) - if (ParallelRegionBlockSet.insert(SuccBB).second) - Worklist.push_back(SuccBB); - } + // Ensure a single exit node for the outlined region by creating one. + // We might have multiple incoming edges to the exit now due to finalizations, + // e.g., cancel calls that cause the control flow to leave the region. + BasicBlock *PRegOutlinedExitBB = PRegExitBB; + PRegExitBB = SplitBlock(PRegExitBB, &*PRegExitBB->getFirstInsertionPt()); + PRegOutlinedExitBB->setName("omp.par.outlined.exit"); + Blocks.push_back(PRegOutlinedExitBB); CodeExtractorAnalysisCache CEAC(*OuterFn); - CodeExtractor Extractor(ParallelRegionBlocks, /* DominatorTree */ nullptr, + CodeExtractor Extractor(Blocks, /* DominatorTree */ nullptr, /* AggregateArgs */ false, /* BlockFrequencyInfo */ nullptr, /* BranchProbabilityInfo */ nullptr, @@ -455,10 +650,10 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel( Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit); Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands); - LLVM_DEBUG(dbgs() << "Before privatization: " << *UI->getFunction() << "\n"); + LLVM_DEBUG(dbgs() << "Before privatization: " << *OuterFn << "\n"); FunctionCallee TIDRTLFn = - getOrCreateRuntimeFunction(OMPRTL___kmpc_global_thread_num); + getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num); auto PrivHelper = [&](Value &V) { if (&V == TIDAddr || &V == ZeroAddr) @@ -491,142 +686,443 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel( LLVM_DEBUG(dbgs() << "Captured input: " << *Input << "\n"); PrivHelper(*Input); } - for (Value *Output : Outputs) { - LLVM_DEBUG(dbgs() << "Captured output: " << *Output << "\n"); - PrivHelper(*Output); - } + assert(Outputs.empty() && + "OpenMP outlining should not produce live-out values!"); - LLVM_DEBUG(dbgs() << "After 
privatization: " << *UI->getFunction() << "\n"); + LLVM_DEBUG(dbgs() << "After privatization: " << *OuterFn << "\n"); LLVM_DEBUG({ - for (auto *BB : ParallelRegionBlocks) + for (auto *BB : Blocks) dbgs() << " PBR: " << BB->getName() << "\n"; }); - // Add some known attributes to the outlined function. - Function *OutlinedFn = Extractor.extractCodeRegion(CEAC); - OutlinedFn->addParamAttr(0, Attribute::NoAlias); - OutlinedFn->addParamAttr(1, Attribute::NoAlias); - OutlinedFn->addFnAttr(Attribute::NoUnwind); - OutlinedFn->addFnAttr(Attribute::NoRecurse); - - LLVM_DEBUG(dbgs() << "After outlining: " << *UI->getFunction() << "\n"); - LLVM_DEBUG(dbgs() << " Outlined function: " << *OutlinedFn << "\n"); - - // For compability with the clang CG we move the outlined function after the - // one with the parallel region. - OutlinedFn->removeFromParent(); - M.getFunctionList().insertAfter(OuterFn->getIterator(), OutlinedFn); - - // Remove the artificial entry introduced by the extractor right away, we - // made our own entry block after all. - { - BasicBlock &ArtificialEntry = OutlinedFn->getEntryBlock(); - assert(ArtificialEntry.getUniqueSuccessor() == PRegEntryBB); - assert(PRegEntryBB->getUniquePredecessor() == &ArtificialEntry); - PRegEntryBB->moveBefore(&ArtificialEntry); - ArtificialEntry.eraseFromParent(); - } - LLVM_DEBUG(dbgs() << "PP Outlined function: " << *OutlinedFn << "\n"); - assert(&OutlinedFn->getEntryBlock() == PRegEntryBB); + // Register the outlined info. 
+ addOutlineInfo(std::move(OI)); + + InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end()); + UI->eraseFromParent(); - assert(OutlinedFn && OutlinedFn->getNumUses() == 1); - assert(OutlinedFn->arg_size() >= 2 && - "Expected at least tid and bounded tid as arguments"); - unsigned NumCapturedVars = OutlinedFn->arg_size() - /* tid & bounded tid */ 2; + return AfterIP; +} - CallInst *CI = cast<CallInst>(OutlinedFn->user_back()); - CI->getParent()->setName("omp_parallel"); - Builder.SetInsertPoint(CI); +void OpenMPIRBuilder::emitFlush(const LocationDescription &Loc) { + // Build call void __kmpc_flush(ident_t *loc) + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); + Value *Args[] = {getOrCreateIdent(SrcLocStr)}; - // Build call __kmpc_fork_call(Ident, n, microtask, var1, .., varn); - Value *ForkCallArgs[] = {Ident, Builder.getInt32(NumCapturedVars), - Builder.CreateBitCast(OutlinedFn, ParallelTaskPtr)}; + Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_flush), Args); +} - SmallVector<Value *, 16> RealArgs; - RealArgs.append(std::begin(ForkCallArgs), std::end(ForkCallArgs)); - RealArgs.append(CI->arg_begin() + /* tid & bound tid */ 2, CI->arg_end()); +void OpenMPIRBuilder::CreateFlush(const LocationDescription &Loc) { + if (!updateToLocation(Loc)) + return; + emitFlush(Loc); +} - FunctionCallee RTLFn = getOrCreateRuntimeFunction(OMPRTL___kmpc_fork_call); - if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) { - if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) { - llvm::LLVMContext &Ctx = F->getContext(); - MDBuilder MDB(Ctx); - // Annotate the callback behavior of the __kmpc_fork_call: - // - The callback callee is argument number 2 (microtask). - // - The first two arguments of the callback callee are unknown (-1). - // - All variadic arguments to the __kmpc_fork_call are passed to the - // callback callee. 
- F->addMetadata( - llvm::LLVMContext::MD_callback, - *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding( - 2, {-1, -1}, - /* VarArgsArePassed */ true)})); +void OpenMPIRBuilder::emitTaskwaitImpl(const LocationDescription &Loc) { + // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 + // global_tid); + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); + Value *Ident = getOrCreateIdent(SrcLocStr); + Value *Args[] = {Ident, getOrCreateThreadID(Ident)}; + + // Ignore return result until untied tasks are supported. + Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskwait), + Args); +} + +void OpenMPIRBuilder::CreateTaskwait(const LocationDescription &Loc) { + if (!updateToLocation(Loc)) + return; + emitTaskwaitImpl(Loc); +} + +void OpenMPIRBuilder::emitTaskyieldImpl(const LocationDescription &Loc) { + // Build call __kmpc_omp_taskyield(loc, thread_id, 0); + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); + Value *Ident = getOrCreateIdent(SrcLocStr); + Constant *I32Null = ConstantInt::getNullValue(Int32); + Value *Args[] = {Ident, getOrCreateThreadID(Ident), I32Null}; + + Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskyield), + Args); +} + +void OpenMPIRBuilder::CreateTaskyield(const LocationDescription &Loc) { + if (!updateToLocation(Loc)) + return; + emitTaskyieldImpl(Loc); +} + +OpenMPIRBuilder::InsertPointTy +OpenMPIRBuilder::CreateMaster(const LocationDescription &Loc, + BodyGenCallbackTy BodyGenCB, + FinalizeCallbackTy FiniCB) { + + if (!updateToLocation(Loc)) + return Loc.IP; + + Directive OMPD = Directive::OMPD_master; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); + Value *Ident = getOrCreateIdent(SrcLocStr); + Value *ThreadId = getOrCreateThreadID(Ident); + Value *Args[] = {Ident, ThreadId}; + + Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_master); + Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args); + + Function *ExitRTLFn = 
getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_master); + Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args); + + return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB, + /*Conditional*/ true, /*hasFinalize*/ true); +} + +OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::CreateCritical( + const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, + FinalizeCallbackTy FiniCB, StringRef CriticalName, Value *HintInst) { + + if (!updateToLocation(Loc)) + return Loc.IP; + + Directive OMPD = Directive::OMPD_critical; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); + Value *Ident = getOrCreateIdent(SrcLocStr); + Value *ThreadId = getOrCreateThreadID(Ident); + Value *LockVar = getOMPCriticalRegionLock(CriticalName); + Value *Args[] = {Ident, ThreadId, LockVar}; + + SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), std::end(Args)); + Function *RTFn = nullptr; + if (HintInst) { + // Add Hint to entry Args and create call + EnterArgs.push_back(HintInst); + RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical_with_hint); + } else { + RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical); + } + Instruction *EntryCall = Builder.CreateCall(RTFn, EnterArgs); + + Function *ExitRTLFn = + getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_critical); + Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args); + + return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB, + /*Conditional*/ false, /*hasFinalize*/ true); +} + +OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::EmitOMPInlinedRegion( + Directive OMPD, Instruction *EntryCall, Instruction *ExitCall, + BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool Conditional, + bool HasFinalize) { + + if (HasFinalize) + FinalizationStack.push_back({FiniCB, OMPD, /*IsCancellable*/ false}); + + // Create inlined region's entry and body blocks, in preparation + // for conditional creation + BasicBlock *EntryBB = Builder.GetInsertBlock(); + Instruction *SplitPos = 
EntryBB->getTerminator(); + if (!isa_and_nonnull<BranchInst>(SplitPos)) + SplitPos = new UnreachableInst(Builder.getContext(), EntryBB); + BasicBlock *ExitBB = EntryBB->splitBasicBlock(SplitPos, "omp_region.end"); + BasicBlock *FiniBB = + EntryBB->splitBasicBlock(EntryBB->getTerminator(), "omp_region.finalize"); + + Builder.SetInsertPoint(EntryBB->getTerminator()); + emitCommonDirectiveEntry(OMPD, EntryCall, ExitBB, Conditional); + + // generate body + BodyGenCB(/* AllocaIP */ InsertPointTy(), + /* CodeGenIP */ Builder.saveIP(), *FiniBB); + + // If we didn't emit a branch to FiniBB during body generation, it means + // FiniBB is unreachable (e.g. while(1);). stop generating all the + // unreachable blocks, and remove anything we are not going to use. + auto SkipEmittingRegion = FiniBB->hasNPredecessors(0); + if (SkipEmittingRegion) { + FiniBB->eraseFromParent(); + ExitCall->eraseFromParent(); + // Discard finalization if we have it. + if (HasFinalize) { + assert(!FinalizationStack.empty() && + "Unexpected finalization stack state!"); + FinalizationStack.pop_back(); } + } else { + // emit exit call and do any needed finalization. + auto FinIP = InsertPointTy(FiniBB, FiniBB->getFirstInsertionPt()); + assert(FiniBB->getTerminator()->getNumSuccessors() == 1 && + FiniBB->getTerminator()->getSuccessor(0) == ExitBB && + "Unexpected control flow graph state!!"); + emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize); + assert(FiniBB->getUniquePredecessor()->getUniqueSuccessor() == FiniBB && + "Unexpected Control Flow State!"); + MergeBlockIntoPredecessor(FiniBB); } - Builder.CreateCall(RTLFn, RealArgs); + // If we are skipping the region of a non conditional, remove the exit + // block, and clear the builder's insertion point. 
+ assert(SplitPos->getParent() == ExitBB && + "Unexpected Insertion point location!"); + if (!Conditional && SkipEmittingRegion) { + ExitBB->eraseFromParent(); + Builder.ClearInsertionPoint(); + } else { + auto merged = MergeBlockIntoPredecessor(ExitBB); + BasicBlock *ExitPredBB = SplitPos->getParent(); + auto InsertBB = merged ? ExitPredBB : ExitBB; + if (!isa_and_nonnull<BranchInst>(SplitPos)) + SplitPos->eraseFromParent(); + Builder.SetInsertPoint(InsertBB); + } - LLVM_DEBUG(dbgs() << "With fork_call placed: " - << *Builder.GetInsertBlock()->getParent() << "\n"); + return Builder.saveIP(); +} - InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end()); - InsertPointTy ExitIP(PRegExitBB, PRegExitBB->end()); +OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveEntry( + Directive OMPD, Value *EntryCall, BasicBlock *ExitBB, bool Conditional) { + + // if nothing to do, Return current insertion point. + if (!Conditional) + return Builder.saveIP(); + + BasicBlock *EntryBB = Builder.GetInsertBlock(); + Value *CallBool = Builder.CreateIsNotNull(EntryCall); + auto *ThenBB = BasicBlock::Create(M.getContext(), "omp_region.body"); + auto *UI = new UnreachableInst(Builder.getContext(), ThenBB); + + // Emit thenBB and set the Builder's insertion point there for + // body generation next. Place the block after the current block. + Function *CurFn = EntryBB->getParent(); + CurFn->getBasicBlockList().insertAfter(EntryBB->getIterator(), ThenBB); + + // Move Entry branch to end of ThenBB, and replace with conditional + // branch (If-stmt) + Instruction *EntryBBTI = EntryBB->getTerminator(); + Builder.CreateCondBr(CallBool, ThenBB, ExitBB); + EntryBBTI->removeFromParent(); + Builder.SetInsertPoint(UI); + Builder.Insert(EntryBBTI); UI->eraseFromParent(); + Builder.SetInsertPoint(ThenBB->getTerminator()); + + // return an insertion point to ExitBB. 
+ return IRBuilder<>::InsertPoint(ExitBB, ExitBB->getFirstInsertionPt()); +} + +OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveExit( + omp::Directive OMPD, InsertPointTy FinIP, Instruction *ExitCall, + bool HasFinalize) { + + Builder.restoreIP(FinIP); + + // If there is finalization to do, emit it before the exit call + if (HasFinalize) { + assert(!FinalizationStack.empty() && + "Unexpected finalization stack state!"); + + FinalizationInfo Fi = FinalizationStack.pop_back_val(); + assert(Fi.DK == OMPD && "Unexpected Directive for Finalization call!"); + + Fi.FiniCB(FinIP); + + BasicBlock *FiniBB = FinIP.getBlock(); + Instruction *FiniBBTI = FiniBB->getTerminator(); - // Initialize the local TID stack location with the argument value. - Builder.SetInsertPoint(PrivTID); - Function::arg_iterator OutlinedAI = OutlinedFn->arg_begin(); - Builder.CreateStore(Builder.CreateLoad(OutlinedAI), PrivTIDAddr); + // set Builder IP for call creation + Builder.SetInsertPoint(FiniBBTI); + } + + // place the Exitcall as last instruction before Finalization block terminator + ExitCall->removeFromParent(); + Builder.Insert(ExitCall); + + return IRBuilder<>::InsertPoint(ExitCall->getParent(), + ExitCall->getIterator()); +} - // If no "if" clause was present we do not need the call created during - // outlining, otherwise we reuse it in the serialized parallel region. - if (!ElseTI) { - CI->eraseFromParent(); +OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::CreateCopyinClauseBlocks( + InsertPointTy IP, Value *MasterAddr, Value *PrivateAddr, + llvm::IntegerType *IntPtrTy, bool BranchtoEnd) { + if (!IP.isSet()) + return IP; + + IRBuilder<>::InsertPointGuard IPG(Builder); + + // creates the following CFG structure + // OMP_Entry : (MasterAddr != PrivateAddr)? 
+ // F T + // | \ + // | copin.not.master + // | / + // v / + // copyin.not.master.end + // | + // v + // OMP.Entry.Next + + BasicBlock *OMP_Entry = IP.getBlock(); + Function *CurFn = OMP_Entry->getParent(); + BasicBlock *CopyBegin = + BasicBlock::Create(M.getContext(), "copyin.not.master", CurFn); + BasicBlock *CopyEnd = nullptr; + + // If entry block is terminated, split to preserve the branch to following + // basic block (i.e. OMP.Entry.Next), otherwise, leave everything as is. + if (isa_and_nonnull<BranchInst>(OMP_Entry->getTerminator())) { + CopyEnd = OMP_Entry->splitBasicBlock(OMP_Entry->getTerminator(), + "copyin.not.master.end"); + OMP_Entry->getTerminator()->eraseFromParent(); } else { + CopyEnd = + BasicBlock::Create(M.getContext(), "copyin.not.master.end", CurFn); + } - // If an "if" clause was present we are now generating the serialized - // version into the "else" branch. - Builder.SetInsertPoint(ElseTI); + Builder.SetInsertPoint(OMP_Entry); + Value *MasterPtr = Builder.CreatePtrToInt(MasterAddr, IntPtrTy); + Value *PrivatePtr = Builder.CreatePtrToInt(PrivateAddr, IntPtrTy); + Value *cmp = Builder.CreateICmpNE(MasterPtr, PrivatePtr); + Builder.CreateCondBr(cmp, CopyBegin, CopyEnd); - // Build calls __kmpc_serialized_parallel(&Ident, GTid); - Value *SerializedParallelCallArgs[] = {Ident, ThreadID}; - Builder.CreateCall( - getOrCreateRuntimeFunction(OMPRTL___kmpc_serialized_parallel), - SerializedParallelCallArgs); + Builder.SetInsertPoint(CopyBegin); + if (BranchtoEnd) + Builder.SetInsertPoint(Builder.CreateBr(CopyEnd)); - // OutlinedFn(&gtid, &zero, CapturedStruct); - CI->removeFromParent(); - Builder.Insert(CI); + return Builder.saveIP(); +} - // __kmpc_end_serialized_parallel(&Ident, GTid); - Value *EndArgs[] = {Ident, ThreadID}; - Builder.CreateCall( - getOrCreateRuntimeFunction(OMPRTL___kmpc_end_serialized_parallel), - EndArgs); +CallInst *OpenMPIRBuilder::CreateOMPAlloc(const LocationDescription &Loc, + Value *Size, Value *Allocator, + std::string 
Name) { + IRBuilder<>::InsertPointGuard IPG(Builder); + Builder.restoreIP(Loc.IP); - LLVM_DEBUG(dbgs() << "With serialized parallel region: " - << *Builder.GetInsertBlock()->getParent() << "\n"); + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); + Value *Ident = getOrCreateIdent(SrcLocStr); + Value *ThreadId = getOrCreateThreadID(Ident); + Value *Args[] = {ThreadId, Size, Allocator}; + + Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_alloc); + + return Builder.CreateCall(Fn, Args, Name); +} + +CallInst *OpenMPIRBuilder::CreateOMPFree(const LocationDescription &Loc, + Value *Addr, Value *Allocator, + std::string Name) { + IRBuilder<>::InsertPointGuard IPG(Builder); + Builder.restoreIP(Loc.IP); + + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); + Value *Ident = getOrCreateIdent(SrcLocStr); + Value *ThreadId = getOrCreateThreadID(Ident); + Value *Args[] = {ThreadId, Addr, Allocator}; + Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_free); + return Builder.CreateCall(Fn, Args, Name); +} + +CallInst *OpenMPIRBuilder::CreateCachedThreadPrivate( + const LocationDescription &Loc, llvm::Value *Pointer, + llvm::ConstantInt *Size, const llvm::Twine &Name) { + IRBuilder<>::InsertPointGuard IPG(Builder); + Builder.restoreIP(Loc.IP); + + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); + Value *Ident = getOrCreateIdent(SrcLocStr); + Value *ThreadId = getOrCreateThreadID(Ident); + Constant *ThreadPrivateCache = + getOrCreateOMPInternalVariable(Int8PtrPtr, Name); + llvm::Value *Args[] = {Ident, ThreadId, Pointer, Size, ThreadPrivateCache}; + + Function *Fn = + getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_threadprivate_cached); + + return Builder.CreateCall(Fn, Args); +} + +std::string OpenMPIRBuilder::getNameWithSeparators(ArrayRef<StringRef> Parts, + StringRef FirstSeparator, + StringRef Separator) { + SmallString<128> Buffer; + llvm::raw_svector_ostream OS(Buffer); + StringRef Sep = FirstSeparator; + for (StringRef Part : Parts) { + OS << Sep << Part; + 
Sep = Separator; } + return OS.str().str(); +} - // Adjust the finalization stack, verify the adjustment, and call the - // finalize function a last time to finalize values between the pre-fini block - // and the exit block if we left the parallel "the normal way". - auto FiniInfo = FinalizationStack.pop_back_val(); - (void)FiniInfo; - assert(FiniInfo.DK == OMPD_parallel && - "Unexpected finalization stack state!"); +Constant *OpenMPIRBuilder::getOrCreateOMPInternalVariable( + llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) { + // TODO: Replace the twine arg with stringref to get rid of the conversion + // logic. However This is taken from current implementation in clang as is. + // Since this method is used in many places exclusively for OMP internal use + // we will keep it as is for temporarily until we move all users to the + // builder and then, if possible, fix it everywhere in one go. + SmallString<256> Buffer; + llvm::raw_svector_ostream Out(Buffer); + Out << Name; + StringRef RuntimeName = Out.str(); + auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first; + if (Elem.second) { + assert(Elem.second->getType()->getPointerElementType() == Ty && + "OMP internal variable has different type than requested"); + } else { + // TODO: investigate the appropriate linkage type used for the global + // variable for possibly changing that to internal or private, or maybe + // create different versions of the function for different OMP internal + // variables. 
+ Elem.second = new llvm::GlobalVariable( + M, Ty, /*IsConstant*/ false, llvm::GlobalValue::CommonLinkage, + llvm::Constant::getNullValue(Ty), Elem.first(), + /*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal, + AddressSpace); + } - Instruction *PreFiniTI = PRegPreFiniBB->getTerminator(); - assert(PreFiniTI->getNumSuccessors() == 1 && - PreFiniTI->getSuccessor(0)->size() == 1 && - isa<ReturnInst>(PreFiniTI->getSuccessor(0)->getTerminator()) && - "Unexpected CFG structure!"); + return Elem.second; +} - InsertPointTy PreFiniIP(PRegPreFiniBB, PreFiniTI->getIterator()); - FiniCB(PreFiniIP); +Value *OpenMPIRBuilder::getOMPCriticalRegionLock(StringRef CriticalName) { + std::string Prefix = Twine("gomp_critical_user_", CriticalName).str(); + std::string Name = getNameWithSeparators({Prefix, "var"}, ".", "."); + return getOrCreateOMPInternalVariable(KmpCriticalNameTy, Name); +} - for (Instruction *I : ToBeDeleted) - I->eraseFromParent(); +// Create all simple and struct types exposed by the runtime and remember +// the llvm::PointerTypes of them for easy access later. +void OpenMPIRBuilder::initializeTypes(Module &M) { + LLVMContext &Ctx = M.getContext(); + StructType *T; +#define OMP_TYPE(VarName, InitValue) VarName = InitValue; +#define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) \ + VarName##Ty = ArrayType::get(ElemTy, ArraySize); \ + VarName##PtrTy = PointerType::getUnqual(VarName##Ty); +#define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \ + VarName = FunctionType::get(ReturnType, {__VA_ARGS__}, IsVarArg); \ + VarName##Ptr = PointerType::getUnqual(VarName); +#define OMP_STRUCT_TYPE(VarName, StructName, ...) 
\ + T = M.getTypeByName(StructName); \ + if (!T) \ + T = StructType::create(Ctx, {__VA_ARGS__}, StructName); \ + VarName = T; \ + VarName##Ptr = PointerType::getUnqual(T); +#include "llvm/Frontend/OpenMP/OMPKinds.def" +} - return AfterIP; +void OpenMPIRBuilder::OutlineInfo::collectBlocks( + SmallPtrSetImpl<BasicBlock *> &BlockSet, + SmallVectorImpl<BasicBlock *> &BlockVector) { + SmallVector<BasicBlock *, 32> Worklist; + BlockSet.insert(EntryBB); + BlockSet.insert(ExitBB); + + Worklist.push_back(EntryBB); + while (!Worklist.empty()) { + BasicBlock *BB = Worklist.pop_back_val(); + BlockVector.push_back(BB); + for (BasicBlock *SuccBB : successors(BB)) + if (BlockSet.insert(SuccBB).second) + Worklist.push_back(SuccBB); + } } |