diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2023-02-11 12:38:04 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2023-02-11 12:38:11 +0000 |
commit | e3b557809604d036af6e00c60f012c2025b59a5e (patch) | |
tree | 8a11ba2269a3b669601e2fd41145b174008f4da8 /llvm/lib/Frontend | |
parent | 08e8dd7b9db7bb4a9de26d44c1cbfd24e869c014 (diff) | |
download | src-e3b557809604d036af6e00c60f012c2025b59a5e.tar.gz src-e3b557809604d036af6e00c60f012c2025b59a5e.zip |
Diffstat (limited to 'llvm/lib/Frontend')
-rw-r--r-- | llvm/lib/Frontend/HLSL/HLSLResource.cpp | 56 | ||||
-rw-r--r-- | llvm/lib/Frontend/OpenMP/OMPContext.cpp | 16 | ||||
-rw-r--r-- | llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 1024 |
3 files changed, 935 insertions, 161 deletions
diff --git a/llvm/lib/Frontend/HLSL/HLSLResource.cpp b/llvm/lib/Frontend/HLSL/HLSLResource.cpp new file mode 100644 index 000000000000..59f730d8a495 --- /dev/null +++ b/llvm/lib/Frontend/HLSL/HLSLResource.cpp @@ -0,0 +1,56 @@ +//===- HLSLResource.cpp - HLSL Resource helper objects --------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file This file contains helper objects for working with HLSL Resources. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/Frontend/HLSL/HLSLResource.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" + +using namespace llvm; +using namespace llvm::hlsl; + +GlobalVariable *FrontendResource::getGlobalVariable() { + return cast<GlobalVariable>( + cast<ConstantAsMetadata>(Entry->getOperand(0))->getValue()); +} + +StringRef FrontendResource::getSourceType() { + return cast<MDString>(Entry->getOperand(1))->getString(); +} + +uint32_t FrontendResource::FrontendResource::getResourceKind() { + return cast<ConstantInt>( + cast<ConstantAsMetadata>(Entry->getOperand(2))->getValue()) + ->getLimitedValue(); +} +uint32_t FrontendResource::getResourceIndex() { + return cast<ConstantInt>( + cast<ConstantAsMetadata>(Entry->getOperand(3))->getValue()) + ->getLimitedValue(); +} +uint32_t FrontendResource::getSpace() { + return cast<ConstantInt>( + cast<ConstantAsMetadata>(Entry->getOperand(4))->getValue()) + ->getLimitedValue(); +} + +FrontendResource::FrontendResource(GlobalVariable *GV, StringRef TypeStr, + ResourceKind RK, uint32_t ResIndex, + uint32_t Space) { + auto &Ctx = GV->getContext(); + IRBuilder<> B(Ctx); + Entry = MDNode::get( + Ctx, {ValueAsMetadata::get(GV), 
MDString::get(Ctx, TypeStr), + ConstantAsMetadata::get(B.getInt32(static_cast<int>(RK))), + ConstantAsMetadata::get(B.getInt32(ResIndex)), + ConstantAsMetadata::get(B.getInt32(Space))}); +} diff --git a/llvm/lib/Frontend/OpenMP/OMPContext.cpp b/llvm/lib/Frontend/OpenMP/OMPContext.cpp index 0f846f7bfee5..50ca01d34e20 100644 --- a/llvm/lib/Frontend/OpenMP/OMPContext.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPContext.cpp @@ -163,19 +163,19 @@ static int isVariantApplicableInContextHelper( // context based on the match kind selected by the user via // `implementation={extensions(match_[all,any,none])}' auto HandleTrait = [MK](TraitProperty Property, - bool WasFound) -> Optional<bool> /* Result */ { + bool WasFound) -> std::optional<bool> /* Result */ { // For kind "any" a single match is enough but we ignore non-matched // properties. if (MK == MK_ANY) { if (WasFound) return true; - return None; + return std::nullopt; } // In "all" or "none" mode we accept a matching or non-matching property // respectively and move on. We are not done yet! if ((WasFound && MK == MK_ALL) || (!WasFound && MK == MK_NONE)) - return None; + return std::nullopt; // We missed a property, provide some debug output and indicate failure. 
LLVM_DEBUG({ @@ -212,9 +212,8 @@ static int isVariantApplicableInContextHelper( return Ctx.matchesISATrait(RawString); }); - Optional<bool> Result = HandleTrait(Property, IsActiveTrait); - if (Result) - return Result.value(); + if (std::optional<bool> Result = HandleTrait(Property, IsActiveTrait)) + return *Result; } if (!DeviceSetOnly) { @@ -233,9 +232,8 @@ static int isVariantApplicableInContextHelper( if (ConstructMatches) ConstructMatches->push_back(ConstructIdx - 1); - Optional<bool> Result = HandleTrait(Property, FoundInOrder); - if (Result) - return Result.value(); + if (std::optional<bool> Result = HandleTrait(Property, FoundInOrder)) + return *Result; if (!FoundInOrder) { LLVM_DEBUG(dbgs() << "[" << DEBUG_TYPE << "] Construct property " diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index cee4cddab5e8..8a4ed30628dc 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -24,6 +24,7 @@ #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/MDBuilder.h" @@ -34,11 +35,13 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/CodeExtractor.h" #include "llvm/Transforms/Utils/LoopPeel.h" #include "llvm/Transforms/Utils/UnrollLoop.h" #include <cstdint> +#include <optional> #define DEBUG_TYPE "openmp-ir-builder" @@ -259,8 +262,7 @@ void llvm::spliceBB(IRBuilderBase::InsertPoint IP, BasicBlock *New, // Move instructions to new block. 
BasicBlock *Old = IP.getBlock(); - New->getInstList().splice(New->begin(), Old->getInstList(), IP.getPoint(), - Old->end()); + New->splice(New->begin(), Old, IP.getPoint(), Old->end()); if (CreateBranch) BranchInst::Create(New, Old); @@ -328,6 +330,7 @@ BasicBlock *llvm::splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch, void OpenMPIRBuilder::addAttributes(omp::RuntimeFunction FnID, Function &Fn) { LLVMContext &Ctx = Fn.getContext(); + Triple T(M.getTargetTriple()); // Get the function's current attributes. auto Attrs = Fn.getAttributes(); @@ -337,6 +340,25 @@ void OpenMPIRBuilder::addAttributes(omp::RuntimeFunction FnID, Function &Fn) { for (size_t ArgNo = 0; ArgNo < Fn.arg_size(); ++ArgNo) ArgAttrs.emplace_back(Attrs.getParamAttrs(ArgNo)); + // Add AS to FnAS while taking special care with integer extensions. + auto addAttrSet = [&](AttributeSet &FnAS, const AttributeSet &AS, + bool Param = true) -> void { + bool HasSignExt = AS.hasAttribute(Attribute::SExt); + bool HasZeroExt = AS.hasAttribute(Attribute::ZExt); + if (HasSignExt || HasZeroExt) { + assert(AS.getNumAttributes() == 1 && + "Currently not handling extension attr combined with others."); + if (Param) { + if (auto AK = TargetLibraryInfo::getExtAttrForI32Param(T, HasSignExt)) + FnAS = FnAS.addAttribute(Ctx, AK); + } else + if (auto AK = TargetLibraryInfo::getExtAttrForI32Return(T, HasSignExt)) + FnAS = FnAS.addAttribute(Ctx, AK); + } else { + FnAS = FnAS.addAttributes(Ctx, AS); + } + }; + #define OMP_ATTRS_SET(VarName, AttrSet) AttributeSet VarName = AttrSet; #include "llvm/Frontend/OpenMP/OMPKinds.def" @@ -345,10 +367,9 @@ void OpenMPIRBuilder::addAttributes(omp::RuntimeFunction FnID, Function &Fn) { #define OMP_RTL_ATTRS(Enum, FnAttrSet, RetAttrSet, ArgAttrSets) \ case Enum: \ FnAttrs = FnAttrs.addAttributes(Ctx, FnAttrSet); \ - RetAttrs = RetAttrs.addAttributes(Ctx, RetAttrSet); \ + addAttrSet(RetAttrs, RetAttrSet, /*Param*/false); \ for (size_t ArgNo = 0; ArgNo < ArgAttrSets.size(); 
++ArgNo) \ - ArgAttrs[ArgNo] = \ - ArgAttrs[ArgNo].addAttributes(Ctx, ArgAttrSets[ArgNo]); \ + addAttrSet(ArgAttrs[ArgNo], ArgAttrSets[ArgNo]); \ Fn.setAttributes(AttributeList::get(Ctx, FnAttrs, RetAttrs, ArgAttrs)); \ break; #include "llvm/Frontend/OpenMP/OMPKinds.def" @@ -623,7 +644,7 @@ Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(DebugLoc DL, return getOrCreateDefaultSrcLocStr(SrcLocStrSize); StringRef FileName = M.getName(); if (DIFile *DIF = DIL->getFile()) - if (Optional<StringRef> Source = DIF->getSource()) + if (std::optional<StringRef> Source = DIF->getSource()) FileName = *Source; StringRef Function = DIL->getScope()->getSubprogram()->getName(); if (Function.empty() && F) @@ -794,7 +815,7 @@ void OpenMPIRBuilder::emitOffloadingEntry(Constant *Addr, StringRef Name, OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitTargetKernel( const LocationDescription &Loc, Value *&Return, Value *Ident, Value *DeviceID, Value *NumTeams, Value *NumThreads, Value *HostPtr, - ArrayRef<Value *> KernelArgs, ArrayRef<Value *> NoWaitArgs) { + ArrayRef<Value *> KernelArgs) { if (!updateToLocation(Loc)) return Loc.IP; @@ -808,16 +829,11 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitTargetKernel( M.getDataLayout().getPrefTypeAlign(KernelArgs[I]->getType())); } - bool HasNoWait = !NoWaitArgs.empty(); SmallVector<Value *> OffloadingArgs{Ident, DeviceID, NumTeams, NumThreads, HostPtr, KernelArgsPtr}; - if (HasNoWait) - OffloadingArgs.append(NoWaitArgs.begin(), NoWaitArgs.end()); Return = Builder.CreateCall( - HasNoWait - ? 
getOrCreateRuntimeFunction(M, OMPRTL___tgt_target_kernel_nowait) - : getOrCreateRuntimeFunction(M, OMPRTL___tgt_target_kernel), + getOrCreateRuntimeFunction(M, OMPRTL___tgt_target_kernel), OffloadingArgs); return Builder.saveIP(); @@ -912,34 +928,21 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel( AllocaInst *TIDAddr = Builder.CreateAlloca(Int32, nullptr, "tid.addr"); AllocaInst *ZeroAddr = Builder.CreateAlloca(Int32, nullptr, "zero.addr"); - // If there is an if condition we actually use the TIDAddr and ZeroAddr in the - // program, otherwise we only need them for modeling purposes to get the - // associated arguments in the outlined function. In the former case, - // initialize the allocas properly, in the latter case, delete them later. - if (IfCondition) { - Builder.CreateStore(Constant::getNullValue(Int32), TIDAddr); - Builder.CreateStore(Constant::getNullValue(Int32), ZeroAddr); - } else { - ToBeDeleted.push_back(TIDAddr); - ToBeDeleted.push_back(ZeroAddr); - } + // We only need TIDAddr and ZeroAddr for modeling purposes to get the + // associated arguments in the outlined function, so we delete them later. + ToBeDeleted.push_back(TIDAddr); + ToBeDeleted.push_back(ZeroAddr); // Create an artificial insertion point that will also ensure the blocks we // are about to split are not degenerated. 
auto *UI = new UnreachableInst(Builder.getContext(), InsertBB); - Instruction *ThenTI = UI, *ElseTI = nullptr; - if (IfCondition) - SplitBlockAndInsertIfThenElse(IfCondition, UI, &ThenTI, &ElseTI); - - BasicBlock *ThenBB = ThenTI->getParent(); - BasicBlock *PRegEntryBB = ThenBB->splitBasicBlock(ThenTI, "omp.par.entry"); - BasicBlock *PRegBodyBB = - PRegEntryBB->splitBasicBlock(ThenTI, "omp.par.region"); + BasicBlock *EntryBB = UI->getParent(); + BasicBlock *PRegEntryBB = EntryBB->splitBasicBlock(UI, "omp.par.entry"); + BasicBlock *PRegBodyBB = PRegEntryBB->splitBasicBlock(UI, "omp.par.region"); BasicBlock *PRegPreFiniBB = - PRegBodyBB->splitBasicBlock(ThenTI, "omp.par.pre_finalize"); - BasicBlock *PRegExitBB = - PRegPreFiniBB->splitBasicBlock(ThenTI, "omp.par.exit"); + PRegBodyBB->splitBasicBlock(UI, "omp.par.pre_finalize"); + BasicBlock *PRegExitBB = PRegPreFiniBB->splitBasicBlock(UI, "omp.par.exit"); auto FiniCBWrapper = [&](InsertPointTy IP) { // Hide "open-ended" blocks from the given FiniCB by setting the right jump @@ -973,7 +976,7 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel( Builder.CreateLoad(Int32, ZeroAddr, "zero.addr.use"); ToBeDeleted.push_back(ZeroAddrUse); - // ThenBB + // EntryBB // | // V // PRegionEntryBB <- Privatization allocas are placed here. 
@@ -996,8 +999,12 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel( BodyGenCB(InnerAllocaIP, CodeGenIP); LLVM_DEBUG(dbgs() << "After body codegen: " << *OuterFn << "\n"); + FunctionCallee RTLFn; + if (IfCondition) + RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_call_if); + else + RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_call); - FunctionCallee RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_call); if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) { if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) { llvm::LLVMContext &Ctx = F->getContext(); @@ -1032,15 +1039,30 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel( CI->getParent()->setName("omp_parallel"); Builder.SetInsertPoint(CI); - // Build call __kmpc_fork_call(Ident, n, microtask, var1, .., varn); + // Build call __kmpc_fork_call[_if](Ident, n, microtask, var1, .., varn); Value *ForkCallArgs[] = { Ident, Builder.getInt32(NumCapturedVars), Builder.CreateBitCast(&OutlinedFn, ParallelTaskPtr)}; SmallVector<Value *, 16> RealArgs; RealArgs.append(std::begin(ForkCallArgs), std::end(ForkCallArgs)); + if (IfCondition) { + Value *Cond = Builder.CreateSExtOrTrunc(IfCondition, + Type::getInt32Ty(M.getContext())); + RealArgs.push_back(Cond); + } RealArgs.append(CI->arg_begin() + /* tid & bound tid */ 2, CI->arg_end()); + // __kmpc_fork_call_if always expects a void ptr as the last argument + // If there are no arguments, pass a null pointer. 
+ auto PtrTy = Type::getInt8PtrTy(M.getContext()); + if (IfCondition && NumCapturedVars == 0) { + llvm::Value *Void = ConstantPointerNull::get(PtrTy); + RealArgs.push_back(Void); + } + if (IfCondition && RealArgs.back()->getType() != PtrTy) + RealArgs.back() = Builder.CreateBitCast(RealArgs.back(), PtrTy); + Builder.CreateCall(RTLFn, RealArgs); LLVM_DEBUG(dbgs() << "With fork_call placed: " @@ -1053,35 +1075,7 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel( Function::arg_iterator OutlinedAI = OutlinedFn.arg_begin(); Builder.CreateStore(Builder.CreateLoad(Int32, OutlinedAI), PrivTIDAddr); - // If no "if" clause was present we do not need the call created during - // outlining, otherwise we reuse it in the serialized parallel region. - if (!ElseTI) { - CI->eraseFromParent(); - } else { - - // If an "if" clause was present we are now generating the serialized - // version into the "else" branch. - Builder.SetInsertPoint(ElseTI); - - // Build calls __kmpc_serialized_parallel(&Ident, GTid); - Value *SerializedParallelCallArgs[] = {Ident, ThreadID}; - Builder.CreateCall( - getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_serialized_parallel), - SerializedParallelCallArgs); - - // OutlinedFn(&GTid, &zero, CapturedStruct); - CI->removeFromParent(); - Builder.Insert(CI); - - // __kmpc_end_serialized_parallel(&Ident, GTid); - Value *EndArgs[] = {Ident, ThreadID}; - Builder.CreateCall( - getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_serialized_parallel), - EndArgs); - - LLVM_DEBUG(dbgs() << "With serialized parallel region: " - << *Builder.GetInsertBlock()->getParent() << "\n"); - } + CI->eraseFromParent(); for (Instruction *I : ToBeDeleted) I->eraseFromParent(); @@ -1288,7 +1282,8 @@ void OpenMPIRBuilder::createTaskyield(const LocationDescription &Loc) { OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTask(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, - bool Tied, Value *Final) { + bool Tied, Value *Final, Value 
*IfCondition, + SmallVector<DependData> Dependencies) { if (!updateToLocation(Loc)) return InsertPointTy(); @@ -1320,7 +1315,8 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc, OI.EntryBB = TaskAllocaBB; OI.OuterAllocaBB = AllocaIP.getBlock(); OI.ExitBB = TaskExitBB; - OI.PostOutlineCB = [this, Ident, Tied, Final](Function &OutlinedFn) { + OI.PostOutlineCB = [this, Ident, Tied, Final, IfCondition, + Dependencies](Function &OutlinedFn) { // The input IR here looks like the following- // ``` // func @current_fn() { @@ -1430,9 +1426,102 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc, TaskSize); } - // Emit the @__kmpc_omp_task runtime call to spawn the task - Function *TaskFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task); - Builder.CreateCall(TaskFn, {Ident, ThreadID, NewTaskData}); + Value *DepArrayPtr = nullptr; + if (Dependencies.size()) { + InsertPointTy OldIP = Builder.saveIP(); + Builder.SetInsertPoint( + &OldIP.getBlock()->getParent()->getEntryBlock().back()); + + Type *DepArrayTy = ArrayType::get(DependInfo, Dependencies.size()); + Value *DepArray = + Builder.CreateAlloca(DepArrayTy, nullptr, ".dep.arr.addr"); + + unsigned P = 0; + for (const DependData &Dep : Dependencies) { + Value *Base = + Builder.CreateConstInBoundsGEP2_64(DepArrayTy, DepArray, 0, P); + // Store the pointer to the variable + Value *Addr = Builder.CreateStructGEP( + DependInfo, Base, + static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)); + Value *DepValPtr = + Builder.CreatePtrToInt(Dep.DepVal, Builder.getInt64Ty()); + Builder.CreateStore(DepValPtr, Addr); + // Store the size of the variable + Value *Size = Builder.CreateStructGEP( + DependInfo, Base, + static_cast<unsigned int>(RTLDependInfoFields::Len)); + Builder.CreateStore(Builder.getInt64(M.getDataLayout().getTypeStoreSize( + Dep.DepValueType)), + Size); + // Store the dependency kind + Value *Flags = Builder.CreateStructGEP( + DependInfo, Base, + static_cast<unsigned 
int>(RTLDependInfoFields::Flags)); + Builder.CreateStore( + ConstantInt::get(Builder.getInt8Ty(), + static_cast<unsigned int>(Dep.DepKind)), + Flags); + ++P; + } + + DepArrayPtr = Builder.CreateBitCast(DepArray, Builder.getInt8PtrTy()); + Builder.restoreIP(OldIP); + } + + // In the presence of the `if` clause, the following IR is generated: + // ... + // %data = call @__kmpc_omp_task_alloc(...) + // br i1 %if_condition, label %then, label %else + // then: + // call @__kmpc_omp_task(...) + // br label %exit + // else: + // call @__kmpc_omp_task_begin_if0(...) + // call @wrapper_fn(...) + // call @__kmpc_omp_task_complete_if0(...) + // br label %exit + // exit: + // ... + if (IfCondition) { + // `SplitBlockAndInsertIfThenElse` requires the block to have a + // terminator. + BasicBlock *NewBasicBlock = + splitBB(Builder, /*CreateBranch=*/true, "if.end"); + Instruction *IfTerminator = + NewBasicBlock->getSinglePredecessor()->getTerminator(); + Instruction *ThenTI = IfTerminator, *ElseTI = nullptr; + Builder.SetInsertPoint(IfTerminator); + SplitBlockAndInsertIfThenElse(IfCondition, IfTerminator, &ThenTI, + &ElseTI); + Builder.SetInsertPoint(ElseTI); + Function *TaskBeginFn = + getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_begin_if0); + Function *TaskCompleteFn = + getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_complete_if0); + Builder.CreateCall(TaskBeginFn, {Ident, ThreadID, NewTaskData}); + if (HasTaskData) + Builder.CreateCall(WrapperFunc, {ThreadID, NewTaskData}); + else + Builder.CreateCall(WrapperFunc, {ThreadID}); + Builder.CreateCall(TaskCompleteFn, {Ident, ThreadID, NewTaskData}); + Builder.SetInsertPoint(ThenTI); + } + + if (Dependencies.size()) { + Function *TaskFn = + getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_with_deps); + Builder.CreateCall( + TaskFn, + {Ident, ThreadID, NewTaskData, Builder.getInt32(Dependencies.size()), + DepArrayPtr, ConstantInt::get(Builder.getInt32Ty(), 0), + 
ConstantPointerNull::get(Type::getInt8PtrTy(M.getContext()))}); + + } else { + // Emit the @__kmpc_omp_task runtime call to spawn the task + Function *TaskFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task); + Builder.CreateCall(TaskFn, {Ident, ThreadID, NewTaskData}); + } StaleCI->eraseFromParent(); @@ -2839,32 +2928,40 @@ OpenMPIRBuilder::tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops, return Result; } -/// Attach loop metadata \p Properties to the loop described by \p Loop. If the -/// loop already has metadata, the loop properties are appended. -static void addLoopMetadata(CanonicalLoopInfo *Loop, - ArrayRef<Metadata *> Properties) { - assert(Loop->isValid() && "Expecting a valid CanonicalLoopInfo"); - +/// Attach metadata \p Properties to the basic block described by \p BB. If the +/// basic block already has metadata, the basic block properties are appended. +static void addBasicBlockMetadata(BasicBlock *BB, + ArrayRef<Metadata *> Properties) { // Nothing to do if no property to attach. if (Properties.empty()) return; - LLVMContext &Ctx = Loop->getFunction()->getContext(); - SmallVector<Metadata *> NewLoopProperties; - NewLoopProperties.push_back(nullptr); + LLVMContext &Ctx = BB->getContext(); + SmallVector<Metadata *> NewProperties; + NewProperties.push_back(nullptr); - // If the loop already has metadata, prepend it to the new metadata. - BasicBlock *Latch = Loop->getLatch(); - assert(Latch && "A valid CanonicalLoopInfo must have a unique latch"); - MDNode *Existing = Latch->getTerminator()->getMetadata(LLVMContext::MD_loop); + // If the basic block already has metadata, prepend it to the new metadata. 
+ MDNode *Existing = BB->getTerminator()->getMetadata(LLVMContext::MD_loop); if (Existing) - append_range(NewLoopProperties, drop_begin(Existing->operands(), 1)); + append_range(NewProperties, drop_begin(Existing->operands(), 1)); - append_range(NewLoopProperties, Properties); - MDNode *LoopID = MDNode::getDistinct(Ctx, NewLoopProperties); - LoopID->replaceOperandWith(0, LoopID); + append_range(NewProperties, Properties); + MDNode *BasicBlockID = MDNode::getDistinct(Ctx, NewProperties); + BasicBlockID->replaceOperandWith(0, BasicBlockID); - Latch->getTerminator()->setMetadata(LLVMContext::MD_loop, LoopID); + BB->getTerminator()->setMetadata(LLVMContext::MD_loop, BasicBlockID); +} + +/// Attach loop metadata \p Properties to the loop described by \p Loop. If the +/// loop already has metadata, the loop properties are appended. +static void addLoopMetadata(CanonicalLoopInfo *Loop, + ArrayRef<Metadata *> Properties) { + assert(Loop->isValid() && "Expecting a valid CanonicalLoopInfo"); + + // Attach metadata to the loop's latch + BasicBlock *Latch = Loop->getLatch(); + assert(Latch && "A valid CanonicalLoopInfo must have a unique latch"); + addBasicBlockMetadata(Latch, Properties); } /// Attach llvm.access.group metadata to the memref instructions of \p Block @@ -2895,12 +2992,79 @@ void OpenMPIRBuilder::unrollLoopHeuristic(DebugLoc, CanonicalLoopInfo *Loop) { }); } +void OpenMPIRBuilder::createIfVersion(CanonicalLoopInfo *CanonicalLoop, + Value *IfCond, ValueToValueMapTy &VMap, + const Twine &NamePrefix) { + Function *F = CanonicalLoop->getFunction(); + + // Define where if branch should be inserted + Instruction *SplitBefore; + if (Instruction::classof(IfCond)) { + SplitBefore = dyn_cast<Instruction>(IfCond); + } else { + SplitBefore = CanonicalLoop->getPreheader()->getTerminator(); + } + + // TODO: We should not rely on pass manager. Currently we use pass manager + // only for getting llvm::Loop which corresponds to given CanonicalLoopInfo + // object. 
We should have a method which returns all blocks between + // CanonicalLoopInfo::getHeader() and CanonicalLoopInfo::getAfter() + FunctionAnalysisManager FAM; + FAM.registerPass([]() { return DominatorTreeAnalysis(); }); + FAM.registerPass([]() { return LoopAnalysis(); }); + FAM.registerPass([]() { return PassInstrumentationAnalysis(); }); + + // Get the loop which needs to be cloned + LoopAnalysis LIA; + LoopInfo &&LI = LIA.run(*F, FAM); + Loop *L = LI.getLoopFor(CanonicalLoop->getHeader()); + + // Create additional blocks for the if statement + BasicBlock *Head = SplitBefore->getParent(); + Instruction *HeadOldTerm = Head->getTerminator(); + llvm::LLVMContext &C = Head->getContext(); + llvm::BasicBlock *ThenBlock = llvm::BasicBlock::Create( + C, NamePrefix + ".if.then", Head->getParent(), Head->getNextNode()); + llvm::BasicBlock *ElseBlock = llvm::BasicBlock::Create( + C, NamePrefix + ".if.else", Head->getParent(), CanonicalLoop->getExit()); + + // Create if condition branch. + Builder.SetInsertPoint(HeadOldTerm); + Instruction *BrInstr = + Builder.CreateCondBr(IfCond, ThenBlock, /*ifFalse*/ ElseBlock); + InsertPointTy IP{BrInstr->getParent(), ++BrInstr->getIterator()}; + // Then block contains branch to omp loop which needs to be vectorized + spliceBB(IP, ThenBlock, false); + ThenBlock->replaceSuccessorsPhiUsesWith(Head, ThenBlock); + + Builder.SetInsertPoint(ElseBlock); + + // Clone loop for the else branch + SmallVector<BasicBlock *, 8> NewBlocks; + + VMap[CanonicalLoop->getPreheader()] = ElseBlock; + for (BasicBlock *Block : L->getBlocks()) { + BasicBlock *NewBB = CloneBasicBlock(Block, VMap, "", F); + NewBB->moveBefore(CanonicalLoop->getExit()); + VMap[Block] = NewBB; + NewBlocks.push_back(NewBB); + } + remapInstructionsInBlocks(NewBlocks, VMap); + Builder.CreateBr(NewBlocks.front()); +} + void OpenMPIRBuilder::applySimd(CanonicalLoopInfo *CanonicalLoop, - ConstantInt *Simdlen) { + MapVector<Value *, Value *> AlignedVars, + Value *IfCond, OrderKind Order, + 
ConstantInt *Simdlen, ConstantInt *Safelen) { LLVMContext &Ctx = Builder.getContext(); Function *F = CanonicalLoop->getFunction(); + // TODO: We should not rely on pass manager. Currently we use pass manager + // only for getting llvm::Loop which corresponds to given CanonicalLoopInfo + // object. We should have a method which returns all blocks between + // CanonicalLoopInfo::getHeader() and CanonicalLoopInfo::getAfter() FunctionAnalysisManager FAM; FAM.registerPass([]() { return DominatorTreeAnalysis(); }); FAM.registerPass([]() { return LoopAnalysis(); }); @@ -2910,6 +3074,35 @@ void OpenMPIRBuilder::applySimd(CanonicalLoopInfo *CanonicalLoop, LoopInfo &&LI = LIA.run(*F, FAM); Loop *L = LI.getLoopFor(CanonicalLoop->getHeader()); + if (AlignedVars.size()) { + InsertPointTy IP = Builder.saveIP(); + Builder.SetInsertPoint(CanonicalLoop->getPreheader()->getTerminator()); + for (auto &AlignedItem : AlignedVars) { + Value *AlignedPtr = AlignedItem.first; + Value *Alignment = AlignedItem.second; + Builder.CreateAlignmentAssumption(F->getParent()->getDataLayout(), + AlignedPtr, Alignment); + } + Builder.restoreIP(IP); + } + + if (IfCond) { + ValueToValueMapTy VMap; + createIfVersion(CanonicalLoop, IfCond, VMap, "simd"); + // Add metadata to the cloned loop which disables vectorization + Value *MappedLatch = VMap.lookup(CanonicalLoop->getLatch()); + assert(MappedLatch && + "Cannot find value which corresponds to original loop latch"); + assert(isa<BasicBlock>(MappedLatch) && + "Cannot cast mapped latch block value to BasicBlock"); + BasicBlock *NewLatchBlock = dyn_cast<BasicBlock>(MappedLatch); + ConstantAsMetadata *BoolConst = + ConstantAsMetadata::get(ConstantInt::getFalse(Type::getInt1Ty(Ctx))); + addBasicBlockMetadata( + NewLatchBlock, + {MDNode::get(Ctx, {MDString::get(Ctx, "llvm.loop.vectorize.enable"), + BoolConst})}); + } SmallSet<BasicBlock *, 8> Reachable; @@ -2924,28 +3117,42 @@ void OpenMPIRBuilder::applySimd(CanonicalLoopInfo *CanonicalLoop, 
Reachable.insert(Block); } - // Add access group metadata to memory-access instructions. - MDNode *AccessGroup = MDNode::getDistinct(Ctx, {}); - for (BasicBlock *BB : Reachable) - addSimdMetadata(BB, AccessGroup, LI); + SmallVector<Metadata *> LoopMDList; + + // In presence of finite 'safelen', it may be unsafe to mark all + // the memory instructions parallel, because loop-carried + // dependences of 'safelen' iterations are possible. + // If clause order(concurrent) is specified then the memory instructions + // are marked parallel even if 'safelen' is finite. + if ((Safelen == nullptr) || (Order == OrderKind::OMP_ORDER_concurrent)) { + // Add access group metadata to memory-access instructions. + MDNode *AccessGroup = MDNode::getDistinct(Ctx, {}); + for (BasicBlock *BB : Reachable) + addSimdMetadata(BB, AccessGroup, LI); + // TODO: If the loop has existing parallel access metadata, have + // to combine two lists. + LoopMDList.push_back(MDNode::get( + Ctx, {MDString::get(Ctx, "llvm.loop.parallel_accesses"), AccessGroup})); + } // Use the above access group metadata to create loop level // metadata, which should be distinct for each loop. ConstantAsMetadata *BoolConst = ConstantAsMetadata::get(ConstantInt::getTrue(Type::getInt1Ty(Ctx))); - // TODO: If the loop has existing parallel access metadata, have - // to combine two lists. - addLoopMetadata( - CanonicalLoop, - {MDNode::get(Ctx, {MDString::get(Ctx, "llvm.loop.parallel_accesses"), - AccessGroup}), - MDNode::get(Ctx, {MDString::get(Ctx, "llvm.loop.vectorize.enable"), - BoolConst})}); - if (Simdlen != nullptr) - addLoopMetadata( - CanonicalLoop, + LoopMDList.push_back(MDNode::get( + Ctx, {MDString::get(Ctx, "llvm.loop.vectorize.enable"), BoolConst})); + + if (Simdlen || Safelen) { + // If both simdlen and safelen clauses are specified, the value of the + // simdlen parameter must be less than or equal to the value of the safelen + // parameter. Therefore, use safelen only in the absence of simdlen. 
+ ConstantInt *VectorizeWidth = Simdlen == nullptr ? Safelen : Simdlen; + LoopMDList.push_back( MDNode::get(Ctx, {MDString::get(Ctx, "llvm.loop.vectorize.width"), - ConstantAsMetadata::get(Simdlen)})); + ConstantAsMetadata::get(VectorizeWidth)})); + } + + addLoopMetadata(CanonicalLoop, LoopMDList); } /// Create the TargetMachine object to query the backend for optimization @@ -2980,8 +3187,8 @@ createTargetMachine(Function *F, CodeGenOpt::Level OptLevel) { llvm::TargetOptions Options; return std::unique_ptr<TargetMachine>(TheTarget->createTargetMachine( - Triple, CPU, Features, Options, /*RelocModel=*/None, /*CodeModel=*/None, - OptLevel)); + Triple, CPU, Features, Options, /*RelocModel=*/std::nullopt, + /*CodeModel=*/std::nullopt, OptLevel)); } /// Heuristically determine the best-performant unroll factor for \p CLI. This @@ -3026,12 +3233,12 @@ static int32_t computeHeuristicUnrollFactor(CanonicalLoopInfo *CLI) { gatherUnrollingPreferences(L, SE, TTI, /*BlockFrequencyInfo=*/nullptr, /*ProfileSummaryInfo=*/nullptr, ORE, OptLevel, - /*UserThreshold=*/None, - /*UserCount=*/None, + /*UserThreshold=*/std::nullopt, + /*UserCount=*/std::nullopt, /*UserAllowPartial=*/true, /*UserAllowRuntime=*/true, - /*UserUpperBound=*/None, - /*UserFullUnrollMaxCount=*/None); + /*UserUpperBound=*/std::nullopt, + /*UserFullUnrollMaxCount=*/std::nullopt); UP.Force = true; @@ -3107,7 +3314,7 @@ static int32_t computeHeuristicUnrollFactor(CanonicalLoopInfo *CLI) { unsigned TripMultiple = 0; bool UseUpperBound = false; - computeUnrollCount(L, TTI, DT, &LI, SE, EphValues, &ORE, TripCount, + computeUnrollCount(L, TTI, DT, &LI, &AC, SE, EphValues, &ORE, TripCount, MaxTripCount, MaxOrZero, TripMultiple, LoopSize, UP, PP, UseUpperBound); unsigned Factor = UP.Count; @@ -3290,9 +3497,10 @@ OpenMPIRBuilder::createOrderedDepend(const LocationDescription &Loc, InsertPointTy AllocaIP, unsigned NumLoops, ArrayRef<llvm::Value *> StoreValues, const Twine &Name, bool IsDependSource) { - for (size_t I = 0; 
I < StoreValues.size(); I++) - assert(StoreValues[I]->getType()->isIntegerTy(64) && - "OpenMP runtime requires depend vec with i64 type"); + assert( + llvm::all_of(StoreValues, + [](Value *SV) { return SV->getType()->isIntegerTy(64); }) && + "OpenMP runtime requires depend vec with i64 type"); if (!updateToLocation(Loc)) return Loc.IP; @@ -3423,7 +3631,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveEntry( // Emit thenBB and set the Builder's insertion point there for // body generation next. Place the block after the current block. Function *CurFn = EntryBB->getParent(); - CurFn->getBasicBlockList().insertAfter(EntryBB->getIterator(), ThenBB); + CurFn->insert(std::next(EntryBB->getIterator()), ThenBB); // Move Entry branch to end of ThenBB, and replace with conditional // branch (If-stmt) @@ -3568,9 +3776,9 @@ CallInst *OpenMPIRBuilder::createOMPInteropInit( Value *ThreadId = getOrCreateThreadID(Ident); if (Device == nullptr) Device = ConstantInt::get(Int32, -1); - Constant *InteropTypeVal = ConstantInt::get(Int64, (int)InteropType); + Constant *InteropTypeVal = ConstantInt::get(Int32, (int)InteropType); if (NumDependences == nullptr) { - NumDependences = ConstantInt::get(Int32, 0); + NumDependences = ConstantInt::get(Int64, 0); PointerType *PointerTypeVar = Type::getInt8PtrTy(M.getContext()); DependenceAddress = ConstantPointerNull::get(PointerTypeVar); } @@ -3650,7 +3858,7 @@ CallInst *OpenMPIRBuilder::createCachedThreadPrivate( Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); Value *ThreadId = getOrCreateThreadID(Ident); Constant *ThreadPrivateCache = - getOrCreateOMPInternalVariable(Int8PtrPtr, Name); + getOrCreateInternalVariable(Int8PtrPtr, Name.str()); llvm::Value *Args[] = {Ident, ThreadId, Pointer, Size, ThreadPrivateCache}; Function *Fn = @@ -3660,8 +3868,7 @@ CallInst *OpenMPIRBuilder::createCachedThreadPrivate( } OpenMPIRBuilder::InsertPointTy -OpenMPIRBuilder::createTargetInit(const LocationDescription &Loc, bool 
IsSPMD, - bool RequiresFullRuntime) { +OpenMPIRBuilder::createTargetInit(const LocationDescription &Loc, bool IsSPMD) { if (!updateToLocation(Loc)) return Loc.IP; @@ -3673,14 +3880,12 @@ OpenMPIRBuilder::createTargetInit(const LocationDescription &Loc, bool IsSPMD, IsSPMD ? OMP_TGT_EXEC_MODE_SPMD : OMP_TGT_EXEC_MODE_GENERIC); ConstantInt *UseGenericStateMachine = ConstantInt::getBool(Int32->getContext(), !IsSPMD); - ConstantInt *RequiresFullRuntimeVal = - ConstantInt::getBool(Int32->getContext(), RequiresFullRuntime); Function *Fn = getOrCreateRuntimeFunctionPtr( omp::RuntimeFunction::OMPRTL___kmpc_target_init); CallInst *ThreadKind = Builder.CreateCall( - Fn, {Ident, IsSPMDVal, UseGenericStateMachine, RequiresFullRuntimeVal}); + Fn, {Ident, IsSPMDVal, UseGenericStateMachine}); Value *ExecUserCode = Builder.CreateICmpEQ( ThreadKind, ConstantInt::get(ThreadKind->getType(), -1), @@ -3714,8 +3919,7 @@ OpenMPIRBuilder::createTargetInit(const LocationDescription &Loc, bool IsSPMD, } void OpenMPIRBuilder::createTargetDeinit(const LocationDescription &Loc, - bool IsSPMD, - bool RequiresFullRuntime) { + bool IsSPMD) { if (!updateToLocation(Loc)) return; @@ -3725,13 +3929,96 @@ void OpenMPIRBuilder::createTargetDeinit(const LocationDescription &Loc, ConstantInt *IsSPMDVal = ConstantInt::getSigned( IntegerType::getInt8Ty(Int8->getContext()), IsSPMD ? 
OMP_TGT_EXEC_MODE_SPMD : OMP_TGT_EXEC_MODE_GENERIC); - ConstantInt *RequiresFullRuntimeVal = - ConstantInt::getBool(Int32->getContext(), RequiresFullRuntime); Function *Fn = getOrCreateRuntimeFunctionPtr( omp::RuntimeFunction::OMPRTL___kmpc_target_deinit); - Builder.CreateCall(Fn, {Ident, IsSPMDVal, RequiresFullRuntimeVal}); + Builder.CreateCall(Fn, {Ident, IsSPMDVal}); +} + +void OpenMPIRBuilder::setOutlinedTargetRegionFunctionAttributes( + Function *OutlinedFn, int32_t NumTeams, int32_t NumThreads) { + if (Config.isEmbedded()) { + OutlinedFn->setLinkage(GlobalValue::WeakODRLinkage); + // TODO: Determine if DSO local can be set to true. + OutlinedFn->setDSOLocal(false); + OutlinedFn->setVisibility(GlobalValue::ProtectedVisibility); + if (Triple(M.getTargetTriple()).isAMDGCN()) + OutlinedFn->setCallingConv(CallingConv::AMDGPU_KERNEL); + } + + if (NumTeams > 0) + OutlinedFn->addFnAttr("omp_target_num_teams", std::to_string(NumTeams)); + if (NumThreads > 0) + OutlinedFn->addFnAttr("omp_target_thread_limit", + std::to_string(NumThreads)); +} + +Constant *OpenMPIRBuilder::createOutlinedFunctionID(Function *OutlinedFn, + StringRef EntryFnIDName) { + if (Config.isEmbedded()) { + assert(OutlinedFn && "The outlined function must exist if embedded"); + return ConstantExpr::getBitCast(OutlinedFn, Builder.getInt8PtrTy()); + } + + return new GlobalVariable( + M, Builder.getInt8Ty(), /*isConstant=*/true, GlobalValue::WeakAnyLinkage, + Constant::getNullValue(Builder.getInt8Ty()), EntryFnIDName); +} + +Constant *OpenMPIRBuilder::createTargetRegionEntryAddr(Function *OutlinedFn, + StringRef EntryFnName) { + if (OutlinedFn) + return OutlinedFn; + + assert(!M.getGlobalVariable(EntryFnName, true) && + "Named kernel already exists?"); + return new GlobalVariable( + M, Builder.getInt8Ty(), /*isConstant=*/true, GlobalValue::InternalLinkage, + Constant::getNullValue(Builder.getInt8Ty()), EntryFnName); +} + +void OpenMPIRBuilder::emitTargetRegionFunction( + OffloadEntriesInfoManager 
&InfoManager, TargetRegionEntryInfo &EntryInfo, + FunctionGenCallback &GenerateFunctionCallback, int32_t NumTeams, + int32_t NumThreads, bool IsOffloadEntry, Function *&OutlinedFn, + Constant *&OutlinedFnID) { + + SmallString<64> EntryFnName; + InfoManager.getTargetRegionEntryFnName(EntryFnName, EntryInfo); + + OutlinedFn = Config.isEmbedded() || !Config.openMPOffloadMandatory() + ? GenerateFunctionCallback(EntryFnName) + : nullptr; + + // If this target outline function is not an offload entry, we don't need to + // register it. This may be in the case of a false if clause, or if there are + // no OpenMP targets. + if (!IsOffloadEntry) + return; + + std::string EntryFnIDName = + Config.isEmbedded() + ? std::string(EntryFnName) + : createPlatformSpecificName({EntryFnName, "region_id"}); + + OutlinedFnID = registerTargetRegionFunction( + InfoManager, EntryInfo, OutlinedFn, EntryFnName, EntryFnIDName, NumTeams, + NumThreads); +} + +Constant *OpenMPIRBuilder::registerTargetRegionFunction( + OffloadEntriesInfoManager &InfoManager, TargetRegionEntryInfo &EntryInfo, + Function *OutlinedFn, StringRef EntryFnName, StringRef EntryFnIDName, + int32_t NumTeams, int32_t NumThreads) { + if (OutlinedFn) + setOutlinedTargetRegionFunctionAttributes(OutlinedFn, NumTeams, NumThreads); + auto OutlinedFnID = createOutlinedFunctionID(OutlinedFn, EntryFnIDName); + auto EntryAddr = createTargetRegionEntryAddr(OutlinedFn, EntryFnName); + InfoManager.registerTargetRegionEntryInfo( + EntryInfo, EntryAddr, OutlinedFnID, + OffloadEntriesInfoManager::OMPTargetRegionEntryTargetRegion); + return OutlinedFnID; } std::string OpenMPIRBuilder::getNameWithSeparators(ArrayRef<StringRef> Parts, @@ -3747,18 +4034,16 @@ std::string OpenMPIRBuilder::getNameWithSeparators(ArrayRef<StringRef> Parts, return OS.str().str(); } -Constant *OpenMPIRBuilder::getOrCreateOMPInternalVariable( - llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) { - // TODO: Replace the twine arg with stringref to get rid 
of the conversion - // logic. However This is taken from current implementation in clang as is. - // Since this method is used in many places exclusively for OMP internal use - // we will keep it as is for temporarily until we move all users to the - // builder and then, if possible, fix it everywhere in one go. - SmallString<256> Buffer; - llvm::raw_svector_ostream Out(Buffer); - Out << Name; - StringRef RuntimeName = Out.str(); - auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first; +std::string +OpenMPIRBuilder::createPlatformSpecificName(ArrayRef<StringRef> Parts) const { + return OpenMPIRBuilder::getNameWithSeparators(Parts, Config.firstSeparator(), + Config.separator()); +} + +GlobalVariable * +OpenMPIRBuilder::getOrCreateInternalVariable(Type *Ty, const StringRef &Name, + unsigned AddressSpace) { + auto &Elem = *InternalVars.try_emplace(Name, nullptr).first; if (Elem.second) { assert(cast<PointerType>(Elem.second->getType()) ->isOpaqueOrPointeeTypeMatches(Ty) && @@ -3768,20 +4053,19 @@ Constant *OpenMPIRBuilder::getOrCreateOMPInternalVariable( // variable for possibly changing that to internal or private, or maybe // create different versions of the function for different OMP internal // variables. 
- Elem.second = new llvm::GlobalVariable( - M, Ty, /*IsConstant*/ false, llvm::GlobalValue::CommonLinkage, - llvm::Constant::getNullValue(Ty), Elem.first(), - /*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal, - AddressSpace); + Elem.second = new GlobalVariable( + M, Ty, /*IsConstant=*/false, GlobalValue::CommonLinkage, + Constant::getNullValue(Ty), Elem.first(), + /*InsertBefore=*/nullptr, GlobalValue::NotThreadLocal, AddressSpace); } - return Elem.second; + return cast<GlobalVariable>(&*Elem.second); } Value *OpenMPIRBuilder::getOMPCriticalRegionLock(StringRef CriticalName) { std::string Prefix = Twine("gomp_critical_user_", CriticalName).str(); std::string Name = getNameWithSeparators({Prefix, "var"}, ".", "."); - return getOrCreateOMPInternalVariable(KmpCriticalNameTy, Name); + return getOrCreateInternalVariable(KmpCriticalNameTy, Name); } GlobalVariable * @@ -3842,6 +4126,64 @@ void OpenMPIRBuilder::emitMapperCall(const LocationDescription &Loc, ArgSizesGEP, MaptypesArg, MapnamesArg, NullPtr}); } +void OpenMPIRBuilder::emitOffloadingArraysArgument(IRBuilderBase &Builder, + TargetDataRTArgs &RTArgs, + TargetDataInfo &Info, + bool EmitDebug, + bool ForEndCall) { + assert((!ForEndCall || Info.separateBeginEndCalls()) && + "expected region end call to runtime only when end call is separate"); + auto VoidPtrTy = Type::getInt8PtrTy(M.getContext()); + auto VoidPtrPtrTy = VoidPtrTy->getPointerTo(0); + auto Int64Ty = Type::getInt64Ty(M.getContext()); + auto Int64PtrTy = Type::getInt64PtrTy(M.getContext()); + + if (!Info.NumberOfPtrs) { + RTArgs.BasePointersArray = ConstantPointerNull::get(VoidPtrPtrTy); + RTArgs.PointersArray = ConstantPointerNull::get(VoidPtrPtrTy); + RTArgs.SizesArray = ConstantPointerNull::get(Int64PtrTy); + RTArgs.MapTypesArray = ConstantPointerNull::get(Int64PtrTy); + RTArgs.MapNamesArray = ConstantPointerNull::get(VoidPtrPtrTy); + RTArgs.MappersArray = ConstantPointerNull::get(VoidPtrPtrTy); + return; + } + + RTArgs.BasePointersArray = 
Builder.CreateConstInBoundsGEP2_32( + ArrayType::get(VoidPtrTy, Info.NumberOfPtrs), + Info.RTArgs.BasePointersArray, + /*Idx0=*/0, /*Idx1=*/0); + RTArgs.PointersArray = Builder.CreateConstInBoundsGEP2_32( + ArrayType::get(VoidPtrTy, Info.NumberOfPtrs), Info.RTArgs.PointersArray, + /*Idx0=*/0, + /*Idx1=*/0); + RTArgs.SizesArray = Builder.CreateConstInBoundsGEP2_32( + ArrayType::get(Int64Ty, Info.NumberOfPtrs), Info.RTArgs.SizesArray, + /*Idx0=*/0, /*Idx1=*/0); + RTArgs.MapTypesArray = Builder.CreateConstInBoundsGEP2_32( + ArrayType::get(Int64Ty, Info.NumberOfPtrs), + ForEndCall && Info.RTArgs.MapTypesArrayEnd ? Info.RTArgs.MapTypesArrayEnd + : Info.RTArgs.MapTypesArray, + /*Idx0=*/0, + /*Idx1=*/0); + + // Only emit the mapper information arrays if debug information is + // requested. + if (!EmitDebug) + RTArgs.MapNamesArray = ConstantPointerNull::get(VoidPtrPtrTy); + else + RTArgs.MapNamesArray = Builder.CreateConstInBoundsGEP2_32( + ArrayType::get(VoidPtrTy, Info.NumberOfPtrs), Info.RTArgs.MapNamesArray, + /*Idx0=*/0, + /*Idx1=*/0); + // If there is no user-defined mapper, set the mapper array to nullptr to + // avoid an unnecessary data privatization + if (!Info.HasMapper) + RTArgs.MappersArray = ConstantPointerNull::get(VoidPtrPtrTy); + else + RTArgs.MappersArray = + Builder.CreatePointerCast(Info.RTArgs.MappersArray, VoidPtrPtrTy); +} + bool OpenMPIRBuilder::checkAndEmitFlushAfterAtomic( const LocationDescription &Loc, llvm::AtomicOrdering AO, AtomicKind AK) { assert(!(AO == AtomicOrdering::NotAtomic || @@ -4006,6 +4348,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicUpdate( return Builder.saveIP(); } +// FIXME: Duplicating AtomicExpand Value *OpenMPIRBuilder::emitRMWOpAsInstruction(Value *Src1, Value *Src2, AtomicRMWInst::BinOp RMWOp) { switch (RMWOp) { @@ -4031,6 +4374,8 @@ Value *OpenMPIRBuilder::emitRMWOpAsInstruction(Value *Src1, Value *Src2, case AtomicRMWInst::UMin: case AtomicRMWInst::FMax: case AtomicRMWInst::FMin: + case 
AtomicRMWInst::UIncWrap: + case AtomicRMWInst::UDecWrap: llvm_unreachable("Unsupported atomic update operation"); } llvm_unreachable("Unsupported atomic update operation"); @@ -4392,10 +4737,10 @@ void OpenMPIRBuilder::initializeTypes(Module &M) { #define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \ VarName = FunctionType::get(ReturnType, {__VA_ARGS__}, IsVarArg); \ VarName##Ptr = PointerType::getUnqual(VarName); -#define OMP_STRUCT_TYPE(VarName, StructName, ...) \ +#define OMP_STRUCT_TYPE(VarName, StructName, Packed, ...) \ T = StructType::getTypeByName(Ctx, StructName); \ if (!T) \ - T = StructType::create(Ctx, {__VA_ARGS__}, StructName); \ + T = StructType::create(Ctx, {__VA_ARGS__}, StructName, Packed); \ VarName = T; \ VarName##Ptr = PointerType::getUnqual(T); #include "llvm/Frontend/OpenMP/OMPKinds.def" @@ -4418,6 +4763,381 @@ void OpenMPIRBuilder::OutlineInfo::collectBlocks( } } +void OpenMPIRBuilder::createOffloadEntry(Constant *ID, Constant *Addr, + uint64_t Size, int32_t Flags, + GlobalValue::LinkageTypes) { + if (!Config.isTargetCodegen()) { + emitOffloadingEntry(ID, Addr->getName(), Size, Flags); + return; + } + // TODO: Add support for global variables on the device after declare target + // support. + Function *Fn = dyn_cast<Function>(Addr); + if (!Fn) + return; + + Module &M = *(Fn->getParent()); + LLVMContext &Ctx = M.getContext(); + + // Get "nvvm.annotations" metadata node. + NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations"); + + Metadata *MDVals[] = { + ConstantAsMetadata::get(Fn), MDString::get(Ctx, "kernel"), + ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(Ctx), 1))}; + // Append metadata to nvvm.annotations. + MD->addOperand(MDNode::get(Ctx, MDVals)); + + // Add a function attribute for the kernel. + Fn->addFnAttr(Attribute::get(Ctx, "kernel")); +} + +// We only generate metadata for function that contain target regions. 
+void OpenMPIRBuilder::createOffloadEntriesAndInfoMetadata( + OffloadEntriesInfoManager &OffloadEntriesInfoManager, + EmitMetadataErrorReportFunctionTy &ErrorFn) { + + // If there are no entries, we don't need to do anything. + if (OffloadEntriesInfoManager.empty()) + return; + + LLVMContext &C = M.getContext(); + SmallVector<std::pair<const OffloadEntriesInfoManager::OffloadEntryInfo *, + TargetRegionEntryInfo>, + 16> + OrderedEntries(OffloadEntriesInfoManager.size()); + + // Auxiliary methods to create metadata values and strings. + auto &&GetMDInt = [this](unsigned V) { + return ConstantAsMetadata::get(ConstantInt::get(Builder.getInt32Ty(), V)); + }; + + auto &&GetMDString = [&C](StringRef V) { return MDString::get(C, V); }; + + // Create the offloading info metadata node. + NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); + auto &&TargetRegionMetadataEmitter = + [&C, MD, &OrderedEntries, &GetMDInt, &GetMDString]( + const TargetRegionEntryInfo &EntryInfo, + const OffloadEntriesInfoManager::OffloadEntryInfoTargetRegion &E) { + // Generate metadata for target regions. Each entry of this metadata + // contains: + // - Entry 0 -> Kind of this type of metadata (0). + // - Entry 1 -> Device ID of the file where the entry was identified. + // - Entry 2 -> File ID of the file where the entry was identified. + // - Entry 3 -> Mangled name of the function where the entry was + // identified. + // - Entry 4 -> Line in the file where the entry was identified. + // - Entry 5 -> Count of regions at this DeviceID/FilesID/Line. + // - Entry 6 -> Order the entry was created. + // The first element of the metadata node is the kind. + Metadata *Ops[] = { + GetMDInt(E.getKind()), GetMDInt(EntryInfo.DeviceID), + GetMDInt(EntryInfo.FileID), GetMDString(EntryInfo.ParentName), + GetMDInt(EntryInfo.Line), GetMDInt(EntryInfo.Count), + GetMDInt(E.getOrder())}; + + // Save this entry in the right position of the ordered entries array. 
+ OrderedEntries[E.getOrder()] = std::make_pair(&E, EntryInfo); + + // Add metadata to the named metadata node. + MD->addOperand(MDNode::get(C, Ops)); + }; + + OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo( + TargetRegionMetadataEmitter); + + // Create function that emits metadata for each device global variable entry; + auto &&DeviceGlobalVarMetadataEmitter = + [&C, &OrderedEntries, &GetMDInt, &GetMDString, MD]( + StringRef MangledName, + const OffloadEntriesInfoManager::OffloadEntryInfoDeviceGlobalVar &E) { + // Generate metadata for global variables. Each entry of this metadata + // contains: + // - Entry 0 -> Kind of this type of metadata (1). + // - Entry 1 -> Mangled name of the variable. + // - Entry 2 -> Declare target kind. + // - Entry 3 -> Order the entry was created. + // The first element of the metadata node is the kind. + Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDString(MangledName), + GetMDInt(E.getFlags()), GetMDInt(E.getOrder())}; + + // Save this entry in the right position of the ordered entries array. + TargetRegionEntryInfo varInfo(MangledName, 0, 0, 0); + OrderedEntries[E.getOrder()] = std::make_pair(&E, varInfo); + + // Add metadata to the named metadata node. + MD->addOperand(MDNode::get(C, Ops)); + }; + + OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo( + DeviceGlobalVarMetadataEmitter); + + for (const auto &E : OrderedEntries) { + assert(E.first && "All ordered entries must exist!"); + if (const auto *CE = + dyn_cast<OffloadEntriesInfoManager::OffloadEntryInfoTargetRegion>( + E.first)) { + if (!CE->getID() || !CE->getAddress()) { + // Do not blame the entry if the parent funtion is not emitted. 
+ TargetRegionEntryInfo EntryInfo = E.second; + StringRef FnName = EntryInfo.ParentName; + if (!M.getNamedValue(FnName)) + continue; + ErrorFn(EMIT_MD_TARGET_REGION_ERROR, EntryInfo); + continue; + } + createOffloadEntry(CE->getID(), CE->getAddress(), + /*Size=*/0, CE->getFlags(), + GlobalValue::WeakAnyLinkage); + } else if (const auto *CE = dyn_cast< + OffloadEntriesInfoManager::OffloadEntryInfoDeviceGlobalVar>( + E.first)) { + OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind Flags = + static_cast<OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind>( + CE->getFlags()); + switch (Flags) { + case OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo: { + if (Config.isEmbedded() && Config.hasRequiresUnifiedSharedMemory()) + continue; + if (!CE->getAddress()) { + ErrorFn(EMIT_MD_DECLARE_TARGET_ERROR, E.second); + continue; + } + // The vaiable has no definition - no need to add the entry. + if (CE->getVarSize() == 0) + continue; + break; + } + case OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink: + assert(((Config.isEmbedded() && !CE->getAddress()) || + (!Config.isEmbedded() && CE->getAddress())) && + "Declaret target link address is set."); + if (Config.isEmbedded()) + continue; + if (!CE->getAddress()) { + ErrorFn(EMIT_MD_GLOBAL_VAR_LINK_ERROR, TargetRegionEntryInfo()); + continue; + } + break; + } + + // Hidden or internal symbols on the device are not externally visible. + // We should not attempt to register them by creating an offloading + // entry. 
+ if (auto *GV = dyn_cast<GlobalValue>(CE->getAddress())) + if (GV->hasLocalLinkage() || GV->hasHiddenVisibility()) + continue; + + createOffloadEntry(CE->getAddress(), CE->getAddress(), CE->getVarSize(), + Flags, CE->getLinkage()); + + } else { + llvm_unreachable("Unsupported entry kind."); + } + } +} + +void TargetRegionEntryInfo::getTargetRegionEntryFnName( + SmallVectorImpl<char> &Name, StringRef ParentName, unsigned DeviceID, + unsigned FileID, unsigned Line, unsigned Count) { + raw_svector_ostream OS(Name); + OS << "__omp_offloading" << llvm::format("_%x", DeviceID) + << llvm::format("_%x_", FileID) << ParentName << "_l" << Line; + if (Count) + OS << "_" << Count; +} + +void OffloadEntriesInfoManager::getTargetRegionEntryFnName( + SmallVectorImpl<char> &Name, const TargetRegionEntryInfo &EntryInfo) { + unsigned NewCount = getTargetRegionEntryInfoCount(EntryInfo); + TargetRegionEntryInfo::getTargetRegionEntryFnName( + Name, EntryInfo.ParentName, EntryInfo.DeviceID, EntryInfo.FileID, + EntryInfo.Line, NewCount); +} + +/// Loads all the offload entries information from the host IR +/// metadata. +void OpenMPIRBuilder::loadOffloadInfoMetadata( + Module &M, OffloadEntriesInfoManager &OffloadEntriesInfoManager) { + // If we are in target mode, load the metadata from the host IR. This code has + // to match the metadata creation in createOffloadEntriesAndInfoMetadata(). 
+ + NamedMDNode *MD = M.getNamedMetadata(ompOffloadInfoName); + if (!MD) + return; + + for (MDNode *MN : MD->operands()) { + auto &&GetMDInt = [MN](unsigned Idx) { + auto *V = cast<ConstantAsMetadata>(MN->getOperand(Idx)); + return cast<ConstantInt>(V->getValue())->getZExtValue(); + }; + + auto &&GetMDString = [MN](unsigned Idx) { + auto *V = cast<MDString>(MN->getOperand(Idx)); + return V->getString(); + }; + + switch (GetMDInt(0)) { + default: + llvm_unreachable("Unexpected metadata!"); + break; + case OffloadEntriesInfoManager::OffloadEntryInfo:: + OffloadingEntryInfoTargetRegion: { + TargetRegionEntryInfo EntryInfo(/*ParentName=*/GetMDString(3), + /*DeviceID=*/GetMDInt(1), + /*FileID=*/GetMDInt(2), + /*Line=*/GetMDInt(4), + /*Count=*/GetMDInt(5)); + OffloadEntriesInfoManager.initializeTargetRegionEntryInfo( + EntryInfo, /*Order=*/GetMDInt(6)); + break; + } + case OffloadEntriesInfoManager::OffloadEntryInfo:: + OffloadingEntryInfoDeviceGlobalVar: + OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo( + /*MangledName=*/GetMDString(1), + static_cast<OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind>( + /*Flags=*/GetMDInt(2)), + /*Order=*/GetMDInt(3)); + break; + } + } +} + +bool OffloadEntriesInfoManager::empty() const { + return OffloadEntriesTargetRegion.empty() && + OffloadEntriesDeviceGlobalVar.empty(); +} + +unsigned OffloadEntriesInfoManager::getTargetRegionEntryInfoCount( + const TargetRegionEntryInfo &EntryInfo) const { + auto It = OffloadEntriesTargetRegionCount.find( + getTargetRegionEntryCountKey(EntryInfo)); + if (It == OffloadEntriesTargetRegionCount.end()) + return 0; + return It->second; +} + +void OffloadEntriesInfoManager::incrementTargetRegionEntryInfoCount( + const TargetRegionEntryInfo &EntryInfo) { + OffloadEntriesTargetRegionCount[getTargetRegionEntryCountKey(EntryInfo)] = + EntryInfo.Count + 1; +} + +/// Initialize target region entry. 
+void OffloadEntriesInfoManager::initializeTargetRegionEntryInfo( + const TargetRegionEntryInfo &EntryInfo, unsigned Order) { + OffloadEntriesTargetRegion[EntryInfo] = + OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr, + OMPTargetRegionEntryTargetRegion); + ++OffloadingEntriesNum; +} + +void OffloadEntriesInfoManager::registerTargetRegionEntryInfo( + TargetRegionEntryInfo EntryInfo, Constant *Addr, Constant *ID, + OMPTargetRegionEntryKind Flags) { + assert(EntryInfo.Count == 0 && "expected default EntryInfo"); + + // Update the EntryInfo with the next available count for this location. + EntryInfo.Count = getTargetRegionEntryInfoCount(EntryInfo); + + // If we are emitting code for a target, the entry is already initialized, + // only has to be registered. + if (Config.isEmbedded()) { + // This could happen if the device compilation is invoked standalone. + if (!hasTargetRegionEntryInfo(EntryInfo)) { + return; + } + auto &Entry = OffloadEntriesTargetRegion[EntryInfo]; + Entry.setAddress(Addr); + Entry.setID(ID); + Entry.setFlags(Flags); + } else { + if (Flags == OffloadEntriesInfoManager::OMPTargetRegionEntryTargetRegion && + hasTargetRegionEntryInfo(EntryInfo, /*IgnoreAddressId*/ true)) + return; + assert(!hasTargetRegionEntryInfo(EntryInfo) && + "Target region entry already registered!"); + OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags); + OffloadEntriesTargetRegion[EntryInfo] = Entry; + ++OffloadingEntriesNum; + } + incrementTargetRegionEntryInfoCount(EntryInfo); +} + +bool OffloadEntriesInfoManager::hasTargetRegionEntryInfo( + TargetRegionEntryInfo EntryInfo, bool IgnoreAddressId) const { + + // Update the EntryInfo with the next available count for this location. + EntryInfo.Count = getTargetRegionEntryInfoCount(EntryInfo); + + auto It = OffloadEntriesTargetRegion.find(EntryInfo); + if (It == OffloadEntriesTargetRegion.end()) { + return false; + } + // Fail if this entry is already registered. 
+ if (!IgnoreAddressId && (It->second.getAddress() || It->second.getID())) + return false; + return true; +} + +void OffloadEntriesInfoManager::actOnTargetRegionEntriesInfo( + const OffloadTargetRegionEntryInfoActTy &Action) { + // Scan all target region entries and perform the provided action. + for (const auto &It : OffloadEntriesTargetRegion) { + Action(It.first, It.second); + } +} + +void OffloadEntriesInfoManager::initializeDeviceGlobalVarEntryInfo( + StringRef Name, OMPTargetGlobalVarEntryKind Flags, unsigned Order) { + OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags); + ++OffloadingEntriesNum; +} + +void OffloadEntriesInfoManager::registerDeviceGlobalVarEntryInfo( + StringRef VarName, Constant *Addr, int64_t VarSize, + OMPTargetGlobalVarEntryKind Flags, GlobalValue::LinkageTypes Linkage) { + if (Config.isEmbedded()) { + // This could happen if the device compilation is invoked standalone. + if (!hasDeviceGlobalVarEntryInfo(VarName)) + return; + auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; + if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) { + if (Entry.getVarSize() == 0) { + Entry.setVarSize(VarSize); + Entry.setLinkage(Linkage); + } + return; + } + Entry.setVarSize(VarSize); + Entry.setLinkage(Linkage); + Entry.setAddress(Addr); + } else { + if (hasDeviceGlobalVarEntryInfo(VarName)) { + auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; + assert(Entry.isValid() && Entry.getFlags() == Flags && + "Entry not initialized!"); + if (Entry.getVarSize() == 0) { + Entry.setVarSize(VarSize); + Entry.setLinkage(Linkage); + } + return; + } + OffloadEntriesDeviceGlobalVar.try_emplace(VarName, OffloadingEntriesNum, + Addr, VarSize, Flags, Linkage); + ++OffloadingEntriesNum; + } +} + +void OffloadEntriesInfoManager::actOnDeviceGlobalVarEntriesInfo( + const OffloadDeviceGlobalVarEntryInfoActTy &Action) { + // Scan all target region entries and perform the provided action. 
+ for (const auto &E : OffloadEntriesDeviceGlobalVar) + Action(E.getKey(), E.getValue()); +} + void CanonicalLoopInfo::collectControlBlocks( SmallVectorImpl<BasicBlock *> &BBs) { // We only count those BBs as control block for which we do not need to |