diff options
Diffstat (limited to 'llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp')
-rw-r--r-- | llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 780 |
1 files changed, 638 insertions, 142 deletions
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 739c2998baa8f..9468a3aa3c8dd 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -18,8 +18,8 @@ #include "llvm/ADT/StringSwitch.h" #include "llvm/IR/CFG.h" #include "llvm/IR/DebugInfo.h" -#include "llvm/IR/MDBuilder.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/MDBuilder.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Error.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -31,7 +31,6 @@ using namespace llvm; using namespace omp; -using namespace types; static cl::opt<bool> OptimisticAttributes("openmp-ir-builder-optimistic-attributes", cl::Hidden, @@ -59,13 +58,17 @@ void OpenMPIRBuilder::addAttributes(omp::RuntimeFunction FnID, Function &Fn) { } } -Function *OpenMPIRBuilder::getOrCreateRuntimeFunction(RuntimeFunction FnID) { +FunctionCallee +OpenMPIRBuilder::getOrCreateRuntimeFunction(Module &M, RuntimeFunction FnID) { + FunctionType *FnTy = nullptr; Function *Fn = nullptr; // Try to find the declation in the module first. switch (FnID) { #define OMP_RTL(Enum, Str, IsVarArg, ReturnType, ...) \ case Enum: \ + FnTy = FunctionType::get(ReturnType, ArrayRef<Type *>{__VA_ARGS__}, \ + IsVarArg); \ Fn = M.getFunction(Str); \ break; #include "llvm/Frontend/OpenMP/OMPKinds.def" @@ -74,25 +77,113 @@ Function *OpenMPIRBuilder::getOrCreateRuntimeFunction(RuntimeFunction FnID) { if (!Fn) { // Create a new declaration if we need one. switch (FnID) { -#define OMP_RTL(Enum, Str, IsVarArg, ReturnType, ...) \ +#define OMP_RTL(Enum, Str, ...) 
\ case Enum: \ - Fn = Function::Create(FunctionType::get(ReturnType, \ - ArrayRef<Type *>{__VA_ARGS__}, \ - IsVarArg), \ - GlobalValue::ExternalLinkage, Str, M); \ + Fn = Function::Create(FnTy, GlobalValue::ExternalLinkage, Str, M); \ break; #include "llvm/Frontend/OpenMP/OMPKinds.def" } + // Add information if the runtime function takes a callback function + if (FnID == OMPRTL___kmpc_fork_call || FnID == OMPRTL___kmpc_fork_teams) { + if (!Fn->hasMetadata(LLVMContext::MD_callback)) { + LLVMContext &Ctx = Fn->getContext(); + MDBuilder MDB(Ctx); + // Annotate the callback behavior of the runtime function: + // - The callback callee is argument number 2 (microtask). + // - The first two arguments of the callback callee are unknown (-1). + // - All variadic arguments to the runtime function are passed to the + // callback callee. + Fn->addMetadata( + LLVMContext::MD_callback, + *MDNode::get(Ctx, {MDB.createCallbackEncoding( + 2, {-1, -1}, /* VarArgsArePassed */ true)})); + } + } + + LLVM_DEBUG(dbgs() << "Created OpenMP runtime function " << Fn->getName() + << " with type " << *Fn->getFunctionType() << "\n"); addAttributes(FnID, *Fn); + + } else { + LLVM_DEBUG(dbgs() << "Found OpenMP runtime function " << Fn->getName() + << " with type " << *Fn->getFunctionType() << "\n"); } assert(Fn && "Failed to create OpenMP runtime function"); + + // Cast the function to the expected type if necessary + Constant *C = ConstantExpr::getBitCast(Fn, FnTy->getPointerTo()); + return {FnTy, C}; +} + +Function *OpenMPIRBuilder::getOrCreateRuntimeFunctionPtr(RuntimeFunction FnID) { + FunctionCallee RTLFn = getOrCreateRuntimeFunction(M, FnID); + auto *Fn = dyn_cast<llvm::Function>(RTLFn.getCallee()); + assert(Fn && "Failed to create OpenMP runtime function pointer"); return Fn; } void OpenMPIRBuilder::initialize() { initializeTypes(M); } +void OpenMPIRBuilder::finalize() { + SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet; + SmallVector<BasicBlock *, 32> Blocks; + for (OutlineInfo &OI : 
OutlineInfos) { + ParallelRegionBlockSet.clear(); + Blocks.clear(); + OI.collectBlocks(ParallelRegionBlockSet, Blocks); + + Function *OuterFn = OI.EntryBB->getParent(); + CodeExtractorAnalysisCache CEAC(*OuterFn); + CodeExtractor Extractor(Blocks, /* DominatorTree */ nullptr, + /* AggregateArgs */ false, + /* BlockFrequencyInfo */ nullptr, + /* BranchProbabilityInfo */ nullptr, + /* AssumptionCache */ nullptr, + /* AllowVarArgs */ true, + /* AllowAlloca */ true, + /* Suffix */ ".omp_par"); + + LLVM_DEBUG(dbgs() << "Before outlining: " << *OuterFn << "\n"); + LLVM_DEBUG(dbgs() << "Entry " << OI.EntryBB->getName() + << " Exit: " << OI.ExitBB->getName() << "\n"); + assert(Extractor.isEligible() && + "Expected OpenMP outlining to be possible!"); + + Function *OutlinedFn = Extractor.extractCodeRegion(CEAC); + + LLVM_DEBUG(dbgs() << "After outlining: " << *OuterFn << "\n"); + LLVM_DEBUG(dbgs() << " Outlined function: " << *OutlinedFn << "\n"); + assert(OutlinedFn->getReturnType()->isVoidTy() && + "OpenMP outlined functions should not return a value!"); + + // For compability with the clang CG we move the outlined function after the + // one with the parallel region. + OutlinedFn->removeFromParent(); + M.getFunctionList().insertAfter(OuterFn->getIterator(), OutlinedFn); + + // Remove the artificial entry introduced by the extractor right away, we + // made our own entry block after all. + { + BasicBlock &ArtificialEntry = OutlinedFn->getEntryBlock(); + assert(ArtificialEntry.getUniqueSuccessor() == OI.EntryBB); + assert(OI.EntryBB->getUniquePredecessor() == &ArtificialEntry); + OI.EntryBB->moveBefore(&ArtificialEntry); + ArtificialEntry.eraseFromParent(); + } + assert(&OutlinedFn->getEntryBlock() == OI.EntryBB); + assert(OutlinedFn && OutlinedFn->getNumUses() == 1); + + // Run a user callback, e.g. to add attributes. + if (OI.PostOutlineCB) + OI.PostOutlineCB(*OutlinedFn); + } + + // Allow finalize to be called multiple times. 
+ OutlineInfos.clear(); +} + Value *OpenMPIRBuilder::getOrCreateIdent(Constant *SrcLocStr, IdentFlag LocFlags) { // Enable "C-mode". @@ -165,7 +256,7 @@ OpenMPIRBuilder::getOrCreateSrcLocStr(const LocationDescription &Loc) { Value *OpenMPIRBuilder::getOrCreateThreadID(Value *Ident) { return Builder.CreateCall( - getOrCreateRuntimeFunction(OMPRTL___kmpc_global_thread_num), Ident, + getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num), Ident, "omp_global_thread_num"); } @@ -212,10 +303,11 @@ OpenMPIRBuilder::emitBarrierImpl(const LocationDescription &Loc, Directive Kind, bool UseCancelBarrier = !ForceSimpleCall && isLastFinalizationInfoCancellable(OMPD_parallel); - Value *Result = Builder.CreateCall( - getOrCreateRuntimeFunction(UseCancelBarrier ? OMPRTL___kmpc_cancel_barrier - : OMPRTL___kmpc_barrier), - Args); + Value *Result = + Builder.CreateCall(getOrCreateRuntimeFunctionPtr( + UseCancelBarrier ? OMPRTL___kmpc_cancel_barrier + : OMPRTL___kmpc_barrier), + Args); if (UseCancelBarrier && CheckCancelFlag) emitCancelationCheckImpl(Result, OMPD_parallel); @@ -253,7 +345,7 @@ OpenMPIRBuilder::CreateCancel(const LocationDescription &Loc, Value *Ident = getOrCreateIdent(SrcLocStr); Value *Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind}; Value *Result = Builder.CreateCall( - getOrCreateRuntimeFunction(OMPRTL___kmpc_cancel), Args); + getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancel), Args); // The actual cancel logic is shared with others, e.g., cancel_barriers. 
emitCancelationCheckImpl(Result, CanceledDirective); @@ -318,7 +410,7 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel( Ident, ThreadID, Builder.CreateIntCast(NumThreads, Int32, /*isSigned*/ false)}; Builder.CreateCall( - getOrCreateRuntimeFunction(OMPRTL___kmpc_push_num_threads), Args); + getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_num_threads), Args); } if (ProcBind != OMP_PROC_BIND_default) { @@ -326,8 +418,8 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel( Value *Args[] = { Ident, ThreadID, ConstantInt::get(Int32, unsigned(ProcBind), /*isSigned=*/true)}; - Builder.CreateCall(getOrCreateRuntimeFunction(OMPRTL___kmpc_push_proc_bind), - Args); + Builder.CreateCall( + getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_proc_bind), Args); } BasicBlock *InsertBB = Builder.GetInsertBlock(); @@ -415,32 +507,135 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel( // PRegionExitBB <- A common exit to simplify block collection. // - LLVM_DEBUG(dbgs() << "Before body codegen: " << *UI->getFunction() << "\n"); + LLVM_DEBUG(dbgs() << "Before body codegen: " << *OuterFn << "\n"); // Let the caller create the body. assert(BodyGenCB && "Expected body generation callback!"); InsertPointTy CodeGenIP(PRegBodyBB, PRegBodyBB->begin()); BodyGenCB(AllocaIP, CodeGenIP, *PRegPreFiniBB); - LLVM_DEBUG(dbgs() << "After body codegen: " << *UI->getFunction() << "\n"); + LLVM_DEBUG(dbgs() << "After body codegen: " << *OuterFn << "\n"); + + FunctionCallee RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_call); + if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) { + if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) { + llvm::LLVMContext &Ctx = F->getContext(); + MDBuilder MDB(Ctx); + // Annotate the callback behavior of the __kmpc_fork_call: + // - The callback callee is argument number 2 (microtask). + // - The first two arguments of the callback callee are unknown (-1). 
+ // - All variadic arguments to the __kmpc_fork_call are passed to the + // callback callee. + F->addMetadata( + llvm::LLVMContext::MD_callback, + *llvm::MDNode::get( + Ctx, {MDB.createCallbackEncoding(2, {-1, -1}, + /* VarArgsArePassed */ true)})); + } + } + + OutlineInfo OI; + OI.PostOutlineCB = [=](Function &OutlinedFn) { + // Add some known attributes. + OutlinedFn.addParamAttr(0, Attribute::NoAlias); + OutlinedFn.addParamAttr(1, Attribute::NoAlias); + OutlinedFn.addFnAttr(Attribute::NoUnwind); + OutlinedFn.addFnAttr(Attribute::NoRecurse); + + assert(OutlinedFn.arg_size() >= 2 && + "Expected at least tid and bounded tid as arguments"); + unsigned NumCapturedVars = + OutlinedFn.arg_size() - /* tid & bounded tid */ 2; + + CallInst *CI = cast<CallInst>(OutlinedFn.user_back()); + CI->getParent()->setName("omp_parallel"); + Builder.SetInsertPoint(CI); + + // Build call __kmpc_fork_call(Ident, n, microtask, var1, .., varn); + Value *ForkCallArgs[] = { + Ident, Builder.getInt32(NumCapturedVars), + Builder.CreateBitCast(&OutlinedFn, ParallelTaskPtr)}; + + SmallVector<Value *, 16> RealArgs; + RealArgs.append(std::begin(ForkCallArgs), std::end(ForkCallArgs)); + RealArgs.append(CI->arg_begin() + /* tid & bound tid */ 2, CI->arg_end()); + + Builder.CreateCall(RTLFn, RealArgs); + + LLVM_DEBUG(dbgs() << "With fork_call placed: " + << *Builder.GetInsertBlock()->getParent() << "\n"); + + InsertPointTy ExitIP(PRegExitBB, PRegExitBB->end()); + + // Initialize the local TID stack location with the argument value. + Builder.SetInsertPoint(PrivTID); + Function::arg_iterator OutlinedAI = OutlinedFn.arg_begin(); + Builder.CreateStore(Builder.CreateLoad(OutlinedAI), PrivTIDAddr); + + // If no "if" clause was present we do not need the call created during + // outlining, otherwise we reuse it in the serialized parallel region. 
+ if (!ElseTI) { + CI->eraseFromParent(); + } else { + + // If an "if" clause was present we are now generating the serialized + // version into the "else" branch. + Builder.SetInsertPoint(ElseTI); + + // Build calls __kmpc_serialized_parallel(&Ident, GTid); + Value *SerializedParallelCallArgs[] = {Ident, ThreadID}; + Builder.CreateCall( + getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_serialized_parallel), + SerializedParallelCallArgs); + + // OutlinedFn(&gtid, &zero, CapturedStruct); + CI->removeFromParent(); + Builder.Insert(CI); + + // __kmpc_end_serialized_parallel(&Ident, GTid); + Value *EndArgs[] = {Ident, ThreadID}; + Builder.CreateCall( + getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_serialized_parallel), + EndArgs); + + LLVM_DEBUG(dbgs() << "With serialized parallel region: " + << *Builder.GetInsertBlock()->getParent() << "\n"); + } + + for (Instruction *I : ToBeDeleted) + I->eraseFromParent(); + }; + + // Adjust the finalization stack, verify the adjustment, and call the + // finalize function a last time to finalize values between the pre-fini + // block and the exit block if we left the parallel "the normal way". + auto FiniInfo = FinalizationStack.pop_back_val(); + (void)FiniInfo; + assert(FiniInfo.DK == OMPD_parallel && + "Unexpected finalization stack state!"); + + Instruction *PRegPreFiniTI = PRegPreFiniBB->getTerminator(); + + InsertPointTy PreFiniIP(PRegPreFiniBB, PRegPreFiniTI->getIterator()); + FiniCB(PreFiniIP); + + OI.EntryBB = PRegEntryBB; + OI.ExitBB = PRegExitBB; SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet; - SmallVector<BasicBlock *, 32> ParallelRegionBlocks, Worklist; - ParallelRegionBlockSet.insert(PRegEntryBB); - ParallelRegionBlockSet.insert(PRegExitBB); + SmallVector<BasicBlock *, 32> Blocks; + OI.collectBlocks(ParallelRegionBlockSet, Blocks); - // Collect all blocks in-between PRegEntryBB and PRegExitBB. 
- Worklist.push_back(PRegEntryBB); - while (!Worklist.empty()) { - BasicBlock *BB = Worklist.pop_back_val(); - ParallelRegionBlocks.push_back(BB); - for (BasicBlock *SuccBB : successors(BB)) - if (ParallelRegionBlockSet.insert(SuccBB).second) - Worklist.push_back(SuccBB); - } + // Ensure a single exit node for the outlined region by creating one. + // We might have multiple incoming edges to the exit now due to finalizations, + // e.g., cancel calls that cause the control flow to leave the region. + BasicBlock *PRegOutlinedExitBB = PRegExitBB; + PRegExitBB = SplitBlock(PRegExitBB, &*PRegExitBB->getFirstInsertionPt()); + PRegOutlinedExitBB->setName("omp.par.outlined.exit"); + Blocks.push_back(PRegOutlinedExitBB); CodeExtractorAnalysisCache CEAC(*OuterFn); - CodeExtractor Extractor(ParallelRegionBlocks, /* DominatorTree */ nullptr, + CodeExtractor Extractor(Blocks, /* DominatorTree */ nullptr, /* AggregateArgs */ false, /* BlockFrequencyInfo */ nullptr, /* BranchProbabilityInfo */ nullptr, @@ -455,10 +650,10 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel( Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit); Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands); - LLVM_DEBUG(dbgs() << "Before privatization: " << *UI->getFunction() << "\n"); + LLVM_DEBUG(dbgs() << "Before privatization: " << *OuterFn << "\n"); FunctionCallee TIDRTLFn = - getOrCreateRuntimeFunction(OMPRTL___kmpc_global_thread_num); + getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num); auto PrivHelper = [&](Value &V) { if (&V == TIDAddr || &V == ZeroAddr) @@ -491,142 +686,443 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel( LLVM_DEBUG(dbgs() << "Captured input: " << *Input << "\n"); PrivHelper(*Input); } - for (Value *Output : Outputs) { - LLVM_DEBUG(dbgs() << "Captured output: " << *Output << "\n"); - PrivHelper(*Output); - } + assert(Outputs.empty() && + "OpenMP outlining should not produce live-out values!"); - LLVM_DEBUG(dbgs() << "After 
privatization: " << *UI->getFunction() << "\n"); + LLVM_DEBUG(dbgs() << "After privatization: " << *OuterFn << "\n"); LLVM_DEBUG({ - for (auto *BB : ParallelRegionBlocks) + for (auto *BB : Blocks) dbgs() << " PBR: " << BB->getName() << "\n"; }); - // Add some known attributes to the outlined function. - Function *OutlinedFn = Extractor.extractCodeRegion(CEAC); - OutlinedFn->addParamAttr(0, Attribute::NoAlias); - OutlinedFn->addParamAttr(1, Attribute::NoAlias); - OutlinedFn->addFnAttr(Attribute::NoUnwind); - OutlinedFn->addFnAttr(Attribute::NoRecurse); - - LLVM_DEBUG(dbgs() << "After outlining: " << *UI->getFunction() << "\n"); - LLVM_DEBUG(dbgs() << " Outlined function: " << *OutlinedFn << "\n"); - - // For compability with the clang CG we move the outlined function after the - // one with the parallel region. - OutlinedFn->removeFromParent(); - M.getFunctionList().insertAfter(OuterFn->getIterator(), OutlinedFn); - - // Remove the artificial entry introduced by the extractor right away, we - // made our own entry block after all. - { - BasicBlock &ArtificialEntry = OutlinedFn->getEntryBlock(); - assert(ArtificialEntry.getUniqueSuccessor() == PRegEntryBB); - assert(PRegEntryBB->getUniquePredecessor() == &ArtificialEntry); - PRegEntryBB->moveBefore(&ArtificialEntry); - ArtificialEntry.eraseFromParent(); - } - LLVM_DEBUG(dbgs() << "PP Outlined function: " << *OutlinedFn << "\n"); - assert(&OutlinedFn->getEntryBlock() == PRegEntryBB); + // Register the outlined info. 
+ addOutlineInfo(std::move(OI)); + + InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end()); + UI->eraseFromParent(); - assert(OutlinedFn && OutlinedFn->getNumUses() == 1); - assert(OutlinedFn->arg_size() >= 2 && - "Expected at least tid and bounded tid as arguments"); - unsigned NumCapturedVars = OutlinedFn->arg_size() - /* tid & bounded tid */ 2; + return AfterIP; +} - CallInst *CI = cast<CallInst>(OutlinedFn->user_back()); - CI->getParent()->setName("omp_parallel"); - Builder.SetInsertPoint(CI); +void OpenMPIRBuilder::emitFlush(const LocationDescription &Loc) { + // Build call void __kmpc_flush(ident_t *loc) + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); + Value *Args[] = {getOrCreateIdent(SrcLocStr)}; - // Build call __kmpc_fork_call(Ident, n, microtask, var1, .., varn); - Value *ForkCallArgs[] = {Ident, Builder.getInt32(NumCapturedVars), - Builder.CreateBitCast(OutlinedFn, ParallelTaskPtr)}; + Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_flush), Args); +} - SmallVector<Value *, 16> RealArgs; - RealArgs.append(std::begin(ForkCallArgs), std::end(ForkCallArgs)); - RealArgs.append(CI->arg_begin() + /* tid & bound tid */ 2, CI->arg_end()); +void OpenMPIRBuilder::CreateFlush(const LocationDescription &Loc) { + if (!updateToLocation(Loc)) + return; + emitFlush(Loc); +} - FunctionCallee RTLFn = getOrCreateRuntimeFunction(OMPRTL___kmpc_fork_call); - if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) { - if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) { - llvm::LLVMContext &Ctx = F->getContext(); - MDBuilder MDB(Ctx); - // Annotate the callback behavior of the __kmpc_fork_call: - // - The callback callee is argument number 2 (microtask). - // - The first two arguments of the callback callee are unknown (-1). - // - All variadic arguments to the __kmpc_fork_call are passed to the - // callback callee. 
- F->addMetadata( - llvm::LLVMContext::MD_callback, - *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding( - 2, {-1, -1}, - /* VarArgsArePassed */ true)})); +void OpenMPIRBuilder::emitTaskwaitImpl(const LocationDescription &Loc) { + // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 + // global_tid); + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); + Value *Ident = getOrCreateIdent(SrcLocStr); + Value *Args[] = {Ident, getOrCreateThreadID(Ident)}; + + // Ignore return result until untied tasks are supported. + Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskwait), + Args); +} + +void OpenMPIRBuilder::CreateTaskwait(const LocationDescription &Loc) { + if (!updateToLocation(Loc)) + return; + emitTaskwaitImpl(Loc); +} + +void OpenMPIRBuilder::emitTaskyieldImpl(const LocationDescription &Loc) { + // Build call __kmpc_omp_taskyield(loc, thread_id, 0); + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); + Value *Ident = getOrCreateIdent(SrcLocStr); + Constant *I32Null = ConstantInt::getNullValue(Int32); + Value *Args[] = {Ident, getOrCreateThreadID(Ident), I32Null}; + + Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskyield), + Args); +} + +void OpenMPIRBuilder::CreateTaskyield(const LocationDescription &Loc) { + if (!updateToLocation(Loc)) + return; + emitTaskyieldImpl(Loc); +} + +OpenMPIRBuilder::InsertPointTy +OpenMPIRBuilder::CreateMaster(const LocationDescription &Loc, + BodyGenCallbackTy BodyGenCB, + FinalizeCallbackTy FiniCB) { + + if (!updateToLocation(Loc)) + return Loc.IP; + + Directive OMPD = Directive::OMPD_master; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); + Value *Ident = getOrCreateIdent(SrcLocStr); + Value *ThreadId = getOrCreateThreadID(Ident); + Value *Args[] = {Ident, ThreadId}; + + Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_master); + Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args); + + Function *ExitRTLFn = 
getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_master); + Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args); + + return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB, + /*Conditional*/ true, /*hasFinalize*/ true); +} + +OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::CreateCritical( + const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, + FinalizeCallbackTy FiniCB, StringRef CriticalName, Value *HintInst) { + + if (!updateToLocation(Loc)) + return Loc.IP; + + Directive OMPD = Directive::OMPD_critical; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); + Value *Ident = getOrCreateIdent(SrcLocStr); + Value *ThreadId = getOrCreateThreadID(Ident); + Value *LockVar = getOMPCriticalRegionLock(CriticalName); + Value *Args[] = {Ident, ThreadId, LockVar}; + + SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), std::end(Args)); + Function *RTFn = nullptr; + if (HintInst) { + // Add Hint to entry Args and create call + EnterArgs.push_back(HintInst); + RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical_with_hint); + } else { + RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical); + } + Instruction *EntryCall = Builder.CreateCall(RTFn, EnterArgs); + + Function *ExitRTLFn = + getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_critical); + Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args); + + return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB, + /*Conditional*/ false, /*hasFinalize*/ true); +} + +OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::EmitOMPInlinedRegion( + Directive OMPD, Instruction *EntryCall, Instruction *ExitCall, + BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool Conditional, + bool HasFinalize) { + + if (HasFinalize) + FinalizationStack.push_back({FiniCB, OMPD, /*IsCancellable*/ false}); + + // Create inlined region's entry and body blocks, in preparation + // for conditional creation + BasicBlock *EntryBB = Builder.GetInsertBlock(); + Instruction *SplitPos = 
EntryBB->getTerminator(); + if (!isa_and_nonnull<BranchInst>(SplitPos)) + SplitPos = new UnreachableInst(Builder.getContext(), EntryBB); + BasicBlock *ExitBB = EntryBB->splitBasicBlock(SplitPos, "omp_region.end"); + BasicBlock *FiniBB = + EntryBB->splitBasicBlock(EntryBB->getTerminator(), "omp_region.finalize"); + + Builder.SetInsertPoint(EntryBB->getTerminator()); + emitCommonDirectiveEntry(OMPD, EntryCall, ExitBB, Conditional); + + // generate body + BodyGenCB(/* AllocaIP */ InsertPointTy(), + /* CodeGenIP */ Builder.saveIP(), *FiniBB); + + // If we didn't emit a branch to FiniBB during body generation, it means + // FiniBB is unreachable (e.g. while(1);). stop generating all the + // unreachable blocks, and remove anything we are not going to use. + auto SkipEmittingRegion = FiniBB->hasNPredecessors(0); + if (SkipEmittingRegion) { + FiniBB->eraseFromParent(); + ExitCall->eraseFromParent(); + // Discard finalization if we have it. + if (HasFinalize) { + assert(!FinalizationStack.empty() && + "Unexpected finalization stack state!"); + FinalizationStack.pop_back(); } + } else { + // emit exit call and do any needed finalization. + auto FinIP = InsertPointTy(FiniBB, FiniBB->getFirstInsertionPt()); + assert(FiniBB->getTerminator()->getNumSuccessors() == 1 && + FiniBB->getTerminator()->getSuccessor(0) == ExitBB && + "Unexpected control flow graph state!!"); + emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize); + assert(FiniBB->getUniquePredecessor()->getUniqueSuccessor() == FiniBB && + "Unexpected Control Flow State!"); + MergeBlockIntoPredecessor(FiniBB); } - Builder.CreateCall(RTLFn, RealArgs); + // If we are skipping the region of a non conditional, remove the exit + // block, and clear the builder's insertion point. 
+ assert(SplitPos->getParent() == ExitBB && + "Unexpected Insertion point location!"); + if (!Conditional && SkipEmittingRegion) { + ExitBB->eraseFromParent(); + Builder.ClearInsertionPoint(); + } else { + auto merged = MergeBlockIntoPredecessor(ExitBB); + BasicBlock *ExitPredBB = SplitPos->getParent(); + auto InsertBB = merged ? ExitPredBB : ExitBB; + if (!isa_and_nonnull<BranchInst>(SplitPos)) + SplitPos->eraseFromParent(); + Builder.SetInsertPoint(InsertBB); + } - LLVM_DEBUG(dbgs() << "With fork_call placed: " - << *Builder.GetInsertBlock()->getParent() << "\n"); + return Builder.saveIP(); +} - InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end()); - InsertPointTy ExitIP(PRegExitBB, PRegExitBB->end()); +OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveEntry( + Directive OMPD, Value *EntryCall, BasicBlock *ExitBB, bool Conditional) { + + // if nothing to do, Return current insertion point. + if (!Conditional) + return Builder.saveIP(); + + BasicBlock *EntryBB = Builder.GetInsertBlock(); + Value *CallBool = Builder.CreateIsNotNull(EntryCall); + auto *ThenBB = BasicBlock::Create(M.getContext(), "omp_region.body"); + auto *UI = new UnreachableInst(Builder.getContext(), ThenBB); + + // Emit thenBB and set the Builder's insertion point there for + // body generation next. Place the block after the current block. + Function *CurFn = EntryBB->getParent(); + CurFn->getBasicBlockList().insertAfter(EntryBB->getIterator(), ThenBB); + + // Move Entry branch to end of ThenBB, and replace with conditional + // branch (If-stmt) + Instruction *EntryBBTI = EntryBB->getTerminator(); + Builder.CreateCondBr(CallBool, ThenBB, ExitBB); + EntryBBTI->removeFromParent(); + Builder.SetInsertPoint(UI); + Builder.Insert(EntryBBTI); UI->eraseFromParent(); + Builder.SetInsertPoint(ThenBB->getTerminator()); + + // return an insertion point to ExitBB. 
+ return IRBuilder<>::InsertPoint(ExitBB, ExitBB->getFirstInsertionPt()); +} + +OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveExit( + omp::Directive OMPD, InsertPointTy FinIP, Instruction *ExitCall, + bool HasFinalize) { + + Builder.restoreIP(FinIP); + + // If there is finalization to do, emit it before the exit call + if (HasFinalize) { + assert(!FinalizationStack.empty() && + "Unexpected finalization stack state!"); + + FinalizationInfo Fi = FinalizationStack.pop_back_val(); + assert(Fi.DK == OMPD && "Unexpected Directive for Finalization call!"); + + Fi.FiniCB(FinIP); + + BasicBlock *FiniBB = FinIP.getBlock(); + Instruction *FiniBBTI = FiniBB->getTerminator(); - // Initialize the local TID stack location with the argument value. - Builder.SetInsertPoint(PrivTID); - Function::arg_iterator OutlinedAI = OutlinedFn->arg_begin(); - Builder.CreateStore(Builder.CreateLoad(OutlinedAI), PrivTIDAddr); + // set Builder IP for call creation + Builder.SetInsertPoint(FiniBBTI); + } + + // place the Exitcall as last instruction before Finalization block terminator + ExitCall->removeFromParent(); + Builder.Insert(ExitCall); + + return IRBuilder<>::InsertPoint(ExitCall->getParent(), + ExitCall->getIterator()); +} - // If no "if" clause was present we do not need the call created during - // outlining, otherwise we reuse it in the serialized parallel region. - if (!ElseTI) { - CI->eraseFromParent(); +OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::CreateCopyinClauseBlocks( + InsertPointTy IP, Value *MasterAddr, Value *PrivateAddr, + llvm::IntegerType *IntPtrTy, bool BranchtoEnd) { + if (!IP.isSet()) + return IP; + + IRBuilder<>::InsertPointGuard IPG(Builder); + + // creates the following CFG structure + // OMP_Entry : (MasterAddr != PrivateAddr)? 
+ // F T + // | \ + // | copin.not.master + // | / + // v / + // copyin.not.master.end + // | + // v + // OMP.Entry.Next + + BasicBlock *OMP_Entry = IP.getBlock(); + Function *CurFn = OMP_Entry->getParent(); + BasicBlock *CopyBegin = + BasicBlock::Create(M.getContext(), "copyin.not.master", CurFn); + BasicBlock *CopyEnd = nullptr; + + // If entry block is terminated, split to preserve the branch to following + // basic block (i.e. OMP.Entry.Next), otherwise, leave everything as is. + if (isa_and_nonnull<BranchInst>(OMP_Entry->getTerminator())) { + CopyEnd = OMP_Entry->splitBasicBlock(OMP_Entry->getTerminator(), + "copyin.not.master.end"); + OMP_Entry->getTerminator()->eraseFromParent(); } else { + CopyEnd = + BasicBlock::Create(M.getContext(), "copyin.not.master.end", CurFn); + } - // If an "if" clause was present we are now generating the serialized - // version into the "else" branch. - Builder.SetInsertPoint(ElseTI); + Builder.SetInsertPoint(OMP_Entry); + Value *MasterPtr = Builder.CreatePtrToInt(MasterAddr, IntPtrTy); + Value *PrivatePtr = Builder.CreatePtrToInt(PrivateAddr, IntPtrTy); + Value *cmp = Builder.CreateICmpNE(MasterPtr, PrivatePtr); + Builder.CreateCondBr(cmp, CopyBegin, CopyEnd); - // Build calls __kmpc_serialized_parallel(&Ident, GTid); - Value *SerializedParallelCallArgs[] = {Ident, ThreadID}; - Builder.CreateCall( - getOrCreateRuntimeFunction(OMPRTL___kmpc_serialized_parallel), - SerializedParallelCallArgs); + Builder.SetInsertPoint(CopyBegin); + if (BranchtoEnd) + Builder.SetInsertPoint(Builder.CreateBr(CopyEnd)); - // OutlinedFn(&gtid, &zero, CapturedStruct); - CI->removeFromParent(); - Builder.Insert(CI); + return Builder.saveIP(); +} - // __kmpc_end_serialized_parallel(&Ident, GTid); - Value *EndArgs[] = {Ident, ThreadID}; - Builder.CreateCall( - getOrCreateRuntimeFunction(OMPRTL___kmpc_end_serialized_parallel), - EndArgs); +CallInst *OpenMPIRBuilder::CreateOMPAlloc(const LocationDescription &Loc, + Value *Size, Value *Allocator, + std::string 
Name) { + IRBuilder<>::InsertPointGuard IPG(Builder); + Builder.restoreIP(Loc.IP); - LLVM_DEBUG(dbgs() << "With serialized parallel region: " - << *Builder.GetInsertBlock()->getParent() << "\n"); + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); + Value *Ident = getOrCreateIdent(SrcLocStr); + Value *ThreadId = getOrCreateThreadID(Ident); + Value *Args[] = {ThreadId, Size, Allocator}; + + Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_alloc); + + return Builder.CreateCall(Fn, Args, Name); +} + +CallInst *OpenMPIRBuilder::CreateOMPFree(const LocationDescription &Loc, + Value *Addr, Value *Allocator, + std::string Name) { + IRBuilder<>::InsertPointGuard IPG(Builder); + Builder.restoreIP(Loc.IP); + + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); + Value *Ident = getOrCreateIdent(SrcLocStr); + Value *ThreadId = getOrCreateThreadID(Ident); + Value *Args[] = {ThreadId, Addr, Allocator}; + Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_free); + return Builder.CreateCall(Fn, Args, Name); +} + +CallInst *OpenMPIRBuilder::CreateCachedThreadPrivate( + const LocationDescription &Loc, llvm::Value *Pointer, + llvm::ConstantInt *Size, const llvm::Twine &Name) { + IRBuilder<>::InsertPointGuard IPG(Builder); + Builder.restoreIP(Loc.IP); + + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); + Value *Ident = getOrCreateIdent(SrcLocStr); + Value *ThreadId = getOrCreateThreadID(Ident); + Constant *ThreadPrivateCache = + getOrCreateOMPInternalVariable(Int8PtrPtr, Name); + llvm::Value *Args[] = {Ident, ThreadId, Pointer, Size, ThreadPrivateCache}; + + Function *Fn = + getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_threadprivate_cached); + + return Builder.CreateCall(Fn, Args); +} + +std::string OpenMPIRBuilder::getNameWithSeparators(ArrayRef<StringRef> Parts, + StringRef FirstSeparator, + StringRef Separator) { + SmallString<128> Buffer; + llvm::raw_svector_ostream OS(Buffer); + StringRef Sep = FirstSeparator; + for (StringRef Part : Parts) { + OS << Sep << Part; + 
Sep = Separator; } + return OS.str().str(); +} - // Adjust the finalization stack, verify the adjustment, and call the - // finalize function a last time to finalize values between the pre-fini block - // and the exit block if we left the parallel "the normal way". - auto FiniInfo = FinalizationStack.pop_back_val(); - (void)FiniInfo; - assert(FiniInfo.DK == OMPD_parallel && - "Unexpected finalization stack state!"); +Constant *OpenMPIRBuilder::getOrCreateOMPInternalVariable( + llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) { + // TODO: Replace the twine arg with stringref to get rid of the conversion + // logic. However This is taken from current implementation in clang as is. + // Since this method is used in many places exclusively for OMP internal use + // we will keep it as is for temporarily until we move all users to the + // builder and then, if possible, fix it everywhere in one go. + SmallString<256> Buffer; + llvm::raw_svector_ostream Out(Buffer); + Out << Name; + StringRef RuntimeName = Out.str(); + auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first; + if (Elem.second) { + assert(Elem.second->getType()->getPointerElementType() == Ty && + "OMP internal variable has different type than requested"); + } else { + // TODO: investigate the appropriate linkage type used for the global + // variable for possibly changing that to internal or private, or maybe + // create different versions of the function for different OMP internal + // variables. 
+ Elem.second = new llvm::GlobalVariable( + M, Ty, /*IsConstant*/ false, llvm::GlobalValue::CommonLinkage, + llvm::Constant::getNullValue(Ty), Elem.first(), + /*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal, + AddressSpace); + } - Instruction *PreFiniTI = PRegPreFiniBB->getTerminator(); - assert(PreFiniTI->getNumSuccessors() == 1 && - PreFiniTI->getSuccessor(0)->size() == 1 && - isa<ReturnInst>(PreFiniTI->getSuccessor(0)->getTerminator()) && - "Unexpected CFG structure!"); + return Elem.second; +} - InsertPointTy PreFiniIP(PRegPreFiniBB, PreFiniTI->getIterator()); - FiniCB(PreFiniIP); +Value *OpenMPIRBuilder::getOMPCriticalRegionLock(StringRef CriticalName) { + std::string Prefix = Twine("gomp_critical_user_", CriticalName).str(); + std::string Name = getNameWithSeparators({Prefix, "var"}, ".", "."); + return getOrCreateOMPInternalVariable(KmpCriticalNameTy, Name); +} - for (Instruction *I : ToBeDeleted) - I->eraseFromParent(); +// Create all simple and struct types exposed by the runtime and remember +// the llvm::PointerTypes of them for easy access later. +void OpenMPIRBuilder::initializeTypes(Module &M) { + LLVMContext &Ctx = M.getContext(); + StructType *T; +#define OMP_TYPE(VarName, InitValue) VarName = InitValue; +#define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) \ + VarName##Ty = ArrayType::get(ElemTy, ArraySize); \ + VarName##PtrTy = PointerType::getUnqual(VarName##Ty); +#define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \ + VarName = FunctionType::get(ReturnType, {__VA_ARGS__}, IsVarArg); \ + VarName##Ptr = PointerType::getUnqual(VarName); +#define OMP_STRUCT_TYPE(VarName, StructName, ...) 
\ + T = M.getTypeByName(StructName); \ + if (!T) \ + T = StructType::create(Ctx, {__VA_ARGS__}, StructName); \ + VarName = T; \ + VarName##Ptr = PointerType::getUnqual(T); +#include "llvm/Frontend/OpenMP/OMPKinds.def" +} - return AfterIP; +void OpenMPIRBuilder::OutlineInfo::collectBlocks( + SmallPtrSetImpl<BasicBlock *> &BlockSet, + SmallVectorImpl<BasicBlock *> &BlockVector) { + SmallVector<BasicBlock *, 32> Worklist; + BlockSet.insert(EntryBB); + BlockSet.insert(ExitBB); + + Worklist.push_back(EntryBB); + while (!Worklist.empty()) { + BasicBlock *BB = Worklist.pop_back_val(); + BlockVector.push_back(BB); + for (BasicBlock *SuccBB : successors(BB)) + if (BlockSet.insert(SuccBB).second) + Worklist.push_back(SuccBB); + } } |