diff options
| author | Dimitry Andric <dim@FreeBSD.org> | 2021-07-29 20:15:26 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2021-07-29 20:15:26 +0000 |
| commit | 344a3780b2e33f6ca763666c380202b18aab72a3 (patch) | |
| tree | f0b203ee6eb71d7fdd792373e3c81eb18d6934dd /llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | |
| parent | b60736ec1405bb0a8dd40989f67ef4c93da068ab (diff) | |
vendor/llvm-project/llvmorg-13-init-16847-g88e66fa60ae5vendor/llvm-project/llvmorg-12.0.1-rc2-0-ge7dac564cd0evendor/llvm-project/llvmorg-12.0.1-0-gfed41342a82f
Diffstat (limited to 'llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp')
| -rw-r--r-- | llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 975 |
1 file changed, 948 insertions, 27 deletions
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 1f67aecb57e9..76954f9a37e1 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -20,6 +20,7 @@ #include "llvm/IR/DebugInfo.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Value.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Error.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -41,15 +42,27 @@ static cl::opt<bool> void OpenMPIRBuilder::addAttributes(omp::RuntimeFunction FnID, Function &Fn) { LLVMContext &Ctx = Fn.getContext(); + // Get the function's current attributes. + auto Attrs = Fn.getAttributes(); + auto FnAttrs = Attrs.getFnAttributes(); + auto RetAttrs = Attrs.getRetAttributes(); + SmallVector<AttributeSet, 4> ArgAttrs; + for (size_t ArgNo = 0; ArgNo < Fn.arg_size(); ++ArgNo) + ArgAttrs.emplace_back(Attrs.getParamAttributes(ArgNo)); + #define OMP_ATTRS_SET(VarName, AttrSet) AttributeSet VarName = AttrSet; #include "llvm/Frontend/OpenMP/OMPKinds.def" - // Add attributes to the new declaration. + // Add attributes to the function declaration. 
switch (FnID) { #define OMP_RTL_ATTRS(Enum, FnAttrSet, RetAttrSet, ArgAttrSets) \ case Enum: \ - Fn.setAttributes( \ - AttributeList::get(Ctx, FnAttrSet, RetAttrSet, ArgAttrSets)); \ + FnAttrs = FnAttrs.addAttributes(Ctx, FnAttrSet); \ + RetAttrs = RetAttrs.addAttributes(Ctx, RetAttrSet); \ + for (size_t ArgNo = 0; ArgNo < ArgAttrSets.size(); ++ArgNo) \ + ArgAttrs[ArgNo] = \ + ArgAttrs[ArgNo].addAttributes(Ctx, ArgAttrSets[ArgNo]); \ + Fn.setAttributes(AttributeList::get(Ctx, FnAttrs, RetAttrs, ArgAttrs)); \ break; #include "llvm/Frontend/OpenMP/OMPKinds.def" default: @@ -126,15 +139,23 @@ Function *OpenMPIRBuilder::getOrCreateRuntimeFunctionPtr(RuntimeFunction FnID) { void OpenMPIRBuilder::initialize() { initializeTypes(M); } -void OpenMPIRBuilder::finalize(bool AllowExtractorSinking) { +void OpenMPIRBuilder::finalize(Function *Fn, bool AllowExtractorSinking) { SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet; SmallVector<BasicBlock *, 32> Blocks; + SmallVector<OutlineInfo, 16> DeferredOutlines; for (OutlineInfo &OI : OutlineInfos) { + // Skip functions that have not finalized yet; may happen with nested + // function generation. + if (Fn && OI.getFunction() != Fn) { + DeferredOutlines.push_back(OI); + continue; + } + ParallelRegionBlockSet.clear(); Blocks.clear(); OI.collectBlocks(ParallelRegionBlockSet, Blocks); - Function *OuterFn = OI.EntryBB->getParent(); + Function *OuterFn = OI.getFunction(); CodeExtractorAnalysisCache CEAC(*OuterFn); CodeExtractor Extractor(Blocks, /* DominatorTree */ nullptr, /* AggregateArgs */ false, @@ -199,8 +220,12 @@ void OpenMPIRBuilder::finalize(bool AllowExtractorSinking) { OI.PostOutlineCB(*OutlinedFn); } - // Allow finalize to be called multiple times. - OutlineInfos.clear(); + // Remove work items that have been completed. 
+ OutlineInfos = std::move(DeferredOutlines); +} + +OpenMPIRBuilder::~OpenMPIRBuilder() { + assert(OutlineInfos.empty() && "There must be no outstanding outlinings"); } Value *OpenMPIRBuilder::getOrCreateIdent(Constant *SrcLocStr, @@ -393,9 +418,18 @@ OpenMPIRBuilder::createCancel(const LocationDescription &Loc, Value *Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind}; Value *Result = Builder.CreateCall( getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancel), Args); + auto ExitCB = [this, CanceledDirective, Loc](InsertPointTy IP) { + if (CanceledDirective == OMPD_parallel) { + IRBuilder<>::InsertPointGuard IPG(Builder); + Builder.restoreIP(IP); + createBarrier(LocationDescription(Builder.saveIP(), Loc.DL), + omp::Directive::OMPD_unknown, /* ForceSimpleCall */ false, + /* CheckCancelFlag */ false); + } + }; // The actual cancel logic is shared with others, e.g., cancel_barriers. - emitCancelationCheckImpl(Result, CanceledDirective); + emitCancelationCheckImpl(Result, CanceledDirective, ExitCB); // Update the insertion point and remove the terminator we introduced. Builder.SetInsertPoint(UI->getParent()); @@ -404,8 +438,9 @@ OpenMPIRBuilder::createCancel(const LocationDescription &Loc, return Builder.saveIP(); } -void OpenMPIRBuilder::emitCancelationCheckImpl( - Value *CancelFlag, omp::Directive CanceledDirective) { +void OpenMPIRBuilder::emitCancelationCheckImpl(Value *CancelFlag, + omp::Directive CanceledDirective, + FinalizeCallbackTy ExitCB) { assert(isLastFinalizationInfoCancellable(CanceledDirective) && "Unexpected cancellation!"); @@ -433,6 +468,8 @@ void OpenMPIRBuilder::emitCancelationCheckImpl( // From the cancellation block we finalize all variables and go to the // post finalization block that is known to the FiniCB callback. 
Builder.SetInsertPoint(CancellationBlock); + if (ExitCB) + ExitCB(Builder.saveIP()); auto &FI = FinalizationStack.back(); FI.FiniCB(Builder.saveIP()); @@ -540,11 +577,12 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel( AllocaInst *PrivTIDAddr = Builder.CreateAlloca(Int32, nullptr, "tid.addr.local"); - Instruction *PrivTID = Builder.CreateLoad(PrivTIDAddr, "tid"); + Instruction *PrivTID = Builder.CreateLoad(Int32, PrivTIDAddr, "tid"); // Add some fake uses for OpenMP provided arguments. - ToBeDeleted.push_back(Builder.CreateLoad(TIDAddr, "tid.addr.use")); - Instruction *ZeroAddrUse = Builder.CreateLoad(ZeroAddr, "zero.addr.use"); + ToBeDeleted.push_back(Builder.CreateLoad(Int32, TIDAddr, "tid.addr.use")); + Instruction *ZeroAddrUse = Builder.CreateLoad(Int32, ZeroAddr, + "zero.addr.use"); ToBeDeleted.push_back(ZeroAddrUse); // ThenBB @@ -625,7 +663,7 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel( // Initialize the local TID stack location with the argument value. Builder.SetInsertPoint(PrivTID); Function::arg_iterator OutlinedAI = OutlinedFn.arg_begin(); - Builder.CreateStore(Builder.CreateLoad(OutlinedAI), PrivTIDAddr); + Builder.CreateStore(Builder.CreateLoad(Int32, OutlinedAI), PrivTIDAddr); // If no "if" clause was present we do not need the call created during // outlining, otherwise we reuse it in the serialized parallel region. @@ -743,7 +781,7 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel( // Load back next to allocations in the to-be-outlined region. 
Builder.restoreIP(InnerAllocaIP); - Inner = Builder.CreateLoad(Ptr); + Inner = Builder.CreateLoad(V.getType(), Ptr); } Value *ReplacementValue = nullptr; @@ -852,6 +890,138 @@ void OpenMPIRBuilder::createTaskyield(const LocationDescription &Loc) { emitTaskyieldImpl(Loc); } +OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSections( + const LocationDescription &Loc, InsertPointTy AllocaIP, + ArrayRef<StorableBodyGenCallbackTy> SectionCBs, PrivatizeCallbackTy PrivCB, + FinalizeCallbackTy FiniCB, bool IsCancellable, bool IsNowait) { + if (!updateToLocation(Loc)) + return Loc.IP; + + auto FiniCBWrapper = [&](InsertPointTy IP) { + if (IP.getBlock()->end() != IP.getPoint()) + return FiniCB(IP); + // This must be done otherwise any nested constructs using FinalizeOMPRegion + // will fail because that function requires the Finalization Basic Block to + // have a terminator, which is already removed by EmitOMPRegionBody. + // IP is currently at cancelation block. + // We need to backtrack to the condition block to fetch + // the exit block and create a branch from cancelation + // to exit block. + IRBuilder<>::InsertPointGuard IPG(Builder); + Builder.restoreIP(IP); + auto *CaseBB = IP.getBlock()->getSinglePredecessor(); + auto *CondBB = CaseBB->getSinglePredecessor()->getSinglePredecessor(); + auto *ExitBB = CondBB->getTerminator()->getSuccessor(1); + Instruction *I = Builder.CreateBr(ExitBB); + IP = InsertPointTy(I->getParent(), I->getIterator()); + return FiniCB(IP); + }; + + FinalizationStack.push_back({FiniCBWrapper, OMPD_sections, IsCancellable}); + + // Each section is emitted as a switch case + // Each finalization callback is handled from clang.EmitOMPSectionDirective() + // -> OMP.createSection() which generates the IR for each section + // Iterate through all sections and emit a switch construct: + // switch (IV) { + // case 0: + // <SectionStmt[0]>; + // break; + // ... 
+ // case <NumSection> - 1: + // <SectionStmt[<NumSection> - 1]>; + // break; + // } + // ... + // section_loop.after: + // <FiniCB>; + auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, Value *IndVar) { + auto *CurFn = CodeGenIP.getBlock()->getParent(); + auto *ForIncBB = CodeGenIP.getBlock()->getSingleSuccessor(); + auto *ForExitBB = CodeGenIP.getBlock() + ->getSinglePredecessor() + ->getTerminator() + ->getSuccessor(1); + SwitchInst *SwitchStmt = Builder.CreateSwitch(IndVar, ForIncBB); + Builder.restoreIP(CodeGenIP); + unsigned CaseNumber = 0; + for (auto SectionCB : SectionCBs) { + auto *CaseBB = BasicBlock::Create(M.getContext(), + "omp_section_loop.body.case", CurFn); + SwitchStmt->addCase(Builder.getInt32(CaseNumber), CaseBB); + Builder.SetInsertPoint(CaseBB); + SectionCB(InsertPointTy(), Builder.saveIP(), *ForExitBB); + CaseNumber++; + } + // remove the existing terminator from body BB since there can be no + // terminators after switch/case + CodeGenIP.getBlock()->getTerminator()->eraseFromParent(); + }; + // Loop body ends here + // LowerBound, UpperBound, and STride for createCanonicalLoop + Type *I32Ty = Type::getInt32Ty(M.getContext()); + Value *LB = ConstantInt::get(I32Ty, 0); + Value *UB = ConstantInt::get(I32Ty, SectionCBs.size()); + Value *ST = ConstantInt::get(I32Ty, 1); + llvm::CanonicalLoopInfo *LoopInfo = createCanonicalLoop( + Loc, LoopBodyGenCB, LB, UB, ST, true, false, AllocaIP, "section_loop"); + LoopInfo = createStaticWorkshareLoop(Loc, LoopInfo, AllocaIP, true); + BasicBlock *LoopAfterBB = LoopInfo->getAfter(); + Instruction *SplitPos = LoopAfterBB->getTerminator(); + if (!isa_and_nonnull<BranchInst>(SplitPos)) + SplitPos = new UnreachableInst(Builder.getContext(), LoopAfterBB); + // ExitBB after LoopAfterBB because LoopAfterBB is used for FinalizationCB, + // which requires a BB with branch + BasicBlock *ExitBB = + LoopAfterBB->splitBasicBlock(SplitPos, "omp_sections.end"); + SplitPos->eraseFromParent(); + + // Apply the finalization 
callback in LoopAfterBB + auto FiniInfo = FinalizationStack.pop_back_val(); + assert(FiniInfo.DK == OMPD_sections && + "Unexpected finalization stack state!"); + Builder.SetInsertPoint(LoopAfterBB->getTerminator()); + FiniInfo.FiniCB(Builder.saveIP()); + Builder.SetInsertPoint(ExitBB); + + return Builder.saveIP(); +} + +OpenMPIRBuilder::InsertPointTy +OpenMPIRBuilder::createSection(const LocationDescription &Loc, + BodyGenCallbackTy BodyGenCB, + FinalizeCallbackTy FiniCB) { + if (!updateToLocation(Loc)) + return Loc.IP; + + auto FiniCBWrapper = [&](InsertPointTy IP) { + if (IP.getBlock()->end() != IP.getPoint()) + return FiniCB(IP); + // This must be done otherwise any nested constructs using FinalizeOMPRegion + // will fail because that function requires the Finalization Basic Block to + // have a terminator, which is already removed by EmitOMPRegionBody. + // IP is currently at cancelation block. + // We need to backtrack to the condition block to fetch + // the exit block and create a branch from cancelation + // to exit block. 
+ IRBuilder<>::InsertPointGuard IPG(Builder); + Builder.restoreIP(IP); + auto *CaseBB = Loc.IP.getBlock(); + auto *CondBB = CaseBB->getSinglePredecessor()->getSinglePredecessor(); + auto *ExitBB = CondBB->getTerminator()->getSuccessor(1); + Instruction *I = Builder.CreateBr(ExitBB); + IP = InsertPointTy(I->getParent(), I->getIterator()); + return FiniCB(IP); + }; + + Directive OMPD = Directive::OMPD_sections; + // Since we are using Finalization Callback here, HasFinalize + // and IsCancellable have to be true + return EmitOMPInlinedRegion(OMPD, nullptr, nullptr, BodyGenCB, FiniCBWrapper, + /*Conditional*/ false, /*hasFinalize*/ true, + /*IsCancellable*/ true); +} + OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createMaster(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, @@ -876,6 +1046,30 @@ OpenMPIRBuilder::createMaster(const LocationDescription &Loc, /*Conditional*/ true, /*hasFinalize*/ true); } +OpenMPIRBuilder::InsertPointTy +OpenMPIRBuilder::createMasked(const LocationDescription &Loc, + BodyGenCallbackTy BodyGenCB, + FinalizeCallbackTy FiniCB, Value *Filter) { + if (!updateToLocation(Loc)) + return Loc.IP; + + Directive OMPD = Directive::OMPD_masked; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); + Value *Ident = getOrCreateIdent(SrcLocStr); + Value *ThreadId = getOrCreateThreadID(Ident); + Value *Args[] = {Ident, ThreadId, Filter}; + Value *ArgsEnd[] = {Ident, ThreadId}; + + Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_masked); + Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args); + + Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_masked); + Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, ArgsEnd); + + return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB, + /*Conditional*/ true, /*hasFinalize*/ true); +} + CanonicalLoopInfo *OpenMPIRBuilder::createLoopSkeleton( DebugLoc DL, Value *TripCount, Function *F, BasicBlock *PreInsertBefore, BasicBlock 
*PostInsertBefore, const Twine &Name) { @@ -1119,18 +1313,16 @@ CanonicalLoopInfo *OpenMPIRBuilder::createStaticWorkshareLoop( Value *ThreadNum = getOrCreateThreadID(SrcLoc); - // TODO: extract scheduling type and map it to OMP constant. This is curently - // happening in kmp.h and its ilk and needs to be moved to OpenMP.td first. - constexpr int StaticSchedType = 34; - Constant *SchedulingType = ConstantInt::get(I32Type, StaticSchedType); + Constant *SchedulingType = + ConstantInt::get(I32Type, static_cast<int>(OMPScheduleType::Static)); // Call the "init" function and update the trip count of the loop with the // value it produced. Builder.CreateCall(StaticInit, {SrcLoc, ThreadNum, SchedulingType, PLastIter, PLowerBound, PUpperBound, PStride, One, Chunk}); - Value *LowerBound = Builder.CreateLoad(PLowerBound); - Value *InclusiveUpperBound = Builder.CreateLoad(PUpperBound); + Value *LowerBound = Builder.CreateLoad(IVTy, PLowerBound); + Value *InclusiveUpperBound = Builder.CreateLoad(IVTy, PUpperBound); Value *TripCountMinusOne = Builder.CreateSub(InclusiveUpperBound, LowerBound); Value *TripCount = Builder.CreateAdd(TripCountMinusOne, One); setCanonicalLoopTripCount(CLI, TripCount); @@ -1164,6 +1356,154 @@ CanonicalLoopInfo *OpenMPIRBuilder::createStaticWorkshareLoop( return CLI; } +CanonicalLoopInfo *OpenMPIRBuilder::createWorkshareLoop( + const LocationDescription &Loc, CanonicalLoopInfo *CLI, + InsertPointTy AllocaIP, bool NeedsBarrier) { + // Currently only supports static schedules. + return createStaticWorkshareLoop(Loc, CLI, AllocaIP, NeedsBarrier); +} + +/// Returns an LLVM function to call for initializing loop bounds using OpenMP +/// dynamic scheduling depending on `type`. Only i32 and i64 are supported by +/// the runtime. Always interpret integers as unsigned similarly to +/// CanonicalLoopInfo. 
+static FunctionCallee +getKmpcForDynamicInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder) { + unsigned Bitwidth = Ty->getIntegerBitWidth(); + if (Bitwidth == 32) + return OMPBuilder.getOrCreateRuntimeFunction( + M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_4u); + if (Bitwidth == 64) + return OMPBuilder.getOrCreateRuntimeFunction( + M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_8u); + llvm_unreachable("unknown OpenMP loop iterator bitwidth"); +} + +/// Returns an LLVM function to call for updating the next loop using OpenMP +/// dynamic scheduling depending on `type`. Only i32 and i64 are supported by +/// the runtime. Always interpret integers as unsigned similarly to +/// CanonicalLoopInfo. +static FunctionCallee +getKmpcForDynamicNextForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder) { + unsigned Bitwidth = Ty->getIntegerBitWidth(); + if (Bitwidth == 32) + return OMPBuilder.getOrCreateRuntimeFunction( + M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_4u); + if (Bitwidth == 64) + return OMPBuilder.getOrCreateRuntimeFunction( + M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_8u); + llvm_unreachable("unknown OpenMP loop iterator bitwidth"); +} + +OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createDynamicWorkshareLoop( + const LocationDescription &Loc, CanonicalLoopInfo *CLI, + InsertPointTy AllocaIP, OMPScheduleType SchedType, bool NeedsBarrier, + Value *Chunk) { + // Set up the source location value for OpenMP runtime. + Builder.SetCurrentDebugLocation(Loc.DL); + + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); + Value *SrcLoc = getOrCreateIdent(SrcLocStr); + + // Declare useful OpenMP runtime functions. + Value *IV = CLI->getIndVar(); + Type *IVTy = IV->getType(); + FunctionCallee DynamicInit = getKmpcForDynamicInitForType(IVTy, M, *this); + FunctionCallee DynamicNext = getKmpcForDynamicNextForType(IVTy, M, *this); + + // Allocate space for computed loop bounds as expected by the "init" function. 
+ Builder.restoreIP(AllocaIP); + Type *I32Type = Type::getInt32Ty(M.getContext()); + Value *PLastIter = Builder.CreateAlloca(I32Type, nullptr, "p.lastiter"); + Value *PLowerBound = Builder.CreateAlloca(IVTy, nullptr, "p.lowerbound"); + Value *PUpperBound = Builder.CreateAlloca(IVTy, nullptr, "p.upperbound"); + Value *PStride = Builder.CreateAlloca(IVTy, nullptr, "p.stride"); + + // At the end of the preheader, prepare for calling the "init" function by + // storing the current loop bounds into the allocated space. A canonical loop + // always iterates from 0 to trip-count with step 1. Note that "init" expects + // and produces an inclusive upper bound. + BasicBlock *PreHeader = CLI->getPreheader(); + Builder.SetInsertPoint(PreHeader->getTerminator()); + Constant *One = ConstantInt::get(IVTy, 1); + Builder.CreateStore(One, PLowerBound); + Value *UpperBound = CLI->getTripCount(); + Builder.CreateStore(UpperBound, PUpperBound); + Builder.CreateStore(One, PStride); + + BasicBlock *Header = CLI->getHeader(); + BasicBlock *Exit = CLI->getExit(); + BasicBlock *Cond = CLI->getCond(); + InsertPointTy AfterIP = CLI->getAfterIP(); + + // The CLI will be "broken" in the code below, as the loop is no longer + // a valid canonical loop. + + if (!Chunk) + Chunk = One; + + Value *ThreadNum = getOrCreateThreadID(SrcLoc); + + Constant *SchedulingType = + ConstantInt::get(I32Type, static_cast<int>(SchedType)); + + // Call the "init" function. + Builder.CreateCall(DynamicInit, + {SrcLoc, ThreadNum, SchedulingType, /* LowerBound */ One, + UpperBound, /* step */ One, Chunk}); + + // An outer loop around the existing one. + BasicBlock *OuterCond = BasicBlock::Create( + PreHeader->getContext(), Twine(PreHeader->getName()) + ".outer.cond", + PreHeader->getParent()); + // This needs to be 32-bit always, so can't use the IVTy Zero above. 
+ Builder.SetInsertPoint(OuterCond, OuterCond->getFirstInsertionPt()); + Value *Res = + Builder.CreateCall(DynamicNext, {SrcLoc, ThreadNum, PLastIter, + PLowerBound, PUpperBound, PStride}); + Constant *Zero32 = ConstantInt::get(I32Type, 0); + Value *MoreWork = Builder.CreateCmp(CmpInst::ICMP_NE, Res, Zero32); + Value *LowerBound = + Builder.CreateSub(Builder.CreateLoad(IVTy, PLowerBound), One, "lb"); + Builder.CreateCondBr(MoreWork, Header, Exit); + + // Change PHI-node in loop header to use outer cond rather than preheader, + // and set IV to the LowerBound. + Instruction *Phi = &Header->front(); + auto *PI = cast<PHINode>(Phi); + PI->setIncomingBlock(0, OuterCond); + PI->setIncomingValue(0, LowerBound); + + // Then set the pre-header to jump to the OuterCond + Instruction *Term = PreHeader->getTerminator(); + auto *Br = cast<BranchInst>(Term); + Br->setSuccessor(0, OuterCond); + + // Modify the inner condition: + // * Use the UpperBound returned from the DynamicNext call. + // * jump to the loop outer loop when done with one of the inner loops. + Builder.SetInsertPoint(Cond, Cond->getFirstInsertionPt()); + UpperBound = Builder.CreateLoad(IVTy, PUpperBound, "ub"); + Instruction *Comp = &*Builder.GetInsertPoint(); + auto *CI = cast<CmpInst>(Comp); + CI->setOperand(1, UpperBound); + // Redirect the inner exit to branch to outer condition. + Instruction *Branch = &Cond->back(); + auto *BI = cast<BranchInst>(Branch); + assert(BI->getSuccessor(1) == Exit); + BI->setSuccessor(1, OuterCond); + + // Add the barrier if requested. + if (NeedsBarrier) { + Builder.SetInsertPoint(&Exit->back()); + createBarrier(LocationDescription(Builder.saveIP(), Loc.DL), + omp::Directive::OMPD_for, /* ForceSimpleCall */ false, + /* CheckCancelFlag */ false); + } + + return AfterIP; +} + /// Make \p Source branch to \p Target. 
/// /// Handles two situations: @@ -1225,6 +1565,127 @@ static void removeUnusedBlocksFromParent(ArrayRef<BasicBlock *> BBs) { DeleteDeadBlocks(BBVec); } +CanonicalLoopInfo * +OpenMPIRBuilder::collapseLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops, + InsertPointTy ComputeIP) { + assert(Loops.size() >= 1 && "At least one loop required"); + size_t NumLoops = Loops.size(); + + // Nothing to do if there is already just one loop. + if (NumLoops == 1) + return Loops.front(); + + CanonicalLoopInfo *Outermost = Loops.front(); + CanonicalLoopInfo *Innermost = Loops.back(); + BasicBlock *OrigPreheader = Outermost->getPreheader(); + BasicBlock *OrigAfter = Outermost->getAfter(); + Function *F = OrigPreheader->getParent(); + + // Setup the IRBuilder for inserting the trip count computation. + Builder.SetCurrentDebugLocation(DL); + if (ComputeIP.isSet()) + Builder.restoreIP(ComputeIP); + else + Builder.restoreIP(Outermost->getPreheaderIP()); + + // Derive the collapsed' loop trip count. + // TODO: Find common/largest indvar type. + Value *CollapsedTripCount = nullptr; + for (CanonicalLoopInfo *L : Loops) { + Value *OrigTripCount = L->getTripCount(); + if (!CollapsedTripCount) { + CollapsedTripCount = OrigTripCount; + continue; + } + + // TODO: Enable UndefinedSanitizer to diagnose an overflow here. + CollapsedTripCount = Builder.CreateMul(CollapsedTripCount, OrigTripCount, + {}, /*HasNUW=*/true); + } + + // Create the collapsed loop control flow. + CanonicalLoopInfo *Result = + createLoopSkeleton(DL, CollapsedTripCount, F, + OrigPreheader->getNextNode(), OrigAfter, "collapsed"); + + // Build the collapsed loop body code. + // Start with deriving the input loop induction variables from the collapsed + // one, using a divmod scheme. To preserve the original loops' order, the + // innermost loop use the least significant bits. 
+ Builder.restoreIP(Result->getBodyIP()); + + Value *Leftover = Result->getIndVar(); + SmallVector<Value *> NewIndVars; + NewIndVars.set_size(NumLoops); + for (int i = NumLoops - 1; i >= 1; --i) { + Value *OrigTripCount = Loops[i]->getTripCount(); + + Value *NewIndVar = Builder.CreateURem(Leftover, OrigTripCount); + NewIndVars[i] = NewIndVar; + + Leftover = Builder.CreateUDiv(Leftover, OrigTripCount); + } + // Outermost loop gets all the remaining bits. + NewIndVars[0] = Leftover; + + // Construct the loop body control flow. + // We progressively construct the branch structure following in direction of + // the control flow, from the leading in-between code, the loop nest body, the + // trailing in-between code, and rejoining the collapsed loop's latch. + // ContinueBlock and ContinuePred keep track of the source(s) of next edge. If + // the ContinueBlock is set, continue with that block. If ContinuePred, use + // its predecessors as sources. + BasicBlock *ContinueBlock = Result->getBody(); + BasicBlock *ContinuePred = nullptr; + auto ContinueWith = [&ContinueBlock, &ContinuePred, DL](BasicBlock *Dest, + BasicBlock *NextSrc) { + if (ContinueBlock) + redirectTo(ContinueBlock, Dest, DL); + else + redirectAllPredecessorsTo(ContinuePred, Dest, DL); + + ContinueBlock = nullptr; + ContinuePred = NextSrc; + }; + + // The code before the nested loop of each level. + // Because we are sinking it into the nest, it will be executed more often + // that the original loop. More sophisticated schemes could keep track of what + // the in-between code is and instantiate it only once per thread. + for (size_t i = 0; i < NumLoops - 1; ++i) + ContinueWith(Loops[i]->getBody(), Loops[i + 1]->getHeader()); + + // Connect the loop nest body. + ContinueWith(Innermost->getBody(), Innermost->getLatch()); + + // The code after the nested loop at each level. 
+ for (size_t i = NumLoops - 1; i > 0; --i) + ContinueWith(Loops[i]->getAfter(), Loops[i - 1]->getLatch()); + + // Connect the finished loop to the collapsed loop latch. + ContinueWith(Result->getLatch(), nullptr); + + // Replace the input loops with the new collapsed loop. + redirectTo(Outermost->getPreheader(), Result->getPreheader(), DL); + redirectTo(Result->getAfter(), Outermost->getAfter(), DL); + + // Replace the input loop indvars with the derived ones. + for (size_t i = 0; i < NumLoops; ++i) + Loops[i]->getIndVar()->replaceAllUsesWith(NewIndVars[i]); + + // Remove unused parts of the input loops. + SmallVector<BasicBlock *, 12> OldControlBBs; + OldControlBBs.reserve(6 * Loops.size()); + for (CanonicalLoopInfo *Loop : Loops) + Loop->collectControlBlocks(OldControlBBs); + removeUnusedBlocksFromParent(OldControlBBs); + +#ifndef NDEBUG + Result->assertOK(); +#endif + return Result; +} + std::vector<CanonicalLoopInfo *> OpenMPIRBuilder::tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops, ArrayRef<Value *> TileSizes) { @@ -1421,7 +1882,7 @@ OpenMPIRBuilder::createCopyPrivate(const LocationDescription &Loc, Value *Ident = getOrCreateIdent(SrcLocStr); Value *ThreadId = getOrCreateThreadID(Ident); - llvm::Value *DidItLD = Builder.CreateLoad(DidIt); + llvm::Value *DidItLD = Builder.CreateLoad(Builder.getInt32Ty(), DidIt); Value *Args[] = {Ident, ThreadId, BufSize, CpyBuf, CpyFn, DidItLD}; @@ -1502,10 +1963,10 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createCritical( OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::EmitOMPInlinedRegion( Directive OMPD, Instruction *EntryCall, Instruction *ExitCall, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool Conditional, - bool HasFinalize) { + bool HasFinalize, bool IsCancellable) { if (HasFinalize) - FinalizationStack.push_back({FiniCB, OMPD, /*IsCancellable*/ false}); + FinalizationStack.push_back({FiniCB, OMPD, IsCancellable}); // Create inlined region's entry and body blocks, in preparation // 
for conditional creation @@ -1570,9 +2031,8 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::EmitOMPInlinedRegion( OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveEntry( Directive OMPD, Value *EntryCall, BasicBlock *ExitBB, bool Conditional) { - // if nothing to do, Return current insertion point. - if (!Conditional) + if (!Conditional || !EntryCall) return Builder.saveIP(); BasicBlock *EntryBB = Builder.GetInsertBlock(); @@ -1622,6 +2082,9 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveExit( Builder.SetInsertPoint(FiniBBTI); } + if (!ExitCall) + return Builder.saveIP(); + // place the Exitcall as last instruction before Finalization block terminator ExitCall->removeFromParent(); Builder.Insert(ExitCall); @@ -1724,11 +2187,75 @@ CallInst *OpenMPIRBuilder::createCachedThreadPrivate( llvm::Value *Args[] = {Ident, ThreadId, Pointer, Size, ThreadPrivateCache}; Function *Fn = - getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_threadprivate_cached); + getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_threadprivate_cached); return Builder.CreateCall(Fn, Args); } +OpenMPIRBuilder::InsertPointTy +OpenMPIRBuilder::createTargetInit(const LocationDescription &Loc, bool IsSPMD, bool RequiresFullRuntime) { + if (!updateToLocation(Loc)) + return Loc.IP; + + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); + Value *Ident = getOrCreateIdent(SrcLocStr); + ConstantInt *IsSPMDVal = ConstantInt::getBool(Int32->getContext(), IsSPMD); + ConstantInt *UseGenericStateMachine = + ConstantInt::getBool(Int32->getContext(), !IsSPMD); + ConstantInt *RequiresFullRuntimeVal = ConstantInt::getBool(Int32->getContext(), RequiresFullRuntime); + + Function *Fn = getOrCreateRuntimeFunctionPtr( + omp::RuntimeFunction::OMPRTL___kmpc_target_init); + + CallInst *ThreadKind = + Builder.CreateCall(Fn, {Ident, IsSPMDVal, UseGenericStateMachine, RequiresFullRuntimeVal}); + + Value *ExecUserCode = Builder.CreateICmpEQ( + ThreadKind, ConstantInt::get(ThreadKind->getType(), -1), 
"exec_user_code"); + + // ThreadKind = __kmpc_target_init(...) + // if (ThreadKind == -1) + // user_code + // else + // return; + + auto *UI = Builder.CreateUnreachable(); + BasicBlock *CheckBB = UI->getParent(); + BasicBlock *UserCodeEntryBB = CheckBB->splitBasicBlock(UI, "user_code.entry"); + + BasicBlock *WorkerExitBB = BasicBlock::Create( + CheckBB->getContext(), "worker.exit", CheckBB->getParent()); + Builder.SetInsertPoint(WorkerExitBB); + Builder.CreateRetVoid(); + + auto *CheckBBTI = CheckBB->getTerminator(); + Builder.SetInsertPoint(CheckBBTI); + Builder.CreateCondBr(ExecUserCode, UI->getParent(), WorkerExitBB); + + CheckBBTI->eraseFromParent(); + UI->eraseFromParent(); + + // Continue in the "user_code" block, see diagram above and in + // openmp/libomptarget/deviceRTLs/common/include/target.h . + return InsertPointTy(UserCodeEntryBB, UserCodeEntryBB->getFirstInsertionPt()); +} + +void OpenMPIRBuilder::createTargetDeinit(const LocationDescription &Loc, + bool IsSPMD, bool RequiresFullRuntime) { + if (!updateToLocation(Loc)) + return; + + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); + Value *Ident = getOrCreateIdent(SrcLocStr); + ConstantInt *IsSPMDVal = ConstantInt::getBool(Int32->getContext(), IsSPMD); + ConstantInt *RequiresFullRuntimeVal = ConstantInt::getBool(Int32->getContext(), RequiresFullRuntime); + + Function *Fn = getOrCreateRuntimeFunctionPtr( + omp::RuntimeFunction::OMPRTL___kmpc_target_deinit); + + Builder.CreateCall(Fn, {Ident, IsSPMDVal, RequiresFullRuntimeVal}); +} + std::string OpenMPIRBuilder::getNameWithSeparators(ArrayRef<StringRef> Parts, StringRef FirstSeparator, StringRef Separator) { @@ -1778,6 +2305,400 @@ Value *OpenMPIRBuilder::getOMPCriticalRegionLock(StringRef CriticalName) { return getOrCreateOMPInternalVariable(KmpCriticalNameTy, Name); } +GlobalVariable * +OpenMPIRBuilder::createOffloadMaptypes(SmallVectorImpl<uint64_t> &Mappings, + std::string VarName) { + llvm::Constant *MaptypesArrayInit = + 
llvm::ConstantDataArray::get(M.getContext(), Mappings); + auto *MaptypesArrayGlobal = new llvm::GlobalVariable( + M, MaptypesArrayInit->getType(), + /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, MaptypesArrayInit, + VarName); + MaptypesArrayGlobal->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); + return MaptypesArrayGlobal; +} + +void OpenMPIRBuilder::createMapperAllocas(const LocationDescription &Loc, + InsertPointTy AllocaIP, + unsigned NumOperands, + struct MapperAllocas &MapperAllocas) { + if (!updateToLocation(Loc)) + return; + + auto *ArrI8PtrTy = ArrayType::get(Int8Ptr, NumOperands); + auto *ArrI64Ty = ArrayType::get(Int64, NumOperands); + Builder.restoreIP(AllocaIP); + AllocaInst *ArgsBase = Builder.CreateAlloca(ArrI8PtrTy); + AllocaInst *Args = Builder.CreateAlloca(ArrI8PtrTy); + AllocaInst *ArgSizes = Builder.CreateAlloca(ArrI64Ty); + Builder.restoreIP(Loc.IP); + MapperAllocas.ArgsBase = ArgsBase; + MapperAllocas.Args = Args; + MapperAllocas.ArgSizes = ArgSizes; +} + +void OpenMPIRBuilder::emitMapperCall(const LocationDescription &Loc, + Function *MapperFunc, Value *SrcLocInfo, + Value *MaptypesArg, Value *MapnamesArg, + struct MapperAllocas &MapperAllocas, + int64_t DeviceID, unsigned NumOperands) { + if (!updateToLocation(Loc)) + return; + + auto *ArrI8PtrTy = ArrayType::get(Int8Ptr, NumOperands); + auto *ArrI64Ty = ArrayType::get(Int64, NumOperands); + Value *ArgsBaseGEP = + Builder.CreateInBoundsGEP(ArrI8PtrTy, MapperAllocas.ArgsBase, + {Builder.getInt32(0), Builder.getInt32(0)}); + Value *ArgsGEP = + Builder.CreateInBoundsGEP(ArrI8PtrTy, MapperAllocas.Args, + {Builder.getInt32(0), Builder.getInt32(0)}); + Value *ArgSizesGEP = + Builder.CreateInBoundsGEP(ArrI64Ty, MapperAllocas.ArgSizes, + {Builder.getInt32(0), Builder.getInt32(0)}); + Value *NullPtr = Constant::getNullValue(Int8Ptr->getPointerTo()); + Builder.CreateCall(MapperFunc, + {SrcLocInfo, Builder.getInt64(DeviceID), + Builder.getInt32(NumOperands), ArgsBaseGEP, ArgsGEP, + 
ArgSizesGEP, MaptypesArg, MapnamesArg, NullPtr}); +} + +bool OpenMPIRBuilder::checkAndEmitFlushAfterAtomic( + const LocationDescription &Loc, llvm::AtomicOrdering AO, AtomicKind AK) { + assert(!(AO == AtomicOrdering::NotAtomic || + AO == llvm::AtomicOrdering::Unordered) && + "Unexpected Atomic Ordering."); + + bool Flush = false; + llvm::AtomicOrdering FlushAO = AtomicOrdering::Monotonic; + + switch (AK) { + case Read: + if (AO == AtomicOrdering::Acquire || AO == AtomicOrdering::AcquireRelease || + AO == AtomicOrdering::SequentiallyConsistent) { + FlushAO = AtomicOrdering::Acquire; + Flush = true; + } + break; + case Write: + case Update: + if (AO == AtomicOrdering::Release || AO == AtomicOrdering::AcquireRelease || + AO == AtomicOrdering::SequentiallyConsistent) { + FlushAO = AtomicOrdering::Release; + Flush = true; + } + break; + case Capture: + switch (AO) { + case AtomicOrdering::Acquire: + FlushAO = AtomicOrdering::Acquire; + Flush = true; + break; + case AtomicOrdering::Release: + FlushAO = AtomicOrdering::Release; + Flush = true; + break; + case AtomicOrdering::AcquireRelease: + case AtomicOrdering::SequentiallyConsistent: + FlushAO = AtomicOrdering::AcquireRelease; + Flush = true; + break; + default: + // do nothing - leave silently. 
+ break; + } + } + + if (Flush) { + // Currently Flush RT call still doesn't take memory_ordering, so for when + // that happens, this tries to do the resolution of which atomic ordering + // to use with but issue the flush call + // TODO: pass `FlushAO` after memory ordering support is added + (void)FlushAO; + emitFlush(Loc); + } + + // for AO == AtomicOrdering::Monotonic and all other case combinations + // do nothing + return Flush; +} + +OpenMPIRBuilder::InsertPointTy +OpenMPIRBuilder::createAtomicRead(const LocationDescription &Loc, + AtomicOpValue &X, AtomicOpValue &V, + AtomicOrdering AO) { + if (!updateToLocation(Loc)) + return Loc.IP; + + Type *XTy = X.Var->getType(); + assert(XTy->isPointerTy() && "OMP Atomic expects a pointer to target memory"); + Type *XElemTy = XTy->getPointerElementType(); + assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() || + XElemTy->isPointerTy()) && + "OMP atomic read expected a scalar type"); + + Value *XRead = nullptr; + + if (XElemTy->isIntegerTy()) { + LoadInst *XLD = + Builder.CreateLoad(XElemTy, X.Var, X.IsVolatile, "omp.atomic.read"); + XLD->setAtomic(AO); + XRead = cast<Value>(XLD); + } else { + // We need to bitcast and perform atomic op as integer + unsigned Addrspace = cast<PointerType>(XTy)->getAddressSpace(); + IntegerType *IntCastTy = + IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits()); + Value *XBCast = Builder.CreateBitCast( + X.Var, IntCastTy->getPointerTo(Addrspace), "atomic.src.int.cast"); + LoadInst *XLoad = + Builder.CreateLoad(IntCastTy, XBCast, X.IsVolatile, "omp.atomic.load"); + XLoad->setAtomic(AO); + if (XElemTy->isFloatingPointTy()) { + XRead = Builder.CreateBitCast(XLoad, XElemTy, "atomic.flt.cast"); + } else { + XRead = Builder.CreateIntToPtr(XLoad, XElemTy, "atomic.ptr.cast"); + } + } + checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Read); + Builder.CreateStore(XRead, V.Var, V.IsVolatile); + return Builder.saveIP(); +} + +OpenMPIRBuilder::InsertPointTy 
+OpenMPIRBuilder::createAtomicWrite(const LocationDescription &Loc, + AtomicOpValue &X, Value *Expr, + AtomicOrdering AO) { + if (!updateToLocation(Loc)) + return Loc.IP; + + Type *XTy = X.Var->getType(); + assert(XTy->isPointerTy() && "OMP Atomic expects a pointer to target memory"); + Type *XElemTy = XTy->getPointerElementType(); + assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() || + XElemTy->isPointerTy()) && + "OMP atomic write expected a scalar type"); + + if (XElemTy->isIntegerTy()) { + StoreInst *XSt = Builder.CreateStore(Expr, X.Var, X.IsVolatile); + XSt->setAtomic(AO); + } else { + // We need to bitcast and perform atomic op as integers + unsigned Addrspace = cast<PointerType>(XTy)->getAddressSpace(); + IntegerType *IntCastTy = + IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits()); + Value *XBCast = Builder.CreateBitCast( + X.Var, IntCastTy->getPointerTo(Addrspace), "atomic.dst.int.cast"); + Value *ExprCast = + Builder.CreateBitCast(Expr, IntCastTy, "atomic.src.int.cast"); + StoreInst *XSt = Builder.CreateStore(ExprCast, XBCast, X.IsVolatile); + XSt->setAtomic(AO); + } + + checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Write); + return Builder.saveIP(); +} + +OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicUpdate( + const LocationDescription &Loc, Instruction *AllocIP, AtomicOpValue &X, + Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, + AtomicUpdateCallbackTy &UpdateOp, bool IsXLHSInRHSPart) { + if (!updateToLocation(Loc)) + return Loc.IP; + + LLVM_DEBUG({ + Type *XTy = X.Var->getType(); + assert(XTy->isPointerTy() && + "OMP Atomic expects a pointer to target memory"); + Type *XElemTy = XTy->getPointerElementType(); + assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() || + XElemTy->isPointerTy()) && + "OMP atomic update expected a scalar type"); + assert((RMWOp != AtomicRMWInst::Max) && (RMWOp != AtomicRMWInst::Min) && + (RMWOp != AtomicRMWInst::UMax) && (RMWOp != AtomicRMWInst::UMin) && + 
"OpenMP atomic does not support LT or GT operations"); + }); + + emitAtomicUpdate(AllocIP, X.Var, Expr, AO, RMWOp, UpdateOp, X.IsVolatile, + IsXLHSInRHSPart); + checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Update); + return Builder.saveIP(); +} + +Value *OpenMPIRBuilder::emitRMWOpAsInstruction(Value *Src1, Value *Src2, + AtomicRMWInst::BinOp RMWOp) { + switch (RMWOp) { + case AtomicRMWInst::Add: + return Builder.CreateAdd(Src1, Src2); + case AtomicRMWInst::Sub: + return Builder.CreateSub(Src1, Src2); + case AtomicRMWInst::And: + return Builder.CreateAnd(Src1, Src2); + case AtomicRMWInst::Nand: + return Builder.CreateNeg(Builder.CreateAnd(Src1, Src2)); + case AtomicRMWInst::Or: + return Builder.CreateOr(Src1, Src2); + case AtomicRMWInst::Xor: + return Builder.CreateXor(Src1, Src2); + case AtomicRMWInst::Xchg: + case AtomicRMWInst::FAdd: + case AtomicRMWInst::FSub: + case AtomicRMWInst::BAD_BINOP: + case AtomicRMWInst::Max: + case AtomicRMWInst::Min: + case AtomicRMWInst::UMax: + case AtomicRMWInst::UMin: + llvm_unreachable("Unsupported atomic update operation"); + } + llvm_unreachable("Unsupported atomic update operation"); +} + +std::pair<Value *, Value *> +OpenMPIRBuilder::emitAtomicUpdate(Instruction *AllocIP, Value *X, Value *Expr, + AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, + AtomicUpdateCallbackTy &UpdateOp, + bool VolatileX, bool IsXLHSInRHSPart) { + Type *XElemTy = X->getType()->getPointerElementType(); + + bool DoCmpExch = + ((RMWOp == AtomicRMWInst::BAD_BINOP) || (RMWOp == AtomicRMWInst::FAdd)) || + (RMWOp == AtomicRMWInst::FSub) || + (RMWOp == AtomicRMWInst::Sub && !IsXLHSInRHSPart); + + std::pair<Value *, Value *> Res; + if (XElemTy->isIntegerTy() && !DoCmpExch) { + Res.first = Builder.CreateAtomicRMW(RMWOp, X, Expr, llvm::MaybeAlign(), AO); + // not needed except in case of postfix captures. Generate anyway for + // consistency with the else part. Will be removed with any DCE pass. 
+ Res.second = emitRMWOpAsInstruction(Res.first, Expr, RMWOp); + } else { + unsigned Addrspace = cast<PointerType>(X->getType())->getAddressSpace(); + IntegerType *IntCastTy = + IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits()); + Value *XBCast = + Builder.CreateBitCast(X, IntCastTy->getPointerTo(Addrspace)); + LoadInst *OldVal = + Builder.CreateLoad(IntCastTy, XBCast, X->getName() + ".atomic.load"); + OldVal->setAtomic(AO); + // CurBB + // | /---\ + // ContBB | + // | \---/ + // ExitBB + BasicBlock *CurBB = Builder.GetInsertBlock(); + Instruction *CurBBTI = CurBB->getTerminator(); + CurBBTI = CurBBTI ? CurBBTI : Builder.CreateUnreachable(); + BasicBlock *ExitBB = + CurBB->splitBasicBlock(CurBBTI, X->getName() + ".atomic.exit"); + BasicBlock *ContBB = CurBB->splitBasicBlock(CurBB->getTerminator(), + X->getName() + ".atomic.cont"); + ContBB->getTerminator()->eraseFromParent(); + Builder.SetInsertPoint(ContBB); + llvm::PHINode *PHI = Builder.CreatePHI(OldVal->getType(), 2); + PHI->addIncoming(OldVal, CurBB); + AllocaInst *NewAtomicAddr = Builder.CreateAlloca(XElemTy); + NewAtomicAddr->setName(X->getName() + "x.new.val"); + NewAtomicAddr->moveBefore(AllocIP); + IntegerType *NewAtomicCastTy = + IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits()); + bool IsIntTy = XElemTy->isIntegerTy(); + Value *NewAtomicIntAddr = + (IsIntTy) + ? NewAtomicAddr + : Builder.CreateBitCast(NewAtomicAddr, + NewAtomicCastTy->getPointerTo(Addrspace)); + Value *OldExprVal = PHI; + if (!IsIntTy) { + if (XElemTy->isFloatingPointTy()) { + OldExprVal = Builder.CreateBitCast(PHI, XElemTy, + X->getName() + ".atomic.fltCast"); + } else { + OldExprVal = Builder.CreateIntToPtr(PHI, XElemTy, + X->getName() + ".atomic.ptrCast"); + } + } + + Value *Upd = UpdateOp(OldExprVal, Builder); + Builder.CreateStore(Upd, NewAtomicAddr); + LoadInst *DesiredVal = Builder.CreateLoad(XElemTy, NewAtomicIntAddr); + Value *XAddr = + (IsIntTy) + ? 
X + : Builder.CreateBitCast(X, IntCastTy->getPointerTo(Addrspace)); + AtomicOrdering Failure = + llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO); + AtomicCmpXchgInst *Result = Builder.CreateAtomicCmpXchg( + XAddr, OldExprVal, DesiredVal, llvm::MaybeAlign(), AO, Failure); + Result->setVolatile(VolatileX); + Value *PreviousVal = Builder.CreateExtractValue(Result, /*Idxs=*/0); + Value *SuccessFailureVal = Builder.CreateExtractValue(Result, /*Idxs=*/1); + PHI->addIncoming(PreviousVal, Builder.GetInsertBlock()); + Builder.CreateCondBr(SuccessFailureVal, ExitBB, ContBB); + + Res.first = OldExprVal; + Res.second = Upd; + + // set Insertion point in exit block + if (UnreachableInst *ExitTI = + dyn_cast<UnreachableInst>(ExitBB->getTerminator())) { + CurBBTI->eraseFromParent(); + Builder.SetInsertPoint(ExitBB); + } else { + Builder.SetInsertPoint(ExitTI); + } + } + + return Res; +} + +OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCapture( + const LocationDescription &Loc, Instruction *AllocIP, AtomicOpValue &X, + AtomicOpValue &V, Value *Expr, AtomicOrdering AO, + AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, + bool UpdateExpr, bool IsPostfixUpdate, bool IsXLHSInRHSPart) { + if (!updateToLocation(Loc)) + return Loc.IP; + + LLVM_DEBUG({ + Type *XTy = X.Var->getType(); + assert(XTy->isPointerTy() && + "OMP Atomic expects a pointer to target memory"); + Type *XElemTy = XTy->getPointerElementType(); + assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() || + XElemTy->isPointerTy()) && + "OMP atomic capture expected a scalar type"); + assert((RMWOp != AtomicRMWInst::Max) && (RMWOp != AtomicRMWInst::Min) && + "OpenMP atomic does not support LT or GT operations"); + }); + + // If UpdateExpr is 'x' updated with some `expr` not based on 'x', + // 'x' is simply atomically rewritten with 'expr'. + AtomicRMWInst::BinOp AtomicOp = (UpdateExpr ? 
RMWOp : AtomicRMWInst::Xchg); + std::pair<Value *, Value *> Result = + emitAtomicUpdate(AllocIP, X.Var, Expr, AO, AtomicOp, UpdateOp, + X.IsVolatile, IsXLHSInRHSPart); + + Value *CapturedVal = (IsPostfixUpdate ? Result.first : Result.second); + Builder.CreateStore(CapturedVal, V.Var, V.IsVolatile); + + checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Capture); + return Builder.saveIP(); +} + +GlobalVariable * +OpenMPIRBuilder::createOffloadMapnames(SmallVectorImpl<llvm::Constant *> &Names, + std::string VarName) { + llvm::Constant *MapNamesArrayInit = llvm::ConstantArray::get( + llvm::ArrayType::get( + llvm::Type::getInt8Ty(M.getContext())->getPointerTo(), Names.size()), + Names); + auto *MapNamesArrayGlobal = new llvm::GlobalVariable( + M, MapNamesArrayInit->getType(), + /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, MapNamesArrayInit, + VarName); + return MapNamesArrayGlobal; +} + // Create all simple and struct types exposed by the runtime and remember // the llvm::PointerTypes of them for easy access later. void OpenMPIRBuilder::initializeTypes(Module &M) { |
