summaryrefslogtreecommitdiff
path: root/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2021-07-29 20:15:26 +0000
committerDimitry Andric <dim@FreeBSD.org>2021-07-29 20:15:26 +0000
commit344a3780b2e33f6ca763666c380202b18aab72a3 (patch)
treef0b203ee6eb71d7fdd792373e3c81eb18d6934dd /llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
parentb60736ec1405bb0a8dd40989f67ef4c93da068ab (diff)
Diffstat (limited to 'llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp')
-rw-r--r--llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp975
1 files changed, 948 insertions, 27 deletions
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 1f67aecb57e9..76954f9a37e1 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -20,6 +20,7 @@
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Value.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Error.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -41,15 +42,27 @@ static cl::opt<bool>
void OpenMPIRBuilder::addAttributes(omp::RuntimeFunction FnID, Function &Fn) {
LLVMContext &Ctx = Fn.getContext();
+ // Get the function's current attributes.
+ auto Attrs = Fn.getAttributes();
+ auto FnAttrs = Attrs.getFnAttributes();
+ auto RetAttrs = Attrs.getRetAttributes();
+ SmallVector<AttributeSet, 4> ArgAttrs;
+ for (size_t ArgNo = 0; ArgNo < Fn.arg_size(); ++ArgNo)
+ ArgAttrs.emplace_back(Attrs.getParamAttributes(ArgNo));
+
#define OMP_ATTRS_SET(VarName, AttrSet) AttributeSet VarName = AttrSet;
#include "llvm/Frontend/OpenMP/OMPKinds.def"
- // Add attributes to the new declaration.
+ // Add attributes to the function declaration.
switch (FnID) {
#define OMP_RTL_ATTRS(Enum, FnAttrSet, RetAttrSet, ArgAttrSets) \
case Enum: \
- Fn.setAttributes( \
- AttributeList::get(Ctx, FnAttrSet, RetAttrSet, ArgAttrSets)); \
+ FnAttrs = FnAttrs.addAttributes(Ctx, FnAttrSet); \
+ RetAttrs = RetAttrs.addAttributes(Ctx, RetAttrSet); \
+ for (size_t ArgNo = 0; ArgNo < ArgAttrSets.size(); ++ArgNo) \
+ ArgAttrs[ArgNo] = \
+ ArgAttrs[ArgNo].addAttributes(Ctx, ArgAttrSets[ArgNo]); \
+ Fn.setAttributes(AttributeList::get(Ctx, FnAttrs, RetAttrs, ArgAttrs)); \
break;
#include "llvm/Frontend/OpenMP/OMPKinds.def"
default:
@@ -126,15 +139,23 @@ Function *OpenMPIRBuilder::getOrCreateRuntimeFunctionPtr(RuntimeFunction FnID) {
void OpenMPIRBuilder::initialize() { initializeTypes(M); }
-void OpenMPIRBuilder::finalize(bool AllowExtractorSinking) {
+void OpenMPIRBuilder::finalize(Function *Fn, bool AllowExtractorSinking) {
SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
SmallVector<BasicBlock *, 32> Blocks;
+ SmallVector<OutlineInfo, 16> DeferredOutlines;
for (OutlineInfo &OI : OutlineInfos) {
+ // Skip functions that have not been finalized yet; may happen with nested
+ // function generation.
+ if (Fn && OI.getFunction() != Fn) {
+ DeferredOutlines.push_back(OI);
+ continue;
+ }
+
ParallelRegionBlockSet.clear();
Blocks.clear();
OI.collectBlocks(ParallelRegionBlockSet, Blocks);
- Function *OuterFn = OI.EntryBB->getParent();
+ Function *OuterFn = OI.getFunction();
CodeExtractorAnalysisCache CEAC(*OuterFn);
CodeExtractor Extractor(Blocks, /* DominatorTree */ nullptr,
/* AggregateArgs */ false,
@@ -199,8 +220,12 @@ void OpenMPIRBuilder::finalize(bool AllowExtractorSinking) {
OI.PostOutlineCB(*OutlinedFn);
}
- // Allow finalize to be called multiple times.
- OutlineInfos.clear();
+ // Remove work items that have been completed.
+ OutlineInfos = std::move(DeferredOutlines);
+}
+
+OpenMPIRBuilder::~OpenMPIRBuilder() {
+ assert(OutlineInfos.empty() && "There must be no outstanding outlinings");
}
Value *OpenMPIRBuilder::getOrCreateIdent(Constant *SrcLocStr,
@@ -393,9 +418,18 @@ OpenMPIRBuilder::createCancel(const LocationDescription &Loc,
Value *Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind};
Value *Result = Builder.CreateCall(
getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancel), Args);
+ auto ExitCB = [this, CanceledDirective, Loc](InsertPointTy IP) {
+ if (CanceledDirective == OMPD_parallel) {
+ IRBuilder<>::InsertPointGuard IPG(Builder);
+ Builder.restoreIP(IP);
+ createBarrier(LocationDescription(Builder.saveIP(), Loc.DL),
+ omp::Directive::OMPD_unknown, /* ForceSimpleCall */ false,
+ /* CheckCancelFlag */ false);
+ }
+ };
// The actual cancel logic is shared with others, e.g., cancel_barriers.
- emitCancelationCheckImpl(Result, CanceledDirective);
+ emitCancelationCheckImpl(Result, CanceledDirective, ExitCB);
// Update the insertion point and remove the terminator we introduced.
Builder.SetInsertPoint(UI->getParent());
@@ -404,8 +438,9 @@ OpenMPIRBuilder::createCancel(const LocationDescription &Loc,
return Builder.saveIP();
}
-void OpenMPIRBuilder::emitCancelationCheckImpl(
- Value *CancelFlag, omp::Directive CanceledDirective) {
+void OpenMPIRBuilder::emitCancelationCheckImpl(Value *CancelFlag,
+ omp::Directive CanceledDirective,
+ FinalizeCallbackTy ExitCB) {
assert(isLastFinalizationInfoCancellable(CanceledDirective) &&
"Unexpected cancellation!");
@@ -433,6 +468,8 @@ void OpenMPIRBuilder::emitCancelationCheckImpl(
// From the cancellation block we finalize all variables and go to the
// post finalization block that is known to the FiniCB callback.
Builder.SetInsertPoint(CancellationBlock);
+ if (ExitCB)
+ ExitCB(Builder.saveIP());
auto &FI = FinalizationStack.back();
FI.FiniCB(Builder.saveIP());
@@ -540,11 +577,12 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(
AllocaInst *PrivTIDAddr =
Builder.CreateAlloca(Int32, nullptr, "tid.addr.local");
- Instruction *PrivTID = Builder.CreateLoad(PrivTIDAddr, "tid");
+ Instruction *PrivTID = Builder.CreateLoad(Int32, PrivTIDAddr, "tid");
// Add some fake uses for OpenMP provided arguments.
- ToBeDeleted.push_back(Builder.CreateLoad(TIDAddr, "tid.addr.use"));
- Instruction *ZeroAddrUse = Builder.CreateLoad(ZeroAddr, "zero.addr.use");
+ ToBeDeleted.push_back(Builder.CreateLoad(Int32, TIDAddr, "tid.addr.use"));
+ Instruction *ZeroAddrUse = Builder.CreateLoad(Int32, ZeroAddr,
+ "zero.addr.use");
ToBeDeleted.push_back(ZeroAddrUse);
// ThenBB
@@ -625,7 +663,7 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(
// Initialize the local TID stack location with the argument value.
Builder.SetInsertPoint(PrivTID);
Function::arg_iterator OutlinedAI = OutlinedFn.arg_begin();
- Builder.CreateStore(Builder.CreateLoad(OutlinedAI), PrivTIDAddr);
+ Builder.CreateStore(Builder.CreateLoad(Int32, OutlinedAI), PrivTIDAddr);
// If no "if" clause was present we do not need the call created during
// outlining, otherwise we reuse it in the serialized parallel region.
@@ -743,7 +781,7 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(
// Load back next to allocations in the to-be-outlined region.
Builder.restoreIP(InnerAllocaIP);
- Inner = Builder.CreateLoad(Ptr);
+ Inner = Builder.CreateLoad(V.getType(), Ptr);
}
Value *ReplacementValue = nullptr;
@@ -852,6 +890,138 @@ void OpenMPIRBuilder::createTaskyield(const LocationDescription &Loc) {
emitTaskyieldImpl(Loc);
}
+OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSections(
+ const LocationDescription &Loc, InsertPointTy AllocaIP,
+ ArrayRef<StorableBodyGenCallbackTy> SectionCBs, PrivatizeCallbackTy PrivCB,
+ FinalizeCallbackTy FiniCB, bool IsCancellable, bool IsNowait) {
+ if (!updateToLocation(Loc))
+ return Loc.IP;
+
+ auto FiniCBWrapper = [&](InsertPointTy IP) {
+ if (IP.getBlock()->end() != IP.getPoint())
+ return FiniCB(IP);
+ // This must be done otherwise any nested constructs using FinalizeOMPRegion
+ // will fail because that function requires the Finalization Basic Block to
+ // have a terminator, which is already removed by EmitOMPRegionBody.
+ // IP is currently at cancelation block.
+ // We need to backtrack to the condition block to fetch
+ // the exit block and create a branch from cancelation
+ // to exit block.
+ IRBuilder<>::InsertPointGuard IPG(Builder);
+ Builder.restoreIP(IP);
+ auto *CaseBB = IP.getBlock()->getSinglePredecessor();
+ auto *CondBB = CaseBB->getSinglePredecessor()->getSinglePredecessor();
+ auto *ExitBB = CondBB->getTerminator()->getSuccessor(1);
+ Instruction *I = Builder.CreateBr(ExitBB);
+ IP = InsertPointTy(I->getParent(), I->getIterator());
+ return FiniCB(IP);
+ };
+
+ FinalizationStack.push_back({FiniCBWrapper, OMPD_sections, IsCancellable});
+
+ // Each section is emitted as a switch case
+ // Each finalization callback is handled from clang.EmitOMPSectionDirective()
+ // -> OMP.createSection() which generates the IR for each section
+ // Iterate through all sections and emit a switch construct:
+ // switch (IV) {
+ // case 0:
+ // <SectionStmt[0]>;
+ // break;
+ // ...
+ // case <NumSection> - 1:
+ // <SectionStmt[<NumSection> - 1]>;
+ // break;
+ // }
+ // ...
+ // section_loop.after:
+ // <FiniCB>;
+ auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, Value *IndVar) {
+ auto *CurFn = CodeGenIP.getBlock()->getParent();
+ auto *ForIncBB = CodeGenIP.getBlock()->getSingleSuccessor();
+ auto *ForExitBB = CodeGenIP.getBlock()
+ ->getSinglePredecessor()
+ ->getTerminator()
+ ->getSuccessor(1);
+ SwitchInst *SwitchStmt = Builder.CreateSwitch(IndVar, ForIncBB);
+ Builder.restoreIP(CodeGenIP);
+ unsigned CaseNumber = 0;
+ for (auto SectionCB : SectionCBs) {
+ auto *CaseBB = BasicBlock::Create(M.getContext(),
+ "omp_section_loop.body.case", CurFn);
+ SwitchStmt->addCase(Builder.getInt32(CaseNumber), CaseBB);
+ Builder.SetInsertPoint(CaseBB);
+ SectionCB(InsertPointTy(), Builder.saveIP(), *ForExitBB);
+ CaseNumber++;
+ }
+ // remove the existing terminator from body BB since there can be no
+ // terminators after switch/case
+ CodeGenIP.getBlock()->getTerminator()->eraseFromParent();
+ };
+ // Loop body ends here
+ // LowerBound, UpperBound, and Stride for createCanonicalLoop
+ Type *I32Ty = Type::getInt32Ty(M.getContext());
+ Value *LB = ConstantInt::get(I32Ty, 0);
+ Value *UB = ConstantInt::get(I32Ty, SectionCBs.size());
+ Value *ST = ConstantInt::get(I32Ty, 1);
+ llvm::CanonicalLoopInfo *LoopInfo = createCanonicalLoop(
+ Loc, LoopBodyGenCB, LB, UB, ST, true, false, AllocaIP, "section_loop");
+ LoopInfo = createStaticWorkshareLoop(Loc, LoopInfo, AllocaIP, true);
+ BasicBlock *LoopAfterBB = LoopInfo->getAfter();
+ Instruction *SplitPos = LoopAfterBB->getTerminator();
+ if (!isa_and_nonnull<BranchInst>(SplitPos))
+ SplitPos = new UnreachableInst(Builder.getContext(), LoopAfterBB);
+ // ExitBB after LoopAfterBB because LoopAfterBB is used for FinalizationCB,
+ // which requires a BB with branch
+ BasicBlock *ExitBB =
+ LoopAfterBB->splitBasicBlock(SplitPos, "omp_sections.end");
+ SplitPos->eraseFromParent();
+
+ // Apply the finalization callback in LoopAfterBB
+ auto FiniInfo = FinalizationStack.pop_back_val();
+ assert(FiniInfo.DK == OMPD_sections &&
+ "Unexpected finalization stack state!");
+ Builder.SetInsertPoint(LoopAfterBB->getTerminator());
+ FiniInfo.FiniCB(Builder.saveIP());
+ Builder.SetInsertPoint(ExitBB);
+
+ return Builder.saveIP();
+}
+
+OpenMPIRBuilder::InsertPointTy
+OpenMPIRBuilder::createSection(const LocationDescription &Loc,
+ BodyGenCallbackTy BodyGenCB,
+ FinalizeCallbackTy FiniCB) {
+ if (!updateToLocation(Loc))
+ return Loc.IP;
+
+ auto FiniCBWrapper = [&](InsertPointTy IP) {
+ if (IP.getBlock()->end() != IP.getPoint())
+ return FiniCB(IP);
+ // This must be done otherwise any nested constructs using FinalizeOMPRegion
+ // will fail because that function requires the Finalization Basic Block to
+ // have a terminator, which is already removed by EmitOMPRegionBody.
+ // IP is currently at cancelation block.
+ // We need to backtrack to the condition block to fetch
+ // the exit block and create a branch from cancelation
+ // to exit block.
+ IRBuilder<>::InsertPointGuard IPG(Builder);
+ Builder.restoreIP(IP);
+ auto *CaseBB = Loc.IP.getBlock();
+ auto *CondBB = CaseBB->getSinglePredecessor()->getSinglePredecessor();
+ auto *ExitBB = CondBB->getTerminator()->getSuccessor(1);
+ Instruction *I = Builder.CreateBr(ExitBB);
+ IP = InsertPointTy(I->getParent(), I->getIterator());
+ return FiniCB(IP);
+ };
+
+ Directive OMPD = Directive::OMPD_sections;
+ // Since we are using Finalization Callback here, HasFinalize
+ // and IsCancellable have to be true
+ return EmitOMPInlinedRegion(OMPD, nullptr, nullptr, BodyGenCB, FiniCBWrapper,
+ /*Conditional*/ false, /*hasFinalize*/ true,
+ /*IsCancellable*/ true);
+}
+
OpenMPIRBuilder::InsertPointTy
OpenMPIRBuilder::createMaster(const LocationDescription &Loc,
BodyGenCallbackTy BodyGenCB,
@@ -876,6 +1046,30 @@ OpenMPIRBuilder::createMaster(const LocationDescription &Loc,
/*Conditional*/ true, /*hasFinalize*/ true);
}
+OpenMPIRBuilder::InsertPointTy
+OpenMPIRBuilder::createMasked(const LocationDescription &Loc,
+ BodyGenCallbackTy BodyGenCB,
+ FinalizeCallbackTy FiniCB, Value *Filter) {
+ if (!updateToLocation(Loc))
+ return Loc.IP;
+
+ Directive OMPD = Directive::OMPD_masked;
+ Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
+ Value *Ident = getOrCreateIdent(SrcLocStr);
+ Value *ThreadId = getOrCreateThreadID(Ident);
+ Value *Args[] = {Ident, ThreadId, Filter};
+ Value *ArgsEnd[] = {Ident, ThreadId};
+
+ Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_masked);
+ Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);
+
+ Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_masked);
+ Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, ArgsEnd);
+
+ return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
+ /*Conditional*/ true, /*hasFinalize*/ true);
+}
+
CanonicalLoopInfo *OpenMPIRBuilder::createLoopSkeleton(
DebugLoc DL, Value *TripCount, Function *F, BasicBlock *PreInsertBefore,
BasicBlock *PostInsertBefore, const Twine &Name) {
@@ -1119,18 +1313,16 @@ CanonicalLoopInfo *OpenMPIRBuilder::createStaticWorkshareLoop(
Value *ThreadNum = getOrCreateThreadID(SrcLoc);
- // TODO: extract scheduling type and map it to OMP constant. This is curently
- // happening in kmp.h and its ilk and needs to be moved to OpenMP.td first.
- constexpr int StaticSchedType = 34;
- Constant *SchedulingType = ConstantInt::get(I32Type, StaticSchedType);
+ Constant *SchedulingType =
+ ConstantInt::get(I32Type, static_cast<int>(OMPScheduleType::Static));
// Call the "init" function and update the trip count of the loop with the
// value it produced.
Builder.CreateCall(StaticInit,
{SrcLoc, ThreadNum, SchedulingType, PLastIter, PLowerBound,
PUpperBound, PStride, One, Chunk});
- Value *LowerBound = Builder.CreateLoad(PLowerBound);
- Value *InclusiveUpperBound = Builder.CreateLoad(PUpperBound);
+ Value *LowerBound = Builder.CreateLoad(IVTy, PLowerBound);
+ Value *InclusiveUpperBound = Builder.CreateLoad(IVTy, PUpperBound);
Value *TripCountMinusOne = Builder.CreateSub(InclusiveUpperBound, LowerBound);
Value *TripCount = Builder.CreateAdd(TripCountMinusOne, One);
setCanonicalLoopTripCount(CLI, TripCount);
@@ -1164,6 +1356,154 @@ CanonicalLoopInfo *OpenMPIRBuilder::createStaticWorkshareLoop(
return CLI;
}
+CanonicalLoopInfo *OpenMPIRBuilder::createWorkshareLoop(
+ const LocationDescription &Loc, CanonicalLoopInfo *CLI,
+ InsertPointTy AllocaIP, bool NeedsBarrier) {
+ // Currently only supports static schedules.
+ return createStaticWorkshareLoop(Loc, CLI, AllocaIP, NeedsBarrier);
+}
+
+/// Returns an LLVM function to call for initializing loop bounds using OpenMP
+/// dynamic scheduling depending on `type`. Only i32 and i64 are supported by
+/// the runtime. Always interpret integers as unsigned similarly to
+/// CanonicalLoopInfo.
+static FunctionCallee
+getKmpcForDynamicInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder) {
+ unsigned Bitwidth = Ty->getIntegerBitWidth();
+ if (Bitwidth == 32)
+ return OMPBuilder.getOrCreateRuntimeFunction(
+ M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_4u);
+ if (Bitwidth == 64)
+ return OMPBuilder.getOrCreateRuntimeFunction(
+ M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_8u);
+ llvm_unreachable("unknown OpenMP loop iterator bitwidth");
+}
+
+/// Returns an LLVM function to call for updating the next loop using OpenMP
+/// dynamic scheduling depending on `type`. Only i32 and i64 are supported by
+/// the runtime. Always interpret integers as unsigned similarly to
+/// CanonicalLoopInfo.
+static FunctionCallee
+getKmpcForDynamicNextForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder) {
+ unsigned Bitwidth = Ty->getIntegerBitWidth();
+ if (Bitwidth == 32)
+ return OMPBuilder.getOrCreateRuntimeFunction(
+ M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_4u);
+ if (Bitwidth == 64)
+ return OMPBuilder.getOrCreateRuntimeFunction(
+ M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_8u);
+ llvm_unreachable("unknown OpenMP loop iterator bitwidth");
+}
+
+OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createDynamicWorkshareLoop(
+ const LocationDescription &Loc, CanonicalLoopInfo *CLI,
+ InsertPointTy AllocaIP, OMPScheduleType SchedType, bool NeedsBarrier,
+ Value *Chunk) {
+ // Set up the source location value for OpenMP runtime.
+ Builder.SetCurrentDebugLocation(Loc.DL);
+
+ Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
+ Value *SrcLoc = getOrCreateIdent(SrcLocStr);
+
+ // Declare useful OpenMP runtime functions.
+ Value *IV = CLI->getIndVar();
+ Type *IVTy = IV->getType();
+ FunctionCallee DynamicInit = getKmpcForDynamicInitForType(IVTy, M, *this);
+ FunctionCallee DynamicNext = getKmpcForDynamicNextForType(IVTy, M, *this);
+
+ // Allocate space for computed loop bounds as expected by the "init" function.
+ Builder.restoreIP(AllocaIP);
+ Type *I32Type = Type::getInt32Ty(M.getContext());
+ Value *PLastIter = Builder.CreateAlloca(I32Type, nullptr, "p.lastiter");
+ Value *PLowerBound = Builder.CreateAlloca(IVTy, nullptr, "p.lowerbound");
+ Value *PUpperBound = Builder.CreateAlloca(IVTy, nullptr, "p.upperbound");
+ Value *PStride = Builder.CreateAlloca(IVTy, nullptr, "p.stride");
+
+ // At the end of the preheader, prepare for calling the "init" function by
+ // storing the current loop bounds into the allocated space. A canonical loop
+ // always iterates from 0 to trip-count with step 1. Note that "init" expects
+ // and produces an inclusive upper bound.
+ BasicBlock *PreHeader = CLI->getPreheader();
+ Builder.SetInsertPoint(PreHeader->getTerminator());
+ Constant *One = ConstantInt::get(IVTy, 1);
+ Builder.CreateStore(One, PLowerBound);
+ Value *UpperBound = CLI->getTripCount();
+ Builder.CreateStore(UpperBound, PUpperBound);
+ Builder.CreateStore(One, PStride);
+
+ BasicBlock *Header = CLI->getHeader();
+ BasicBlock *Exit = CLI->getExit();
+ BasicBlock *Cond = CLI->getCond();
+ InsertPointTy AfterIP = CLI->getAfterIP();
+
+ // The CLI will be "broken" in the code below, as the loop is no longer
+ // a valid canonical loop.
+
+ if (!Chunk)
+ Chunk = One;
+
+ Value *ThreadNum = getOrCreateThreadID(SrcLoc);
+
+ Constant *SchedulingType =
+ ConstantInt::get(I32Type, static_cast<int>(SchedType));
+
+ // Call the "init" function.
+ Builder.CreateCall(DynamicInit,
+ {SrcLoc, ThreadNum, SchedulingType, /* LowerBound */ One,
+ UpperBound, /* step */ One, Chunk});
+
+ // An outer loop around the existing one.
+ BasicBlock *OuterCond = BasicBlock::Create(
+ PreHeader->getContext(), Twine(PreHeader->getName()) + ".outer.cond",
+ PreHeader->getParent());
+ // This needs to be 32-bit always, so can't use the IVTy Zero above.
+ Builder.SetInsertPoint(OuterCond, OuterCond->getFirstInsertionPt());
+ Value *Res =
+ Builder.CreateCall(DynamicNext, {SrcLoc, ThreadNum, PLastIter,
+ PLowerBound, PUpperBound, PStride});
+ Constant *Zero32 = ConstantInt::get(I32Type, 0);
+ Value *MoreWork = Builder.CreateCmp(CmpInst::ICMP_NE, Res, Zero32);
+ Value *LowerBound =
+ Builder.CreateSub(Builder.CreateLoad(IVTy, PLowerBound), One, "lb");
+ Builder.CreateCondBr(MoreWork, Header, Exit);
+
+ // Change PHI-node in loop header to use outer cond rather than preheader,
+ // and set IV to the LowerBound.
+ Instruction *Phi = &Header->front();
+ auto *PI = cast<PHINode>(Phi);
+ PI->setIncomingBlock(0, OuterCond);
+ PI->setIncomingValue(0, LowerBound);
+
+ // Then set the pre-header to jump to the OuterCond
+ Instruction *Term = PreHeader->getTerminator();
+ auto *Br = cast<BranchInst>(Term);
+ Br->setSuccessor(0, OuterCond);
+
+ // Modify the inner condition:
+ // * Use the UpperBound returned from the DynamicNext call.
+ // * Jump to the outer loop when done with one of the inner loops.
+ Builder.SetInsertPoint(Cond, Cond->getFirstInsertionPt());
+ UpperBound = Builder.CreateLoad(IVTy, PUpperBound, "ub");
+ Instruction *Comp = &*Builder.GetInsertPoint();
+ auto *CI = cast<CmpInst>(Comp);
+ CI->setOperand(1, UpperBound);
+ // Redirect the inner exit to branch to outer condition.
+ Instruction *Branch = &Cond->back();
+ auto *BI = cast<BranchInst>(Branch);
+ assert(BI->getSuccessor(1) == Exit);
+ BI->setSuccessor(1, OuterCond);
+
+ // Add the barrier if requested.
+ if (NeedsBarrier) {
+ Builder.SetInsertPoint(&Exit->back());
+ createBarrier(LocationDescription(Builder.saveIP(), Loc.DL),
+ omp::Directive::OMPD_for, /* ForceSimpleCall */ false,
+ /* CheckCancelFlag */ false);
+ }
+
+ return AfterIP;
+}
+
/// Make \p Source branch to \p Target.
///
/// Handles two situations:
@@ -1225,6 +1565,127 @@ static void removeUnusedBlocksFromParent(ArrayRef<BasicBlock *> BBs) {
DeleteDeadBlocks(BBVec);
}
+CanonicalLoopInfo *
+OpenMPIRBuilder::collapseLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
+ InsertPointTy ComputeIP) {
+ assert(Loops.size() >= 1 && "At least one loop required");
+ size_t NumLoops = Loops.size();
+
+ // Nothing to do if there is already just one loop.
+ if (NumLoops == 1)
+ return Loops.front();
+
+ CanonicalLoopInfo *Outermost = Loops.front();
+ CanonicalLoopInfo *Innermost = Loops.back();
+ BasicBlock *OrigPreheader = Outermost->getPreheader();
+ BasicBlock *OrigAfter = Outermost->getAfter();
+ Function *F = OrigPreheader->getParent();
+
+ // Setup the IRBuilder for inserting the trip count computation.
+ Builder.SetCurrentDebugLocation(DL);
+ if (ComputeIP.isSet())
+ Builder.restoreIP(ComputeIP);
+ else
+ Builder.restoreIP(Outermost->getPreheaderIP());
+
+ // Derive the collapsed loop's trip count.
+ // TODO: Find common/largest indvar type.
+ Value *CollapsedTripCount = nullptr;
+ for (CanonicalLoopInfo *L : Loops) {
+ Value *OrigTripCount = L->getTripCount();
+ if (!CollapsedTripCount) {
+ CollapsedTripCount = OrigTripCount;
+ continue;
+ }
+
+ // TODO: Enable UndefinedSanitizer to diagnose an overflow here.
+ CollapsedTripCount = Builder.CreateMul(CollapsedTripCount, OrigTripCount,
+ {}, /*HasNUW=*/true);
+ }
+
+ // Create the collapsed loop control flow.
+ CanonicalLoopInfo *Result =
+ createLoopSkeleton(DL, CollapsedTripCount, F,
+ OrigPreheader->getNextNode(), OrigAfter, "collapsed");
+
+ // Build the collapsed loop body code.
+ // Start with deriving the input loop induction variables from the collapsed
+ // one, using a divmod scheme. To preserve the original loops' order, the
+ // innermost loop uses the least significant bits.
+ Builder.restoreIP(Result->getBodyIP());
+
+ Value *Leftover = Result->getIndVar();
+ SmallVector<Value *> NewIndVars;
+ NewIndVars.set_size(NumLoops);
+ for (int i = NumLoops - 1; i >= 1; --i) {
+ Value *OrigTripCount = Loops[i]->getTripCount();
+
+ Value *NewIndVar = Builder.CreateURem(Leftover, OrigTripCount);
+ NewIndVars[i] = NewIndVar;
+
+ Leftover = Builder.CreateUDiv(Leftover, OrigTripCount);
+ }
+ // Outermost loop gets all the remaining bits.
+ NewIndVars[0] = Leftover;
+
+ // Construct the loop body control flow.
+ // We progressively construct the branch structure following in direction of
+ // the control flow, from the leading in-between code, the loop nest body, the
+ // trailing in-between code, and rejoining the collapsed loop's latch.
+ // ContinueBlock and ContinuePred keep track of the source(s) of next edge. If
+ // the ContinueBlock is set, continue with that block. If ContinuePred, use
+ // its predecessors as sources.
+ BasicBlock *ContinueBlock = Result->getBody();
+ BasicBlock *ContinuePred = nullptr;
+ auto ContinueWith = [&ContinueBlock, &ContinuePred, DL](BasicBlock *Dest,
+ BasicBlock *NextSrc) {
+ if (ContinueBlock)
+ redirectTo(ContinueBlock, Dest, DL);
+ else
+ redirectAllPredecessorsTo(ContinuePred, Dest, DL);
+
+ ContinueBlock = nullptr;
+ ContinuePred = NextSrc;
+ };
+
+ // The code before the nested loop of each level.
+ // Because we are sinking it into the nest, it will be executed more often
+ // than the original loop. More sophisticated schemes could keep track of what
+ // the in-between code is and instantiate it only once per thread.
+ for (size_t i = 0; i < NumLoops - 1; ++i)
+ ContinueWith(Loops[i]->getBody(), Loops[i + 1]->getHeader());
+
+ // Connect the loop nest body.
+ ContinueWith(Innermost->getBody(), Innermost->getLatch());
+
+ // The code after the nested loop at each level.
+ for (size_t i = NumLoops - 1; i > 0; --i)
+ ContinueWith(Loops[i]->getAfter(), Loops[i - 1]->getLatch());
+
+ // Connect the finished loop to the collapsed loop latch.
+ ContinueWith(Result->getLatch(), nullptr);
+
+ // Replace the input loops with the new collapsed loop.
+ redirectTo(Outermost->getPreheader(), Result->getPreheader(), DL);
+ redirectTo(Result->getAfter(), Outermost->getAfter(), DL);
+
+ // Replace the input loop indvars with the derived ones.
+ for (size_t i = 0; i < NumLoops; ++i)
+ Loops[i]->getIndVar()->replaceAllUsesWith(NewIndVars[i]);
+
+ // Remove unused parts of the input loops.
+ SmallVector<BasicBlock *, 12> OldControlBBs;
+ OldControlBBs.reserve(6 * Loops.size());
+ for (CanonicalLoopInfo *Loop : Loops)
+ Loop->collectControlBlocks(OldControlBBs);
+ removeUnusedBlocksFromParent(OldControlBBs);
+
+#ifndef NDEBUG
+ Result->assertOK();
+#endif
+ return Result;
+}
+
std::vector<CanonicalLoopInfo *>
OpenMPIRBuilder::tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
ArrayRef<Value *> TileSizes) {
@@ -1421,7 +1882,7 @@ OpenMPIRBuilder::createCopyPrivate(const LocationDescription &Loc,
Value *Ident = getOrCreateIdent(SrcLocStr);
Value *ThreadId = getOrCreateThreadID(Ident);
- llvm::Value *DidItLD = Builder.CreateLoad(DidIt);
+ llvm::Value *DidItLD = Builder.CreateLoad(Builder.getInt32Ty(), DidIt);
Value *Args[] = {Ident, ThreadId, BufSize, CpyBuf, CpyFn, DidItLD};
@@ -1502,10 +1963,10 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createCritical(
OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::EmitOMPInlinedRegion(
Directive OMPD, Instruction *EntryCall, Instruction *ExitCall,
BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool Conditional,
- bool HasFinalize) {
+ bool HasFinalize, bool IsCancellable) {
if (HasFinalize)
- FinalizationStack.push_back({FiniCB, OMPD, /*IsCancellable*/ false});
+ FinalizationStack.push_back({FiniCB, OMPD, IsCancellable});
// Create inlined region's entry and body blocks, in preparation
// for conditional creation
@@ -1570,9 +2031,8 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::EmitOMPInlinedRegion(
OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveEntry(
Directive OMPD, Value *EntryCall, BasicBlock *ExitBB, bool Conditional) {
-
// if nothing to do, Return current insertion point.
- if (!Conditional)
+ if (!Conditional || !EntryCall)
return Builder.saveIP();
BasicBlock *EntryBB = Builder.GetInsertBlock();
@@ -1622,6 +2082,9 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveExit(
Builder.SetInsertPoint(FiniBBTI);
}
+ if (!ExitCall)
+ return Builder.saveIP();
+
// place the Exitcall as last instruction before Finalization block terminator
ExitCall->removeFromParent();
Builder.Insert(ExitCall);
@@ -1724,11 +2187,75 @@ CallInst *OpenMPIRBuilder::createCachedThreadPrivate(
llvm::Value *Args[] = {Ident, ThreadId, Pointer, Size, ThreadPrivateCache};
Function *Fn =
- getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_threadprivate_cached);
+ getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_threadprivate_cached);
return Builder.CreateCall(Fn, Args);
}
+OpenMPIRBuilder::InsertPointTy
+OpenMPIRBuilder::createTargetInit(const LocationDescription &Loc, bool IsSPMD, bool RequiresFullRuntime) {
+ if (!updateToLocation(Loc))
+ return Loc.IP;
+
+ Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
+ Value *Ident = getOrCreateIdent(SrcLocStr);
+ ConstantInt *IsSPMDVal = ConstantInt::getBool(Int32->getContext(), IsSPMD);
+ ConstantInt *UseGenericStateMachine =
+ ConstantInt::getBool(Int32->getContext(), !IsSPMD);
+ ConstantInt *RequiresFullRuntimeVal = ConstantInt::getBool(Int32->getContext(), RequiresFullRuntime);
+
+ Function *Fn = getOrCreateRuntimeFunctionPtr(
+ omp::RuntimeFunction::OMPRTL___kmpc_target_init);
+
+ CallInst *ThreadKind =
+ Builder.CreateCall(Fn, {Ident, IsSPMDVal, UseGenericStateMachine, RequiresFullRuntimeVal});
+
+ Value *ExecUserCode = Builder.CreateICmpEQ(
+ ThreadKind, ConstantInt::get(ThreadKind->getType(), -1), "exec_user_code");
+
+ // ThreadKind = __kmpc_target_init(...)
+ // if (ThreadKind == -1)
+ // user_code
+ // else
+ // return;
+
+ auto *UI = Builder.CreateUnreachable();
+ BasicBlock *CheckBB = UI->getParent();
+ BasicBlock *UserCodeEntryBB = CheckBB->splitBasicBlock(UI, "user_code.entry");
+
+ BasicBlock *WorkerExitBB = BasicBlock::Create(
+ CheckBB->getContext(), "worker.exit", CheckBB->getParent());
+ Builder.SetInsertPoint(WorkerExitBB);
+ Builder.CreateRetVoid();
+
+ auto *CheckBBTI = CheckBB->getTerminator();
+ Builder.SetInsertPoint(CheckBBTI);
+ Builder.CreateCondBr(ExecUserCode, UI->getParent(), WorkerExitBB);
+
+ CheckBBTI->eraseFromParent();
+ UI->eraseFromParent();
+
+ // Continue in the "user_code" block, see diagram above and in
+ // openmp/libomptarget/deviceRTLs/common/include/target.h .
+ return InsertPointTy(UserCodeEntryBB, UserCodeEntryBB->getFirstInsertionPt());
+}
+
+void OpenMPIRBuilder::createTargetDeinit(const LocationDescription &Loc,
+ bool IsSPMD, bool RequiresFullRuntime) {
+ if (!updateToLocation(Loc))
+ return;
+
+ Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
+ Value *Ident = getOrCreateIdent(SrcLocStr);
+ ConstantInt *IsSPMDVal = ConstantInt::getBool(Int32->getContext(), IsSPMD);
+ ConstantInt *RequiresFullRuntimeVal = ConstantInt::getBool(Int32->getContext(), RequiresFullRuntime);
+
+ Function *Fn = getOrCreateRuntimeFunctionPtr(
+ omp::RuntimeFunction::OMPRTL___kmpc_target_deinit);
+
+ Builder.CreateCall(Fn, {Ident, IsSPMDVal, RequiresFullRuntimeVal});
+}
+
std::string OpenMPIRBuilder::getNameWithSeparators(ArrayRef<StringRef> Parts,
StringRef FirstSeparator,
StringRef Separator) {
@@ -1778,6 +2305,400 @@ Value *OpenMPIRBuilder::getOMPCriticalRegionLock(StringRef CriticalName) {
return getOrCreateOMPInternalVariable(KmpCriticalNameTy, Name);
}
+// Materialize the constant i64 array of map-type flags consumed by the
+// offloading runtime.  Emitted as a private, unnamed_addr global so identical
+// tables can be merged.
+GlobalVariable *
+OpenMPIRBuilder::createOffloadMaptypes(SmallVectorImpl<uint64_t> &Mappings,
+                                       std::string VarName) {
+  llvm::Constant *MaptypesArrayInit =
+      llvm::ConstantDataArray::get(M.getContext(), Mappings);
+  auto *MaptypesArrayGlobal = new llvm::GlobalVariable(
+      M, MaptypesArrayInit->getType(),
+      /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, MaptypesArrayInit,
+      VarName);
+  MaptypesArrayGlobal->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
+  return MaptypesArrayGlobal;
+}
+
+// Create the three stack arrays (argument base pointers, pointers, sizes)
+// that a subsequent mapper runtime call reads from.  The allocas are emitted
+// at AllocaIP (the function's alloca insertion point); the builder is
+// restored to Loc afterwards so code generation continues where it left off.
+void OpenMPIRBuilder::createMapperAllocas(const LocationDescription &Loc,
+                                          InsertPointTy AllocaIP,
+                                          unsigned NumOperands,
+                                          struct MapperAllocas &MapperAllocas) {
+  if (!updateToLocation(Loc))
+    return;
+
+  // [NumOperands x i8*] for base addresses / addresses,
+  // [NumOperands x i64] for the per-argument sizes.
+  auto *ArrI8PtrTy = ArrayType::get(Int8Ptr, NumOperands);
+  auto *ArrI64Ty = ArrayType::get(Int64, NumOperands);
+  Builder.restoreIP(AllocaIP);
+  AllocaInst *ArgsBase = Builder.CreateAlloca(ArrI8PtrTy);
+  AllocaInst *Args = Builder.CreateAlloca(ArrI8PtrTy);
+  AllocaInst *ArgSizes = Builder.CreateAlloca(ArrI64Ty);
+  Builder.restoreIP(Loc.IP);
+  MapperAllocas.ArgsBase = ArgsBase;
+  MapperAllocas.Args = Args;
+  MapperAllocas.ArgSizes = ArgSizes;
+}
+
+// Emit the call to an offloading mapper runtime function (MapperFunc).  Each
+// stack array from createMapperAllocas is decayed via a {0,0} GEP to a
+// pointer to its first element, which is the shape the runtime expects.
+void OpenMPIRBuilder::emitMapperCall(const LocationDescription &Loc,
+                                     Function *MapperFunc, Value *SrcLocInfo,
+                                     Value *MaptypesArg, Value *MapnamesArg,
+                                     struct MapperAllocas &MapperAllocas,
+                                     int64_t DeviceID, unsigned NumOperands) {
+  if (!updateToLocation(Loc))
+    return;
+
+  auto *ArrI8PtrTy = ArrayType::get(Int8Ptr, NumOperands);
+  auto *ArrI64Ty = ArrayType::get(Int64, NumOperands);
+  Value *ArgsBaseGEP =
+      Builder.CreateInBoundsGEP(ArrI8PtrTy, MapperAllocas.ArgsBase,
+                                {Builder.getInt32(0), Builder.getInt32(0)});
+  Value *ArgsGEP =
+      Builder.CreateInBoundsGEP(ArrI8PtrTy, MapperAllocas.Args,
+                                {Builder.getInt32(0), Builder.getInt32(0)});
+  Value *ArgSizesGEP =
+      Builder.CreateInBoundsGEP(ArrI64Ty, MapperAllocas.ArgSizes,
+                                {Builder.getInt32(0), Builder.getInt32(0)});
+  // Trailing argument is a null i8** — presumably the custom-mapper array;
+  // confirm against the runtime entry point's signature.
+  Value *NullPtr = Constant::getNullValue(Int8Ptr->getPointerTo());
+  Builder.CreateCall(MapperFunc,
+                     {SrcLocInfo, Builder.getInt64(DeviceID),
+                      Builder.getInt32(NumOperands), ArgsBaseGEP, ArgsGEP,
+                      ArgSizesGEP, MaptypesArg, MapnamesArg, NullPtr});
+}
+
+// Decide whether an atomic construct of kind AK with memory ordering AO
+// requires a trailing flush, and emit one via emitFlush if so.  Returns true
+// iff a flush was emitted.  Monotonic (relaxed) never flushes.
+bool OpenMPIRBuilder::checkAndEmitFlushAfterAtomic(
+    const LocationDescription &Loc, llvm::AtomicOrdering AO, AtomicKind AK) {
+  assert(!(AO == AtomicOrdering::NotAtomic ||
+           AO == llvm::AtomicOrdering::Unordered) &&
+         "Unexpected Atomic Ordering.");
+
+  bool Flush = false;
+  llvm::AtomicOrdering FlushAO = AtomicOrdering::Monotonic;
+
+  switch (AK) {
+  // Reads flush for acquire-or-stronger orderings.
+  case Read:
+    if (AO == AtomicOrdering::Acquire || AO == AtomicOrdering::AcquireRelease ||
+        AO == AtomicOrdering::SequentiallyConsistent) {
+      FlushAO = AtomicOrdering::Acquire;
+      Flush = true;
+    }
+    break;
+  // Writes and updates flush for release-or-stronger orderings.
+  case Write:
+  case Update:
+    if (AO == AtomicOrdering::Release || AO == AtomicOrdering::AcquireRelease ||
+        AO == AtomicOrdering::SequentiallyConsistent) {
+      FlushAO = AtomicOrdering::Release;
+      Flush = true;
+    }
+    break;
+  // Captures read and write, so any non-relaxed ordering flushes.
+  case Capture:
+    switch (AO) {
+    case AtomicOrdering::Acquire:
+      FlushAO = AtomicOrdering::Acquire;
+      Flush = true;
+      break;
+    case AtomicOrdering::Release:
+      FlushAO = AtomicOrdering::Release;
+      Flush = true;
+      break;
+    case AtomicOrdering::AcquireRelease:
+    case AtomicOrdering::SequentiallyConsistent:
+      FlushAO = AtomicOrdering::AcquireRelease;
+      Flush = true;
+      break;
+    default:
+      // do nothing - leave silently.
+      break;
+    }
+  }
+
+  if (Flush) {
+    // Currently Flush RT call still doesn't take memory_ordering, so for when
+    // that happens, this tries to do the resolution of which atomic ordering
+    // to use with but issue the flush call
+    // TODO: pass `FlushAO` after memory ordering support is added
+    (void)FlushAO;
+    emitFlush(Loc);
+  }
+
+  // for AO == AtomicOrdering::Monotonic and all other case combinations
+  // do nothing
+  return Flush;
+}
+
+// Emit an OpenMP `atomic read`: atomically load *X and store the result to
+// V.  Integer payloads are loaded directly; float/pointer payloads are
+// loaded through an equally-sized integer pointer (the atomic load is done
+// on the integer type) and cast back to the element type afterwards.
+OpenMPIRBuilder::InsertPointTy
+OpenMPIRBuilder::createAtomicRead(const LocationDescription &Loc,
+                                  AtomicOpValue &X, AtomicOpValue &V,
+                                  AtomicOrdering AO) {
+  if (!updateToLocation(Loc))
+    return Loc.IP;
+
+  Type *XTy = X.Var->getType();
+  assert(XTy->isPointerTy() && "OMP Atomic expects a pointer to target memory");
+  Type *XElemTy = XTy->getPointerElementType();
+  assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() ||
+          XElemTy->isPointerTy()) &&
+         "OMP atomic read expected a scalar type");
+
+  Value *XRead = nullptr;
+
+  if (XElemTy->isIntegerTy()) {
+    LoadInst *XLD =
+        Builder.CreateLoad(XElemTy, X.Var, X.IsVolatile, "omp.atomic.read");
+    XLD->setAtomic(AO);
+    XRead = cast<Value>(XLD);
+  } else {
+    // We need to bitcast and perform atomic op as integer
+    unsigned Addrspace = cast<PointerType>(XTy)->getAddressSpace();
+    IntegerType *IntCastTy =
+        IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits());
+    Value *XBCast = Builder.CreateBitCast(
+        X.Var, IntCastTy->getPointerTo(Addrspace), "atomic.src.int.cast");
+    LoadInst *XLoad =
+        Builder.CreateLoad(IntCastTy, XBCast, X.IsVolatile, "omp.atomic.load");
+    XLoad->setAtomic(AO);
+    // Recover the original element type from the integer bits.
+    if (XElemTy->isFloatingPointTy()) {
+      XRead = Builder.CreateBitCast(XLoad, XElemTy, "atomic.flt.cast");
+    } else {
+      XRead = Builder.CreateIntToPtr(XLoad, XElemTy, "atomic.ptr.cast");
+    }
+  }
+  checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Read);
+  // The store of the captured value into V is not itself atomic.
+  Builder.CreateStore(XRead, V.Var, V.IsVolatile);
+  return Builder.saveIP();
+}
+
+// Emit an OpenMP `atomic write`: atomically store Expr into *X.  As in
+// createAtomicRead, non-integer payloads are stored through an equally-sized
+// integer pointer so the store can carry an atomic ordering.
+OpenMPIRBuilder::InsertPointTy
+OpenMPIRBuilder::createAtomicWrite(const LocationDescription &Loc,
+                                   AtomicOpValue &X, Value *Expr,
+                                   AtomicOrdering AO) {
+  if (!updateToLocation(Loc))
+    return Loc.IP;
+
+  Type *XTy = X.Var->getType();
+  assert(XTy->isPointerTy() && "OMP Atomic expects a pointer to target memory");
+  Type *XElemTy = XTy->getPointerElementType();
+  assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() ||
+          XElemTy->isPointerTy()) &&
+         "OMP atomic write expected a scalar type");
+
+  if (XElemTy->isIntegerTy()) {
+    StoreInst *XSt = Builder.CreateStore(Expr, X.Var, X.IsVolatile);
+    XSt->setAtomic(AO);
+  } else {
+    // We need to bitcast and perform atomic op as integers
+    unsigned Addrspace = cast<PointerType>(XTy)->getAddressSpace();
+    IntegerType *IntCastTy =
+        IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits());
+    Value *XBCast = Builder.CreateBitCast(
+        X.Var, IntCastTy->getPointerTo(Addrspace), "atomic.dst.int.cast");
+    Value *ExprCast =
+        Builder.CreateBitCast(Expr, IntCastTy, "atomic.src.int.cast");
+    StoreInst *XSt = Builder.CreateStore(ExprCast, XBCast, X.IsVolatile);
+    XSt->setAtomic(AO);
+  }
+
+  checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Write);
+  return Builder.saveIP();
+}
+
+// Emit an OpenMP `atomic update` on X: delegates the actual update to
+// emitAtomicUpdate, then emits any flush the ordering requires.  RMWOp is
+// the binary operation, UpdateOp a frontend callback computing the new value
+// from the old one, and IsXLHSInRHSPart tells whether X is the left operand
+// of the update expression (used to detect `expr - x` forms that atomicrmw
+// cannot express — see emitAtomicUpdate).
+OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicUpdate(
+    const LocationDescription &Loc, Instruction *AllocIP, AtomicOpValue &X,
+    Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
+    AtomicUpdateCallbackTy &UpdateOp, bool IsXLHSInRHSPart) {
+  if (!updateToLocation(Loc))
+    return Loc.IP;
+
+  // Sanity checks on X and the operation; this block is only active in
+  // debug-enabled builds.
+  LLVM_DEBUG({
+    Type *XTy = X.Var->getType();
+    assert(XTy->isPointerTy() &&
+           "OMP Atomic expects a pointer to target memory");
+    Type *XElemTy = XTy->getPointerElementType();
+    assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() ||
+            XElemTy->isPointerTy()) &&
+           "OMP atomic update expected a scalar type");
+    assert((RMWOp != AtomicRMWInst::Max) && (RMWOp != AtomicRMWInst::Min) &&
+           (RMWOp != AtomicRMWInst::UMax) && (RMWOp != AtomicRMWInst::UMin) &&
+           "OpenMP atomic does not support LT or GT operations");
+  });
+
+  emitAtomicUpdate(AllocIP, X.Var, Expr, AO, RMWOp, UpdateOp, X.IsVolatile,
+                   IsXLHSInRHSPart);
+  checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Update);
+  return Builder.saveIP();
+}
+
+// Re-materialize the result of an atomicrmw as a plain instruction: the
+// atomicrmw itself only yields the OLD value, so capture clauses that need
+// the NEW value recompute it here from (old, Expr).  Only the integer binops
+// OpenMP atomic update supports are handled; anything else is unreachable.
+Value *OpenMPIRBuilder::emitRMWOpAsInstruction(Value *Src1, Value *Src2,
+                                               AtomicRMWInst::BinOp RMWOp) {
+  switch (RMWOp) {
+  case AtomicRMWInst::Add:
+    return Builder.CreateAdd(Src1, Src2);
+  case AtomicRMWInst::Sub:
+    return Builder.CreateSub(Src1, Src2);
+  case AtomicRMWInst::And:
+    return Builder.CreateAnd(Src1, Src2);
+  case AtomicRMWInst::Nand:
+    // atomicrmw nand computes ~(a & b) — a bitwise complement.  CreateNeg
+    // would emit the arithmetic negation -(a & b) == ~(a & b) + 1, which is
+    // off by one; CreateNot is the correct lowering.
+    return Builder.CreateNot(Builder.CreateAnd(Src1, Src2));
+  case AtomicRMWInst::Or:
+    return Builder.CreateOr(Src1, Src2);
+  case AtomicRMWInst::Xor:
+    return Builder.CreateXor(Src1, Src2);
+  case AtomicRMWInst::Xchg:
+  case AtomicRMWInst::FAdd:
+  case AtomicRMWInst::FSub:
+  case AtomicRMWInst::BAD_BINOP:
+  case AtomicRMWInst::Max:
+  case AtomicRMWInst::Min:
+  case AtomicRMWInst::UMax:
+  case AtomicRMWInst::UMin:
+    llvm_unreachable("Unsupported atomic update operation");
+  }
+  llvm_unreachable("Unsupported atomic update operation");
+}
+
+// Emit the update step shared by `atomic update` and `atomic capture`:
+// atomically combine *X with Expr (via RMWOp, or via the frontend callback
+// UpdateOp in the cmpxchg path) and return {old value, new value}.
+std::pair<Value *, Value *>
+OpenMPIRBuilder::emitAtomicUpdate(Instruction *AllocIP, Value *X, Value *Expr,
+                                  AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
+                                  AtomicUpdateCallbackTy &UpdateOp,
+                                  bool VolatileX, bool IsXLHSInRHSPart) {
+  Type *XElemTy = X->getType()->getPointerElementType();
+
+  // Fall back to a cmpxchg retry loop when a single atomicrmw cannot express
+  // the update: no direct binop, float add/sub, or `x = expr - x`
+  // (atomicrmw sub only computes x - expr).
+  bool DoCmpExch =
+      ((RMWOp == AtomicRMWInst::BAD_BINOP) || (RMWOp == AtomicRMWInst::FAdd)) ||
+      (RMWOp == AtomicRMWInst::FSub) ||
+      (RMWOp == AtomicRMWInst::Sub && !IsXLHSInRHSPart);
+
+  std::pair<Value *, Value *> Res;
+  if (XElemTy->isIntegerTy() && !DoCmpExch) {
+    Res.first = Builder.CreateAtomicRMW(RMWOp, X, Expr, llvm::MaybeAlign(), AO);
+    // not needed except in case of postfix captures. Generate anyway for
+    // consistency with the else part. Will be removed with any DCE pass.
+    Res.second = emitRMWOpAsInstruction(Res.first, Expr, RMWOp);
+  } else {
+    // Non-integer payloads are handled through an equally-sized integer
+    // pointer so the atomic operations have an integer type to work on.
+    unsigned Addrspace = cast<PointerType>(X->getType())->getAddressSpace();
+    IntegerType *IntCastTy =
+        IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits());
+    Value *XBCast =
+        Builder.CreateBitCast(X, IntCastTy->getPointerTo(Addrspace));
+    LoadInst *OldVal =
+        Builder.CreateLoad(IntCastTy, XBCast, X->getName() + ".atomic.load");
+    OldVal->setAtomic(AO);
+    // CurBB
+    // |     /---\
+    // ContBB    |
+    // |     \---/
+    // ExitBB
+    BasicBlock *CurBB = Builder.GetInsertBlock();
+    Instruction *CurBBTI = CurBB->getTerminator();
+    // Synthesize a terminator if the block has none yet so it can be split.
+    CurBBTI = CurBBTI ? CurBBTI : Builder.CreateUnreachable();
+    BasicBlock *ExitBB =
+        CurBB->splitBasicBlock(CurBBTI, X->getName() + ".atomic.exit");
+    BasicBlock *ContBB = CurBB->splitBasicBlock(CurBB->getTerminator(),
+                                                X->getName() + ".atomic.cont");
+    ContBB->getTerminator()->eraseFromParent();
+    Builder.SetInsertPoint(ContBB);
+    llvm::PHINode *PHI = Builder.CreatePHI(OldVal->getType(), 2);
+    PHI->addIncoming(OldVal, CurBB);
+    AllocaInst *NewAtomicAddr = Builder.CreateAlloca(XElemTy);
+    NewAtomicAddr->setName(X->getName() + "x.new.val");
+    NewAtomicAddr->moveBefore(AllocIP);
+    IntegerType *NewAtomicCastTy =
+        IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits());
+    bool IsIntTy = XElemTy->isIntegerTy();
+    Value *NewAtomicIntAddr =
+        (IsIntTy)
+            ? NewAtomicAddr
+            : Builder.CreateBitCast(NewAtomicAddr,
+                                    NewAtomicCastTy->getPointerTo(Addrspace));
+    Value *OldExprVal = PHI;
+    if (!IsIntTy) {
+      if (XElemTy->isFloatingPointTy()) {
+        OldExprVal = Builder.CreateBitCast(PHI, XElemTy,
+                                           X->getName() + ".atomic.fltCast");
+      } else {
+        OldExprVal = Builder.CreateIntToPtr(PHI, XElemTy,
+                                            X->getName() + ".atomic.ptrCast");
+      }
+    }
+
+    Value *Upd = UpdateOp(OldExprVal, Builder);
+    Builder.CreateStore(Upd, NewAtomicAddr);
+    // NOTE(review): in the non-integer path OldExprVal/DesiredVal carry
+    // XElemTy while XAddr is an integer pointer — confirm the cmpxchg
+    // operand types line up for float/pointer payloads.
+    LoadInst *DesiredVal = Builder.CreateLoad(XElemTy, NewAtomicIntAddr);
+    Value *XAddr =
+        (IsIntTy)
+            ? X
+            : Builder.CreateBitCast(X, IntCastTy->getPointerTo(Addrspace));
+    AtomicOrdering Failure =
+        llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO);
+    AtomicCmpXchgInst *Result = Builder.CreateAtomicCmpXchg(
+        XAddr, OldExprVal, DesiredVal, llvm::MaybeAlign(), AO, Failure);
+    Result->setVolatile(VolatileX);
+    Value *PreviousVal = Builder.CreateExtractValue(Result, /*Idxs=*/0);
+    Value *SuccessFailureVal = Builder.CreateExtractValue(Result, /*Idxs=*/1);
+    PHI->addIncoming(PreviousVal, Builder.GetInsertBlock());
+    Builder.CreateCondBr(SuccessFailureVal, ExitBB, ContBB);
+
+    Res.first = OldExprVal;
+    Res.second = Upd;
+
+    // Set the insertion point in the exit block.  After the splits above,
+    // ExitBB's terminator is CurBBTI: when it is the unreachable we
+    // synthesized it is dead and can be dropped; otherwise keep the original
+    // terminator and continue emitting in front of it.  (The old else branch
+    // passed the if-scoped ExitTI to SetInsertPoint, but ExitTI is always
+    // null there — the dyn_cast just failed — so it dereferenced null.)
+    if (isa<UnreachableInst>(ExitBB->getTerminator())) {
+      CurBBTI->eraseFromParent();
+      Builder.SetInsertPoint(ExitBB);
+    } else {
+      Builder.SetInsertPoint(CurBBTI);
+    }
+  }
+
+  return Res;
+}
+
+// Emit an OpenMP `atomic capture`: atomically update X and store the
+// captured value to V — the old value for postfix forms (IsPostfixUpdate),
+// the new value otherwise.  When UpdateExpr is false the new value does not
+// depend on X, so the update degenerates to an atomic exchange.
+OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCapture(
+    const LocationDescription &Loc, Instruction *AllocIP, AtomicOpValue &X,
+    AtomicOpValue &V, Value *Expr, AtomicOrdering AO,
+    AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp,
+    bool UpdateExpr, bool IsPostfixUpdate, bool IsXLHSInRHSPart) {
+  if (!updateToLocation(Loc))
+    return Loc.IP;
+
+  // Debug-only sanity checks on X and the operation.
+  LLVM_DEBUG({
+    Type *XTy = X.Var->getType();
+    assert(XTy->isPointerTy() &&
+           "OMP Atomic expects a pointer to target memory");
+    Type *XElemTy = XTy->getPointerElementType();
+    assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() ||
+            XElemTy->isPointerTy()) &&
+           "OMP atomic capture expected a scalar type");
+    assert((RMWOp != AtomicRMWInst::Max) && (RMWOp != AtomicRMWInst::Min) &&
+           "OpenMP atomic does not support LT or GT operations");
+  });
+
+  // If UpdateExpr is 'x' updated with some `expr` not based on 'x',
+  // 'x' is simply atomically rewritten with 'expr'.
+  AtomicRMWInst::BinOp AtomicOp = (UpdateExpr ? RMWOp : AtomicRMWInst::Xchg);
+  std::pair<Value *, Value *> Result =
+      emitAtomicUpdate(AllocIP, X.Var, Expr, AO, AtomicOp, UpdateOp,
+                       X.IsVolatile, IsXLHSInRHSPart);
+
+  // emitAtomicUpdate returns {old value, new value}; pick per capture form.
+  Value *CapturedVal = (IsPostfixUpdate ? Result.first : Result.second);
+  Builder.CreateStore(CapturedVal, V.Var, V.IsVolatile);
+
+  checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Capture);
+  return Builder.saveIP();
+}
+
+// Materialize the constant [N x i8*] array of map-name string pointers
+// passed to the offloading runtime, as a private global.
+GlobalVariable *
+OpenMPIRBuilder::createOffloadMapnames(SmallVectorImpl<llvm::Constant *> &Names,
+                                       std::string VarName) {
+  llvm::Constant *MapNamesArrayInit = llvm::ConstantArray::get(
+      llvm::ArrayType::get(
+          llvm::Type::getInt8Ty(M.getContext())->getPointerTo(), Names.size()),
+      Names);
+  auto *MapNamesArrayGlobal = new llvm::GlobalVariable(
+      M, MapNamesArrayInit->getType(),
+      /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, MapNamesArrayInit,
+      VarName);
+  return MapNamesArrayGlobal;
+}
+
// Create all simple and struct types exposed by the runtime and remember
// the llvm::PointerTypes of them for easy access later.
void OpenMPIRBuilder::initializeTypes(Module &M) {