From 5e20cdd81c44a443562a09007668ffdf76c455af Mon Sep 17 00:00:00 2001
From: Dimitry Andric <dim@FreeBSD.org>
Date: Wed, 27 May 2015 18:47:56 +0000
Subject: Vendor import of clang trunk r238337:
  https://llvm.org/svn/llvm-project/cfe/trunk@238337
---
 lib/CodeGen/CGStmtOpenMP.cpp | 2112 ++++++++++++++++++++++++++++++++++--------
 1 file changed, 1712 insertions(+), 400 deletions(-)

diff --git a/lib/CodeGen/CGStmtOpenMP.cpp b/lib/CodeGen/CGStmtOpenMP.cpp
index 78fd37ce6562..07fc6e966af6 100644
--- a/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/lib/CodeGen/CGStmtOpenMP.cpp
@@ -20,258 +20,473 @@ using namespace clang;
 using namespace CodeGen;
 
-namespace {
-/// \brief RAII for emitting code of CapturedStmt without function outlining.
-class InlinedOpenMPRegion {
-  CodeGenFunction &CGF;
-  CodeGenFunction::CGCapturedStmtInfo *PrevCapturedStmtInfo;
-  const Decl *StoredCurCodeDecl;
-
-  /// \brief A class to emit CapturedStmt construct as inlined statement without
-  /// generating a function for outlined code.
-  class CGInlinedOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
-  public:
-    CGInlinedOpenMPRegionInfo() : CGCapturedStmtInfo() {}
-  };
-
-public:
-  InlinedOpenMPRegion(CodeGenFunction &CGF, const Stmt *S)
-      : CGF(CGF), PrevCapturedStmtInfo(CGF.CapturedStmtInfo),
-        StoredCurCodeDecl(CGF.CurCodeDecl) {
-    CGF.CurCodeDecl = cast<CapturedStmt>(S)->getCapturedDecl();
-    CGF.CapturedStmtInfo = new CGInlinedOpenMPRegionInfo();
-  }
-  ~InlinedOpenMPRegion() {
-    delete CGF.CapturedStmtInfo;
-    CGF.CapturedStmtInfo = PrevCapturedStmtInfo;
-    CGF.CurCodeDecl = StoredCurCodeDecl;
-  }
-};
-} // namespace
-
 //===----------------------------------------------------------------------===//
 //                              OpenMP Directive Emission
 //===----------------------------------------------------------------------===//
+void CodeGenFunction::EmitOMPAggregateAssign(
+    llvm::Value *DestAddr, llvm::Value *SrcAddr, QualType OriginalType,
+    const llvm::function_ref<void(llvm::Value *, llvm::Value *)> &CopyGen) {
+  // Perform element-by-element initialization.
+  QualType ElementTy;
+  auto SrcBegin = SrcAddr;
+  auto DestBegin = DestAddr;
+  auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
+  auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestBegin);
+  // Cast from pointer to array type to pointer to single element.
+  SrcBegin = Builder.CreatePointerBitCastOrAddrSpaceCast(SrcBegin,
+                                                         DestBegin->getType());
+  auto DestEnd = Builder.CreateGEP(DestBegin, NumElements);
+  // The basic structure here is a while-do loop.
+  auto BodyBB = createBasicBlock("omp.arraycpy.body");
+  auto DoneBB = createBasicBlock("omp.arraycpy.done");
+  auto IsEmpty =
+      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
+  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
+
+  // Enter the loop body, making that address the current address.
+  auto EntryBB = Builder.GetInsertBlock();
+  EmitBlock(BodyBB);
+  auto SrcElementCurrent =
+      Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
+  SrcElementCurrent->addIncoming(SrcBegin, EntryBB);
+  auto DestElementCurrent = Builder.CreatePHI(DestBegin->getType(), 2,
+                                              "omp.arraycpy.destElementPast");
+  DestElementCurrent->addIncoming(DestBegin, EntryBB);
+
+  // Emit copy.
+  CopyGen(DestElementCurrent, SrcElementCurrent);
+
+  // Shift the address forward by one element.
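// For reference, the loop built here is equivalent to the following C
// sketch (illustrative only; the per-element work is whatever the CopyGen
// callback emits):
//
//   DestElem = DestBegin; SrcElem = SrcBegin;
//   if (DestBegin != DestEnd) {
//     do {
//       CopyGen(DestElem, SrcElem);   // copy/construct one element
//       ++DestElem; ++SrcElem;        // the "shift forward" step below
//     } while (DestElem != DestEnd);
//   }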
+ auto DestElementNext = Builder.CreateConstGEP1_32( + DestElementCurrent, /*Idx0=*/1, "omp.arraycpy.dest.element"); + auto SrcElementNext = Builder.CreateConstGEP1_32( + SrcElementCurrent, /*Idx0=*/1, "omp.arraycpy.src.element"); + // Check whether we've reached the end. + auto Done = + Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); + Builder.CreateCondBr(Done, DoneBB, BodyBB); + DestElementCurrent->addIncoming(DestElementNext, Builder.GetInsertBlock()); + SrcElementCurrent->addIncoming(SrcElementNext, Builder.GetInsertBlock()); + + // Done. + EmitBlock(DoneBB, /*IsFinished=*/true); +} -/// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen -/// function. Here is the logic: -/// if (Cond) { -/// CodeGen(true); -/// } else { -/// CodeGen(false); -/// } -static void EmitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond, - const std::function &CodeGen) { - CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); - - // If the condition constant folds and can be elided, try to avoid emitting - // the condition and the dead arm of the if/else. - bool CondConstant; - if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { - CodeGen(CondConstant); - return; - } - - // Otherwise, the condition did not fold, or we couldn't elide it. Just - // emit the conditional branch. - auto ThenBlock = CGF.createBasicBlock(/*name*/ "omp_if.then"); - auto ElseBlock = CGF.createBasicBlock(/*name*/ "omp_if.else"); - auto ContBlock = CGF.createBasicBlock(/*name*/ "omp_if.end"); - CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount*/ 0); - - // Emit the 'then' code. - CGF.EmitBlock(ThenBlock); - CodeGen(/*ThenBlock*/ true); - CGF.EmitBranch(ContBlock); - // Emit the 'else' code if present. - { - // There is no need to emit line number for unconditional branch. - ApplyDebugLocation DL(CGF); - CGF.EmitBlock(ElseBlock); - } - CodeGen(/*ThenBlock*/ false); - { - // There is no need to emit line number for unconditional branch. - ApplyDebugLocation DL(CGF); - CGF.EmitBranch(ContBlock); - } - // Emit the continuation block for code after the if. - CGF.EmitBlock(ContBlock, /*IsFinished*/ true); -} - -void CodeGenFunction::EmitOMPAggregateAssign(LValue OriginalAddr, - llvm::Value *PrivateAddr, - const Expr *AssignExpr, - QualType OriginalType, - const VarDecl *VDInit) { - EmitBlock(createBasicBlock(".omp.assign.begin.")); - if (!isa(AssignExpr) || isTrivialInitializer(AssignExpr)) { - // Perform simple memcpy. - EmitAggregateAssign(PrivateAddr, OriginalAddr.getAddress(), - AssignExpr->getType()); - } else { - // Perform element-by-element initialization. - QualType ElementTy; - auto SrcBegin = OriginalAddr.getAddress(); - auto DestBegin = PrivateAddr; - auto ArrayTy = OriginalType->getAsArrayTypeUnsafe(); - auto SrcNumElements = emitArrayLength(ArrayTy, ElementTy, SrcBegin); - auto DestNumElements = emitArrayLength(ArrayTy, ElementTy, DestBegin); - auto SrcEnd = Builder.CreateGEP(SrcBegin, SrcNumElements); - auto DestEnd = Builder.CreateGEP(DestBegin, DestNumElements); - // The basic structure here is a do-while loop, because we don't - // need to check for the zero-element case. - auto BodyBB = createBasicBlock("omp.arraycpy.body"); - auto DoneBB = createBasicBlock("omp.arraycpy.done"); - auto IsEmpty = - Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty"); - Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); - - // Enter the loop body, making that address the current address. 
- auto EntryBB = Builder.GetInsertBlock(); - EmitBlock(BodyBB); - auto SrcElementPast = Builder.CreatePHI(SrcBegin->getType(), 2, - "omp.arraycpy.srcElementPast"); - SrcElementPast->addIncoming(SrcEnd, EntryBB); - auto DestElementPast = Builder.CreatePHI(DestBegin->getType(), 2, - "omp.arraycpy.destElementPast"); - DestElementPast->addIncoming(DestEnd, EntryBB); - - // Shift the address back by one element. - auto NegativeOne = llvm::ConstantInt::get(SizeTy, -1, true); - auto DestElement = Builder.CreateGEP(DestElementPast, NegativeOne, - "omp.arraycpy.dest.element"); - auto SrcElement = Builder.CreateGEP(SrcElementPast, NegativeOne, - "omp.arraycpy.src.element"); - { - // Create RunCleanScope to cleanup possible temps. - CodeGenFunction::RunCleanupsScope Init(*this); - // Emit initialization for single element. - LocalDeclMap[VDInit] = SrcElement; - EmitAnyExprToMem(AssignExpr, DestElement, - AssignExpr->getType().getQualifiers(), - /*IsInitializer*/ false); - LocalDeclMap.erase(VDInit); +void CodeGenFunction::EmitOMPCopy(CodeGenFunction &CGF, + QualType OriginalType, llvm::Value *DestAddr, + llvm::Value *SrcAddr, const VarDecl *DestVD, + const VarDecl *SrcVD, const Expr *Copy) { + if (OriginalType->isArrayType()) { + auto *BO = dyn_cast(Copy); + if (BO && BO->getOpcode() == BO_Assign) { + // Perform simple memcpy for simple copying. + CGF.EmitAggregateAssign(DestAddr, SrcAddr, OriginalType); + } else { + // For arrays with complex element types perform element by element + // copying. + CGF.EmitOMPAggregateAssign( + DestAddr, SrcAddr, OriginalType, + [&CGF, Copy, SrcVD, DestVD](llvm::Value *DestElement, + llvm::Value *SrcElement) { + // Working with the single array element, so have to remap + // destination and source variables to corresponding array + // elements. + CodeGenFunction::OMPPrivateScope Remap(CGF); + Remap.addPrivate(DestVD, [DestElement]() -> llvm::Value *{ + return DestElement; + }); + Remap.addPrivate( + SrcVD, [SrcElement]() -> llvm::Value *{ return SrcElement; }); + (void)Remap.Privatize(); + CGF.EmitIgnoredExpr(Copy); + }); } - - // Check whether we've reached the end. - auto Done = - Builder.CreateICmpEQ(DestElement, DestBegin, "omp.arraycpy.done"); - Builder.CreateCondBr(Done, DoneBB, BodyBB); - DestElementPast->addIncoming(DestElement, Builder.GetInsertBlock()); - SrcElementPast->addIncoming(SrcElement, Builder.GetInsertBlock()); - - // Done. - EmitBlock(DoneBB, true); + } else { + // Remap pseudo source variable to private copy. + CodeGenFunction::OMPPrivateScope Remap(CGF); + Remap.addPrivate(SrcVD, [SrcAddr]() -> llvm::Value *{ return SrcAddr; }); + Remap.addPrivate(DestVD, [DestAddr]() -> llvm::Value *{ return DestAddr; }); + (void)Remap.Privatize(); + // Emit copying of the whole variable. 
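// Summary of the dispatch performed by EmitOMPCopy (illustrative sketch):
//
//   if (array type && Copy is a plain 'Dest = Src')
//     aggregate memcpy;                            // trivial elements
//   else if (array type)
//     per-element loop, with DestVD/SrcVD remapped // e.g. operator= or
//     to &Dest[i]/&Src[i] before emitting Copy;    // copy constructor
//   else
//     emit Copy once, with DestVD/SrcVD remapped to DestAddr/SrcAddr.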
+ CGF.EmitIgnoredExpr(Copy); } - EmitBlock(createBasicBlock(".omp.assign.end.")); } -void CodeGenFunction::EmitOMPFirstprivateClause( - const OMPExecutableDirective &D, - CodeGenFunction::OMPPrivateScope &PrivateScope) { - auto PrivateFilter = [](const OMPClause *C) -> bool { - return C->getClauseKind() == OMPC_firstprivate; - }; - for (OMPExecutableDirective::filtered_clause_iterator - I(D.clauses(), PrivateFilter); I; ++I) { +bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D, + OMPPrivateScope &PrivateScope) { + llvm::DenseSet EmittedAsFirstprivate; + for (auto &&I = D.getClausesOfKind(OMPC_firstprivate); I; ++I) { auto *C = cast(*I); auto IRef = C->varlist_begin(); auto InitsRef = C->inits().begin(); for (auto IInit : C->private_copies()) { auto *OrigVD = cast(cast(*IRef)->getDecl()); - auto *VD = cast(cast(IInit)->getDecl()); - bool IsRegistered; - if (*InitsRef != nullptr) { - // Emit VarDecl with copy init for arrays. - auto *FD = CapturedStmtInfo->lookup(OrigVD); - LValue Base = MakeNaturalAlignAddrLValue( - CapturedStmtInfo->getContextValue(), - getContext().getTagDeclType(FD->getParent())); - auto OriginalAddr = EmitLValueForField(Base, FD); - auto VDInit = cast(cast(*InitsRef)->getDecl()); - IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value * { - auto Emission = EmitAutoVarAlloca(*VD); - // Emit initialization of aggregate firstprivate vars. - EmitOMPAggregateAssign(OriginalAddr, Emission.getAllocatedAddress(), - VD->getInit(), (*IRef)->getType(), VDInit); - EmitAutoVarCleanups(Emission); - return Emission.getAllocatedAddress(); - }); - } else - IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value * { - // Emit private VarDecl with copy init. - EmitDecl(*VD); - return GetAddrOfLocalVar(VD); - }); - assert(IsRegistered && "counter already registered as private"); - // Silence the warning about unused variable. - (void)IsRegistered; + if (EmittedAsFirstprivate.count(OrigVD) == 0) { + EmittedAsFirstprivate.insert(OrigVD); + auto *VD = cast(cast(IInit)->getDecl()); + auto *VDInit = cast(cast(*InitsRef)->getDecl()); + bool IsRegistered; + DeclRefExpr DRE( + const_cast(OrigVD), + /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup( + OrigVD) != nullptr, + (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc()); + auto *OriginalAddr = EmitLValue(&DRE).getAddress(); + QualType Type = OrigVD->getType(); + if (Type->isArrayType()) { + // Emit VarDecl with copy init for arrays. + // Get the address of the original variable captured in current + // captured region. + IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value *{ + auto Emission = EmitAutoVarAlloca(*VD); + auto *Init = VD->getInit(); + if (!isa(Init) || isTrivialInitializer(Init)) { + // Perform simple memcpy. + EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr, + Type); + } else { + EmitOMPAggregateAssign( + Emission.getAllocatedAddress(), OriginalAddr, Type, + [this, VDInit, Init](llvm::Value *DestElement, + llvm::Value *SrcElement) { + // Clean up any temporaries needed by the initialization. + RunCleanupsScope InitScope(*this); + // Emit initialization for single element. 
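// Net effect for 'firstprivate(a)' where 'a' is an array of non-trivial
// element type T (a sketch; VDInit is the helper variable Sema created to
// stand for the source element):
//
//   T a_priv[N];
//   for (i in 0 .. N-1)
//     construct a_priv[i] from a[i];   // VDInit temporarily maps to &a[i]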
+ LocalDeclMap[VDInit] = SrcElement; + EmitAnyExprToMem(Init, DestElement, + Init->getType().getQualifiers(), + /*IsInitializer*/ false); + LocalDeclMap.erase(VDInit); + }); + } + EmitAutoVarCleanups(Emission); + return Emission.getAllocatedAddress(); + }); + } else { + IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value *{ + // Emit private VarDecl with copy init. + // Remap temp VDInit variable to the address of the original + // variable + // (for proper handling of captured global variables). + LocalDeclMap[VDInit] = OriginalAddr; + EmitDecl(*VD); + LocalDeclMap.erase(VDInit); + return GetAddrOfLocalVar(VD); + }); + } + assert(IsRegistered && + "firstprivate var already registered as private"); + // Silence the warning about unused variable. + (void)IsRegistered; + } ++IRef, ++InitsRef; } } + return !EmittedAsFirstprivate.empty(); } void CodeGenFunction::EmitOMPPrivateClause( const OMPExecutableDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) { - auto PrivateFilter = [](const OMPClause *C) -> bool { - return C->getClauseKind() == OMPC_private; - }; - for (OMPExecutableDirective::filtered_clause_iterator - I(D.clauses(), PrivateFilter); I; ++I) { + llvm::DenseSet EmittedAsPrivate; + for (auto &&I = D.getClausesOfKind(OMPC_private); I; ++I) { auto *C = cast(*I); auto IRef = C->varlist_begin(); for (auto IInit : C->private_copies()) { auto *OrigVD = cast(cast(*IRef)->getDecl()); - auto VD = cast(cast(IInit)->getDecl()); + if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { + auto VD = cast(cast(IInit)->getDecl()); + bool IsRegistered = + PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value *{ + // Emit private VarDecl with copy init. + EmitDecl(*VD); + return GetAddrOfLocalVar(VD); + }); + assert(IsRegistered && "private var already registered as private"); + // Silence the warning about unused variable. + (void)IsRegistered; + } + ++IRef; + } + } +} + +bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) { + // threadprivate_var1 = master_threadprivate_var1; + // operator=(threadprivate_var2, master_threadprivate_var2); + // ... + // __kmpc_barrier(&loc, global_tid); + llvm::DenseSet CopiedVars; + llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr; + for (auto &&I = D.getClausesOfKind(OMPC_copyin); I; ++I) { + auto *C = cast(*I); + auto IRef = C->varlist_begin(); + auto ISrcRef = C->source_exprs().begin(); + auto IDestRef = C->destination_exprs().begin(); + for (auto *AssignOp : C->assignment_ops()) { + auto *VD = cast(cast(*IRef)->getDecl()); + QualType Type = VD->getType(); + if (CopiedVars.insert(VD->getCanonicalDecl()).second) { + // Get the address of the master variable. + auto *MasterAddr = VD->isStaticLocal() + ? CGM.getStaticLocalDeclAddress(VD) + : CGM.GetAddrOfGlobal(VD); + // Get the address of the threadprivate variable. + auto *PrivateAddr = EmitLValue(*IRef).getAddress(); + if (CopiedVars.size() == 1) { + // At first check if current thread is a master thread. If it is, no + // need to copy data. 
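// The pointer comparison emitted below exploits the fact that on the
// master thread the threadprivate copy *is* the original variable, so the
// two addresses differ only on worker threads (sketch):
//
//   if ((intptr_t)&master_var != (intptr_t)&threadprivate_copy) {
//     threadprivate_copy = master_var;   // repeated per copyin variable
//   }
//   // the caller then emits the synchronizing barrier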
+ CopyBegin = createBasicBlock("copyin.not.master"); + CopyEnd = createBasicBlock("copyin.not.master.end"); + Builder.CreateCondBr( + Builder.CreateICmpNE( + Builder.CreatePtrToInt(MasterAddr, CGM.IntPtrTy), + Builder.CreatePtrToInt(PrivateAddr, CGM.IntPtrTy)), + CopyBegin, CopyEnd); + EmitBlock(CopyBegin); + } + auto *SrcVD = cast(cast(*ISrcRef)->getDecl()); + auto *DestVD = cast(cast(*IDestRef)->getDecl()); + EmitOMPCopy(*this, Type, PrivateAddr, MasterAddr, DestVD, SrcVD, + AssignOp); + } + ++IRef; + ++ISrcRef; + ++IDestRef; + } + } + if (CopyEnd) { + // Exit out of copying procedure for non-master thread. + EmitBlock(CopyEnd, /*IsFinished=*/true); + return true; + } + return false; +} + +bool CodeGenFunction::EmitOMPLastprivateClauseInit( + const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) { + bool HasAtLeastOneLastprivate = false; + llvm::DenseSet AlreadyEmittedVars; + for (auto &&I = D.getClausesOfKind(OMPC_lastprivate); I; ++I) { + HasAtLeastOneLastprivate = true; + auto *C = cast(*I); + auto IRef = C->varlist_begin(); + auto IDestRef = C->destination_exprs().begin(); + for (auto *IInit : C->private_copies()) { + // Keep the address of the original variable for future update at the end + // of the loop. + auto *OrigVD = cast(cast(*IRef)->getDecl()); + if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) { + auto *DestVD = cast(cast(*IDestRef)->getDecl()); + PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> llvm::Value *{ + DeclRefExpr DRE( + const_cast(OrigVD), + /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup( + OrigVD) != nullptr, + (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc()); + return EmitLValue(&DRE).getAddress(); + }); + // Check if the variable is also a firstprivate: in this case IInit is + // not generated. Initialization of this variable will happen in codegen + // for 'firstprivate' clause. + if (IInit) { + auto *VD = cast(cast(IInit)->getDecl()); + bool IsRegistered = + PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value *{ + // Emit private VarDecl with copy init. + EmitDecl(*VD); + return GetAddrOfLocalVar(VD); + }); + assert(IsRegistered && + "lastprivate var already registered as private"); + (void)IsRegistered; + } + } + ++IRef, ++IDestRef; + } + } + return HasAtLeastOneLastprivate; +} + +void CodeGenFunction::EmitOMPLastprivateClauseFinal( + const OMPExecutableDirective &D, llvm::Value *IsLastIterCond) { + // Emit following code: + // if () { + // orig_var1 = private_orig_var1; + // ... 
+ // orig_varn = private_orig_varn; + // } + auto *ThenBB = createBasicBlock(".omp.lastprivate.then"); + auto *DoneBB = createBasicBlock(".omp.lastprivate.done"); + Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB); + EmitBlock(ThenBB); + llvm::DenseMap LoopCountersAndUpdates; + const Expr *LastIterVal = nullptr; + const Expr *IVExpr = nullptr; + const Expr *IncExpr = nullptr; + if (auto *LoopDirective = dyn_cast(&D)) { + LastIterVal = + cast(cast(LoopDirective->getUpperBoundVariable()) + ->getDecl()) + ->getAnyInitializer(); + IVExpr = LoopDirective->getIterationVariable(); + IncExpr = LoopDirective->getInc(); + auto IUpdate = LoopDirective->updates().begin(); + for (auto *E : LoopDirective->counters()) { + auto *D = cast(E)->getDecl()->getCanonicalDecl(); + LoopCountersAndUpdates[D] = *IUpdate; + ++IUpdate; + } + } + { + llvm::DenseSet AlreadyEmittedVars; + bool FirstLCV = true; + for (auto &&I = D.getClausesOfKind(OMPC_lastprivate); I; ++I) { + auto *C = cast(*I); + auto IRef = C->varlist_begin(); + auto ISrcRef = C->source_exprs().begin(); + auto IDestRef = C->destination_exprs().begin(); + for (auto *AssignOp : C->assignment_ops()) { + auto *PrivateVD = cast(cast(*IRef)->getDecl()); + QualType Type = PrivateVD->getType(); + auto *CanonicalVD = PrivateVD->getCanonicalDecl(); + if (AlreadyEmittedVars.insert(CanonicalVD).second) { + // If lastprivate variable is a loop control variable for loop-based + // directive, update its value before copyin back to original + // variable. + if (auto *UpExpr = LoopCountersAndUpdates.lookup(CanonicalVD)) { + if (FirstLCV) { + EmitAnyExprToMem(LastIterVal, EmitLValue(IVExpr).getAddress(), + IVExpr->getType().getQualifiers(), + /*IsInitializer=*/false); + EmitIgnoredExpr(IncExpr); + FirstLCV = false; + } + EmitIgnoredExpr(UpExpr); + } + auto *SrcVD = cast(cast(*ISrcRef)->getDecl()); + auto *DestVD = cast(cast(*IDestRef)->getDecl()); + // Get the address of the original variable. + auto *OriginalAddr = GetAddrOfLocalVar(DestVD); + // Get the address of the private variable. + auto *PrivateAddr = GetAddrOfLocalVar(PrivateVD); + EmitOMPCopy(*this, Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, + AssignOp); + } + ++IRef; + ++ISrcRef; + ++IDestRef; + } + } + } + EmitBlock(DoneBB, /*IsFinished=*/true); +} + +void CodeGenFunction::EmitOMPReductionClauseInit( + const OMPExecutableDirective &D, + CodeGenFunction::OMPPrivateScope &PrivateScope) { + for (auto &&I = D.getClausesOfKind(OMPC_reduction); I; ++I) { + auto *C = cast(*I); + auto ILHS = C->lhs_exprs().begin(); + auto IRHS = C->rhs_exprs().begin(); + for (auto IRef : C->varlists()) { + auto *OrigVD = cast(cast(IRef)->getDecl()); + auto *LHSVD = cast(cast(*ILHS)->getDecl()); + auto *PrivateVD = cast(cast(*IRHS)->getDecl()); + // Store the address of the original variable associated with the LHS + // implicit variable. + PrivateScope.addPrivate(LHSVD, [this, OrigVD, IRef]() -> llvm::Value *{ + DeclRefExpr DRE(const_cast(OrigVD), + CapturedStmtInfo->lookup(OrigVD) != nullptr, + IRef->getType(), VK_LValue, IRef->getExprLoc()); + return EmitLValue(&DRE).getAddress(); + }); + // Emit reduction copy. bool IsRegistered = - PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value * { - // Emit private VarDecl with copy init. - EmitDecl(*VD); - return GetAddrOfLocalVar(VD); + PrivateScope.addPrivate(OrigVD, [this, PrivateVD]() -> llvm::Value *{ + // Emit private VarDecl with reduction init. 
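// Paired with EmitOMPReductionClauseFinal, the net effect for
// 'reduction(+:x)' is roughly (a sketch; the combination step is funneled
// through __kmpc_reduce/__kmpc_reduce_nowait by the runtime class):
//
//   T x_priv = 0;                 // identity value of '+'
//   <body uses x_priv via the remapped LHS/RHS expressions>;
//   x = x + x_priv;               // under the runtime's reduction lock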
+ EmitDecl(*PrivateVD); + return GetAddrOfLocalVar(PrivateVD); }); - assert(IsRegistered && "counter already registered as private"); + assert(IsRegistered && "private var already registered as private"); // Silence the warning about unused variable. (void)IsRegistered; - ++IRef; + ++ILHS, ++IRHS; } } } -/// \brief Emits code for OpenMP parallel directive in the parallel region. -static void EmitOMPParallelCall(CodeGenFunction &CGF, - const OMPParallelDirective &S, - llvm::Value *OutlinedFn, - llvm::Value *CapturedStruct) { - if (auto C = S.getSingleClause(/*K*/ OMPC_num_threads)) { +void CodeGenFunction::EmitOMPReductionClauseFinal( + const OMPExecutableDirective &D) { + llvm::SmallVector LHSExprs; + llvm::SmallVector RHSExprs; + llvm::SmallVector ReductionOps; + bool HasAtLeastOneReduction = false; + for (auto &&I = D.getClausesOfKind(OMPC_reduction); I; ++I) { + HasAtLeastOneReduction = true; + auto *C = cast(*I); + LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); + RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); + ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); + } + if (HasAtLeastOneReduction) { + // Emit nowait reduction if nowait clause is present or directive is a + // parallel directive (it always has implicit barrier). + CGM.getOpenMPRuntime().emitReduction( + *this, D.getLocEnd(), LHSExprs, RHSExprs, ReductionOps, + D.getSingleClause(OMPC_nowait) || + isOpenMPParallelDirective(D.getDirectiveKind())); + } +} + +static void emitCommonOMPParallelDirective(CodeGenFunction &CGF, + const OMPExecutableDirective &S, + const RegionCodeGenTy &CodeGen) { + auto CS = cast(S.getAssociatedStmt()); + auto CapturedStruct = CGF.GenerateCapturedStmtArgument(*CS); + auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction( + S, *CS->getCapturedDecl()->param_begin(), CodeGen); + if (auto C = S.getSingleClause(OMPC_num_threads)) { CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); auto NumThreadsClause = cast(C); auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(), /*IgnoreResultAssign*/ true); - CGF.CGM.getOpenMPRuntime().EmitOMPNumThreadsClause( + CGF.CGM.getOpenMPRuntime().emitNumThreadsClause( CGF, NumThreads, NumThreadsClause->getLocStart()); } - CGF.CGM.getOpenMPRuntime().EmitOMPParallelCall(CGF, S.getLocStart(), - OutlinedFn, CapturedStruct); + const Expr *IfCond = nullptr; + if (auto C = S.getSingleClause(OMPC_if)) { + IfCond = cast(C)->getCondition(); + } + CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn, + CapturedStruct, IfCond); } void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { - auto CS = cast(S.getAssociatedStmt()); - auto CapturedStruct = GenerateCapturedStmtArgument(*CS); - auto OutlinedFn = CGM.getOpenMPRuntime().EmitOpenMPOutlinedFunction( - S, *CS->getCapturedDecl()->param_begin()); - if (auto C = S.getSingleClause(/*K*/ OMPC_if)) { - auto Cond = cast(C)->getCondition(); - EmitOMPIfClause(*this, Cond, [&](bool ThenBlock) { - if (ThenBlock) - EmitOMPParallelCall(*this, S, OutlinedFn, CapturedStruct); - else - CGM.getOpenMPRuntime().EmitOMPSerialCall(*this, S.getLocStart(), - OutlinedFn, CapturedStruct); - }); - } else - EmitOMPParallelCall(*this, S, OutlinedFn, CapturedStruct); + LexicalScope Scope(*this, S.getSourceRange()); + // Emit parallel region as a standalone region. 
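// Emission order inside the outlined region (restating the code below):
//   copyin -> firstprivate -> [barrier if either fired] -> private ->
//   reduction-init -> captured body -> reduction-final -> barrier.
// E.g. for (illustrative):
//   #pragma omp parallel firstprivate(a) reduction(+:s)
//   { s += f(a); }
// each thread copy-initializes 'a', zero-initializes 's', runs the body,
// then the private 's' values are combined into the original.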
+ auto &&CodeGen = [&S](CodeGenFunction &CGF) { + OMPPrivateScope PrivateScope(CGF); + bool Copyins = CGF.EmitOMPCopyinClause(S); + bool Firstprivates = CGF.EmitOMPFirstprivateClause(S, PrivateScope); + if (Copyins || Firstprivates) { + // Emit implicit barrier to synchronize threads and avoid data races on + // initialization of firstprivate variables or propagation master's thread + // values of threadprivate variables to local instances of that variables + // of all other implicit threads. + CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(), + OMPD_unknown); + } + CGF.EmitOMPPrivateClause(S, PrivateScope); + CGF.EmitOMPReductionClauseInit(S, PrivateScope); + (void)PrivateScope.Privatize(); + CGF.EmitStmt(cast(S.getAssociatedStmt())->getCapturedStmt()); + CGF.EmitOMPReductionClauseFinal(S); + // Emit implicit barrier at the end of the 'parallel' directive. + CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(), + OMPD_unknown); + }; + emitCommonOMPParallelDirective(*this, S, CodeGen); } void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &S, @@ -281,6 +496,14 @@ void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &S, for (auto I : S.updates()) { EmitIgnoredExpr(I); } + // Update the linear variables. + for (auto &&I = S.getClausesOfKind(OMPC_linear); I; ++I) { + auto *C = cast(*I); + for (auto U : C->updates()) { + EmitIgnoredExpr(U); + } + } + // On a continue in the body, jump to the end. auto Continue = getJumpDestInCurrentScope("omp.body.continue"); BreakContinueStack.push_back(BreakContinue(JumpDest(), Continue)); @@ -297,11 +520,12 @@ void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &S, } } -void CodeGenFunction::EmitOMPInnerLoop(const OMPLoopDirective &S, - OMPPrivateScope &LoopScope, - bool SeparateIter) { +void CodeGenFunction::EmitOMPInnerLoop( + const Stmt &S, bool RequiresCleanup, const Expr *LoopCond, + const Expr *IncExpr, + const llvm::function_ref &BodyGen, + const llvm::function_ref &PostIncGen) { auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end"); - auto Cnt = getPGORegionCounter(&S); // Start the loop with a block that tests the condition. auto CondBlock = createBasicBlock("omp.inner.for.cond"); @@ -311,35 +535,31 @@ void CodeGenFunction::EmitOMPInnerLoop(const OMPLoopDirective &S, // If there are any cleanups between here and the loop-exit scope, // create a block to stage a loop exit along. auto ExitBlock = LoopExit.getBlock(); - if (LoopScope.requiresCleanups()) + if (RequiresCleanup) ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup"); auto LoopBody = createBasicBlock("omp.inner.for.body"); - // Emit condition: "IV < LastIteration + 1 [ - 1]" - // ("- 1" when lastprivate clause is present - separate one iteration). - llvm::Value *BoolCondVal = EvaluateExprAsBool(S.getCond(SeparateIter)); - Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock, - PGO.createLoopWeights(S.getCond(SeparateIter), Cnt)); - + // Emit condition. + EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S)); if (ExitBlock != LoopExit.getBlock()) { EmitBlock(ExitBlock); EmitBranchThroughCleanup(LoopExit); } EmitBlock(LoopBody); - Cnt.beginRegion(Builder); + incrementProfileCounter(&S); // Create a block for the increment. auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc"); BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); - EmitOMPLoopBody(S); - EmitStopPoint(&S); + BodyGen(*this); // Emit "IV = IV + 1" and a back-edge to the condition block. 
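// Overall CFG produced by EmitOMPInnerLoop (sketch):
//
//   omp.inner.for.cond: br(LoopCond, body, end)
//   omp.inner.for.body: BodyGen(CGF); br(inc)
//   omp.inner.for.inc:  IV = IV + 1; PostIncGen(CGF); br(cond)
//   omp.inner.for.end: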
EmitBlock(Continue.getBlock()); - EmitIgnoredExpr(S.getInc()); + EmitIgnoredExpr(IncExpr); + PostIncGen(*this); BreakContinueStack.pop_back(); EmitBranch(CondBlock); LoopStack.pop(); @@ -350,11 +570,38 @@ void CodeGenFunction::EmitOMPInnerLoop(const OMPLoopDirective &S, void CodeGenFunction::EmitOMPSimdFinal(const OMPLoopDirective &S) { auto IC = S.counters().begin(); for (auto F : S.finals()) { - if (LocalDeclMap.lookup(cast((*IC))->getDecl())) { + auto *OrigVD = cast(cast((*IC))->getDecl()); + if (LocalDeclMap.lookup(OrigVD)) { + DeclRefExpr DRE(const_cast(OrigVD), + CapturedStmtInfo->lookup(OrigVD) != nullptr, + (*IC)->getType(), VK_LValue, (*IC)->getExprLoc()); + auto *OrigAddr = EmitLValue(&DRE).getAddress(); + OMPPrivateScope VarScope(*this); + VarScope.addPrivate(OrigVD, + [OrigAddr]() -> llvm::Value *{ return OrigAddr; }); + (void)VarScope.Privatize(); EmitIgnoredExpr(F); } ++IC; } + // Emit the final values of the linear variables. + for (auto &&I = S.getClausesOfKind(OMPC_linear); I; ++I) { + auto *C = cast(*I); + auto IC = C->varlist_begin(); + for (auto F : C->finals()) { + auto *OrigVD = cast(cast(*IC)->getDecl()); + DeclRefExpr DRE(const_cast(OrigVD), + CapturedStmtInfo->lookup(OrigVD) != nullptr, + (*IC)->getType(), VK_LValue, (*IC)->getExprLoc()); + auto *OrigAddr = EmitLValue(&DRE).getAddress(); + OMPPrivateScope VarScope(*this); + VarScope.addPrivate(OrigVD, + [OrigAddr]() -> llvm::Value *{ return OrigAddr; }); + (void)VarScope.Privatize(); + EmitIgnoredExpr(F); + ++IC; + } + } } static void EmitOMPAlignedClause(CodeGenFunction &CGF, CodeGenModule &CGM, @@ -388,116 +635,348 @@ static void EmitPrivateLoopCounters(CodeGenFunction &CGF, ArrayRef Counters) { for (auto *E : Counters) { auto VD = cast(cast(E)->getDecl()); - bool IsRegistered = LoopScope.addPrivate(VD, [&]() -> llvm::Value * { + (void)LoopScope.addPrivate(VD, [&]() -> llvm::Value *{ // Emit var without initialization. auto VarEmission = CGF.EmitAutoVarAlloca(*VD); CGF.EmitAutoVarCleanups(VarEmission); return VarEmission.getAllocatedAddress(); }); - assert(IsRegistered && "counter already registered as private"); - // Silence the warning about unused variable. - (void)IsRegistered; } - (void)LoopScope.Privatize(); +} + +static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S, + const Expr *Cond, llvm::BasicBlock *TrueBlock, + llvm::BasicBlock *FalseBlock, uint64_t TrueCount) { + CodeGenFunction::OMPPrivateScope PreCondScope(CGF); + EmitPrivateLoopCounters(CGF, PreCondScope, S.counters()); + const VarDecl *IVDecl = + cast(cast(S.getIterationVariable())->getDecl()); + bool IsRegistered = PreCondScope.addPrivate(IVDecl, [&]() -> llvm::Value *{ + // Emit var without initialization. + auto VarEmission = CGF.EmitAutoVarAlloca(*IVDecl); + CGF.EmitAutoVarCleanups(VarEmission); + return VarEmission.getAllocatedAddress(); + }); + assert(IsRegistered && "counter already registered as private"); + // Silence the warning about unused variable. + (void)IsRegistered; + (void)PreCondScope.Privatize(); + // Initialize internal counter to 0 to calculate initial values of real + // counters. + LValue IV = CGF.EmitLValue(S.getIterationVariable()); + CGF.EmitStoreOfScalar( + llvm::ConstantInt::getNullValue( + IV.getAddress()->getType()->getPointerElementType()), + CGF.EmitLValue(S.getIterationVariable()), /*isInit=*/true); + // Get initial values of real counters. + for (auto I : S.updates()) { + CGF.EmitIgnoredExpr(I); + } + // Check that loop is executed at least one time. 
+ CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount); +} + +static void +EmitPrivateLinearVars(CodeGenFunction &CGF, const OMPExecutableDirective &D, + CodeGenFunction::OMPPrivateScope &PrivateScope) { + for (auto &&I = D.getClausesOfKind(OMPC_linear); I; ++I) { + auto *C = cast(*I); + for (auto *E : C->varlists()) { + auto VD = cast(cast(E)->getDecl()); + bool IsRegistered = PrivateScope.addPrivate(VD, [&]()->llvm::Value * { + // Emit var without initialization. + auto VarEmission = CGF.EmitAutoVarAlloca(*VD); + CGF.EmitAutoVarCleanups(VarEmission); + return VarEmission.getAllocatedAddress(); + }); + assert(IsRegistered && "linear var already registered as private"); + // Silence the warning about unused variable. + (void)IsRegistered; + } + } } void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { - // Pragma 'simd' code depends on presence of 'lastprivate'. - // If present, we have to separate last iteration of the loop: + auto &&CodeGen = [&S](CodeGenFunction &CGF) { + // Pragma 'simd' code depends on presence of 'lastprivate'. + // If present, we have to separate last iteration of the loop: + // + // if (PreCond) { + // for (IV in 0..LastIteration-1) BODY; + // BODY with updates of lastprivate vars; + // ; + // } + // + // otherwise (when there's no lastprivate): + // + // if (PreCond) { + // for (IV in 0..LastIteration) BODY; + // ; + // } + // + + // Emit: if (PreCond) - begin. + // If the condition constant folds and can be elided, avoid emitting the + // whole loop. + bool CondConstant; + llvm::BasicBlock *ContBlock = nullptr; + if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { + if (!CondConstant) + return; + } else { + auto *ThenBlock = CGF.createBasicBlock("simd.if.then"); + ContBlock = CGF.createBasicBlock("simd.if.end"); + emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock, + CGF.getProfileCount(&S)); + CGF.EmitBlock(ThenBlock); + CGF.incrementProfileCounter(&S); + } + // Walk clauses and process safelen/lastprivate. + bool SeparateIter = false; + CGF.LoopStack.setParallel(); + CGF.LoopStack.setVectorizerEnable(true); + for (auto C : S.clauses()) { + switch (C->getClauseKind()) { + case OMPC_safelen: { + RValue Len = CGF.EmitAnyExpr(cast(C)->getSafelen(), + AggValueSlot::ignored(), true); + llvm::ConstantInt *Val = cast(Len.getScalarVal()); + CGF.LoopStack.setVectorizerWidth(Val->getZExtValue()); + // In presence of finite 'safelen', it may be unsafe to mark all + // the memory instructions parallel, because loop-carried + // dependences of 'safelen' iterations are possible. + CGF.LoopStack.setParallel(false); + break; + } + case OMPC_aligned: + EmitOMPAlignedClause(CGF, CGF.CGM, cast(*C)); + break; + case OMPC_lastprivate: + SeparateIter = true; + break; + default: + // Not handled yet + ; + } + } + + // Emit inits for the linear variables. + for (auto &&I = S.getClausesOfKind(OMPC_linear); I; ++I) { + auto *C = cast(*I); + for (auto Init : C->inits()) { + auto *D = cast(cast(Init)->getDecl()); + CGF.EmitVarDecl(*D); + } + } + + // Emit the loop iteration variable. + const Expr *IVExpr = S.getIterationVariable(); + const VarDecl *IVDecl = cast(cast(IVExpr)->getDecl()); + CGF.EmitVarDecl(*IVDecl); + CGF.EmitIgnoredExpr(S.getInit()); + + // Emit the iterations count variable. + // If it is not a variable, Sema decided to calculate iterations count on + // each iteration (e.g., it is foldable into a constant). 
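// e.g. for 'for (int i = 0; i < n; i += 2)' the iteration count built by
// Sema is (n - 0 + 2 - 1) / 2; when that expression is not foldable it is
// materialized into the helper variable handled below (illustrative).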
+ if (auto LIExpr = dyn_cast(S.getLastIteration())) { + CGF.EmitVarDecl(*cast(LIExpr->getDecl())); + // Emit calculation of the iterations count. + CGF.EmitIgnoredExpr(S.getCalcLastIteration()); + } + + // Emit the linear steps for the linear clauses. + // If a step is not constant, it is pre-calculated before the loop. + for (auto &&I = S.getClausesOfKind(OMPC_linear); I; ++I) { + auto *C = cast(*I); + if (auto CS = cast_or_null(C->getCalcStep())) + if (auto SaveRef = cast(CS->getLHS())) { + CGF.EmitVarDecl(*cast(SaveRef->getDecl())); + // Emit calculation of the linear step. + CGF.EmitIgnoredExpr(CS); + } + } + + { + OMPPrivateScope LoopScope(CGF); + EmitPrivateLoopCounters(CGF, LoopScope, S.counters()); + EmitPrivateLinearVars(CGF, S, LoopScope); + CGF.EmitOMPPrivateClause(S, LoopScope); + (void)LoopScope.Privatize(); + CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), + S.getCond(SeparateIter), S.getInc(), + [&S](CodeGenFunction &CGF) { + CGF.EmitOMPLoopBody(S); + CGF.EmitStopPoint(&S); + }, + [](CodeGenFunction &) {}); + if (SeparateIter) { + CGF.EmitOMPLoopBody(S, /*SeparateIter=*/true); + } + } + CGF.EmitOMPSimdFinal(S); + // Emit: if (PreCond) - end. + if (ContBlock) { + CGF.EmitBranch(ContBlock); + CGF.EmitBlock(ContBlock, true); + } + }; + CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen); +} + +void CodeGenFunction::EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind, + const OMPLoopDirective &S, + OMPPrivateScope &LoopScope, + bool Ordered, llvm::Value *LB, + llvm::Value *UB, llvm::Value *ST, + llvm::Value *IL, llvm::Value *Chunk) { + auto &RT = CGM.getOpenMPRuntime(); + + // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime). + const bool DynamicOrOrdered = Ordered || RT.isDynamic(ScheduleKind); + + assert((Ordered || + !RT.isStaticNonchunked(ScheduleKind, /*Chunked=*/Chunk != nullptr)) && + "static non-chunked schedule does not need outer loop"); + + // Emit outer loop. + // + // OpenMP [2.7.1, Loop Construct, Description, table 2-1] + // When schedule(dynamic,chunk_size) is specified, the iterations are + // distributed to threads in the team in chunks as the threads request them. + // Each thread executes a chunk of iterations, then requests another chunk, + // until no chunks remain to be distributed. Each chunk contains chunk_size + // iterations, except for the last chunk to be distributed, which may have + // fewer iterations. When no chunk_size is specified, it defaults to 1. // - // if (LastIteration != 0) { - // for (IV in 0..LastIteration-1) BODY; - // BODY with updates of lastprivate vars; - // ; + // When schedule(guided,chunk_size) is specified, the iterations are assigned + // to threads in the team in chunks as the executing threads request them. + // Each thread executes a chunk of iterations, then requests another chunk, + // until no chunks remain to be assigned. For a chunk_size of 1, the size of + // each chunk is proportional to the number of unassigned iterations divided + // by the number of threads in the team, decreasing to 1. For a chunk_size + // with value k (greater than 1), the size of each chunk is determined in the + // same way, with the restriction that the chunks do not contain fewer than k + // iterations (except for the last chunk to be assigned, which may have fewer + // than k iterations). + // + // When schedule(auto) is specified, the decision regarding scheduling is + // delegated to the compiler and/or runtime system. 
The programmer gives the + // implementation the freedom to choose any possible mapping of iterations to + // threads in the team. + // + // When schedule(runtime) is specified, the decision regarding scheduling is + // deferred until run time, and the schedule and chunk size are taken from the + // run-sched-var ICV. If the ICV is set to auto, the schedule is + // implementation defined + // + // while(__kmpc_dispatch_next(&LB, &UB)) { + // idx = LB; + // while (idx <= UB) { BODY; ++idx; + // __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only. + // } // inner loop // } // - // otherwise (when there's no lastprivate): + // OpenMP [2.7.1, Loop Construct, Description, table 2-1] + // When schedule(static, chunk_size) is specified, iterations are divided into + // chunks of size chunk_size, and the chunks are assigned to the threads in + // the team in a round-robin fashion in the order of the thread number. // - // for (IV in 0..LastIteration) BODY; - // ; + // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) { + // while (idx <= UB) { BODY; ++idx; } // inner loop + // LB = LB + ST; + // UB = UB + ST; + // } // - // Walk clauses and process safelen/lastprivate. - bool SeparateIter = false; - LoopStack.setParallel(); - LoopStack.setVectorizerEnable(true); - for (auto C : S.clauses()) { - switch (C->getClauseKind()) { - case OMPC_safelen: { - RValue Len = EmitAnyExpr(cast(C)->getSafelen(), - AggValueSlot::ignored(), true); - llvm::ConstantInt *Val = cast(Len.getScalarVal()); - LoopStack.setVectorizerWidth(Val->getZExtValue()); - // In presence of finite 'safelen', it may be unsafe to mark all - // the memory instructions parallel, because loop-carried - // dependences of 'safelen' iterations are possible. - LoopStack.setParallel(false); - break; - } - case OMPC_aligned: - EmitOMPAlignedClause(*this, CGM, cast(*C)); - break; - case OMPC_lastprivate: - SeparateIter = true; - break; - default: - // Not handled yet - ; - } - } + const Expr *IVExpr = S.getIterationVariable(); + const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); + const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); - InlinedOpenMPRegion Region(*this, S.getAssociatedStmt()); - RunCleanupsScope DirectiveScope(*this); + RT.emitForInit( + *this, S.getLocStart(), ScheduleKind, IVSize, IVSigned, Ordered, IL, LB, + (DynamicOrOrdered ? EmitAnyExpr(S.getLastIteration()).getScalarVal() + : UB), + ST, Chunk); - CGDebugInfo *DI = getDebugInfo(); - if (DI) - DI->EmitLexicalBlockStart(Builder, S.getSourceRange().getBegin()); + auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end"); - // Emit the loop iteration variable. - const Expr *IVExpr = S.getIterationVariable(); - const VarDecl *IVDecl = cast(cast(IVExpr)->getDecl()); - EmitVarDecl(*IVDecl); - EmitIgnoredExpr(S.getInit()); + // Start the loop with a block that tests the condition. + auto CondBlock = createBasicBlock("omp.dispatch.cond"); + EmitBlock(CondBlock); + LoopStack.push(CondBlock); - // Emit the iterations count variable. - // If it is not a variable, Sema decided to calculate iterations count on each - // iteration (e.g., it is foldable into a constant). - if (auto LIExpr = dyn_cast(S.getLastIteration())) { - EmitVarDecl(*cast(LIExpr->getDecl())); - // Emit calculation of the iterations count. 
- EmitIgnoredExpr(S.getCalcLastIteration()); + llvm::Value *BoolCondVal = nullptr; + if (!DynamicOrOrdered) { + // UB = min(UB, GlobalUB) + EmitIgnoredExpr(S.getEnsureUpperBound()); + // IV = LB + EmitIgnoredExpr(S.getInit()); + // IV < UB + BoolCondVal = EvaluateExprAsBool(S.getCond(false)); + } else { + BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, + IL, LB, UB, ST); } - if (SeparateIter) { - // Emit: if (LastIteration > 0) - begin. - RegionCounter Cnt = getPGORegionCounter(&S); - auto ThenBlock = createBasicBlock("simd.if.then"); - auto ContBlock = createBasicBlock("simd.if.end"); - EmitBranchOnBoolExpr(S.getPreCond(), ThenBlock, ContBlock, Cnt.getCount()); - EmitBlock(ThenBlock); - Cnt.beginRegion(Builder); - // Emit 'then' code. - { - OMPPrivateScope LoopScope(*this); - EmitPrivateLoopCounters(*this, LoopScope, S.counters()); - EmitOMPInnerLoop(S, LoopScope, /* SeparateIter */ true); - EmitOMPLoopBody(S, /* SeparateIter */ true); - } - EmitOMPSimdFinal(S); - // Emit: if (LastIteration != 0) - end. - EmitBranch(ContBlock); - EmitBlock(ContBlock, true); - } else { - { - OMPPrivateScope LoopScope(*this); - EmitPrivateLoopCounters(*this, LoopScope, S.counters()); - EmitOMPInnerLoop(S, LoopScope); - } - EmitOMPSimdFinal(S); + // If there are any cleanups between here and the loop-exit scope, + // create a block to stage a loop exit along. + auto ExitBlock = LoopExit.getBlock(); + if (LoopScope.requiresCleanups()) + ExitBlock = createBasicBlock("omp.dispatch.cleanup"); + + auto LoopBody = createBasicBlock("omp.dispatch.body"); + Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock); + if (ExitBlock != LoopExit.getBlock()) { + EmitBlock(ExitBlock); + EmitBranchThroughCleanup(LoopExit); + } + EmitBlock(LoopBody); + + // Emit "IV = LB" (in case of static schedule, we have already calculated new + // LB for loop condition and emitted it above). + if (DynamicOrOrdered) + EmitIgnoredExpr(S.getInit()); + + // Create a block for the increment. + auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc"); + BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); + + SourceLocation Loc = S.getLocStart(); + // Generate !llvm.loop.parallel metadata for loads and stores for loops with + // dynamic/guided scheduling and without ordered clause. + LoopStack.setParallel((ScheduleKind == OMPC_SCHEDULE_dynamic || + ScheduleKind == OMPC_SCHEDULE_guided) && + !Ordered); + EmitOMPInnerLoop( + S, LoopScope.requiresCleanups(), S.getCond(/*SeparateIter=*/false), + S.getInc(), + [&S](CodeGenFunction &CGF) { + CGF.EmitOMPLoopBody(S); + CGF.EmitStopPoint(&S); + }, + [Ordered, IVSize, IVSigned, Loc](CodeGenFunction &CGF) { + if (Ordered) { + CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd( + CGF, Loc, IVSize, IVSigned); + } + }); + + EmitBlock(Continue.getBlock()); + BreakContinueStack.pop_back(); + if (!DynamicOrOrdered) { + // Emit "LB = LB + Stride", "UB = UB + Stride". + EmitIgnoredExpr(S.getNextLowerBound()); + EmitIgnoredExpr(S.getNextUpperBound()); } - if (DI) - DI->EmitLexicalBlockEnd(Builder, S.getSourceRange().getEnd()); + EmitBranch(CondBlock); + LoopStack.pop(); + // Emit the fall-through block. + EmitBlock(LoopExit.getBlock()); + + // Tell the runtime we are done. + if (!DynamicOrOrdered) + RT.emitForStaticFinish(*this, S.getLocEnd()); } /// \brief Emit a helper variable and return corresponding lvalue. 
@@ -508,7 +987,39 @@ static LValue EmitOMPHelperVar(CodeGenFunction &CGF, return CGF.EmitLValue(Helper); } -void CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { +static std::pair +emitScheduleClause(CodeGenFunction &CGF, const OMPLoopDirective &S, + bool OuterRegion) { + // Detect the loop schedule kind and chunk. + auto ScheduleKind = OMPC_SCHEDULE_unknown; + llvm::Value *Chunk = nullptr; + if (auto *C = + cast_or_null(S.getSingleClause(OMPC_schedule))) { + ScheduleKind = C->getScheduleKind(); + if (const auto *Ch = C->getChunkSize()) { + if (auto *ImpRef = cast_or_null(C->getHelperChunkSize())) { + if (OuterRegion) { + const VarDecl *ImpVar = cast(ImpRef->getDecl()); + CGF.EmitVarDecl(*ImpVar); + CGF.EmitStoreThroughLValue( + CGF.EmitAnyExpr(Ch), + CGF.MakeNaturalAlignAddrLValue(CGF.GetAddrOfLocalVar(ImpVar), + ImpVar->getType())); + } else { + Ch = ImpRef; + } + } + if (!C->getHelperChunkSize() || !OuterRegion) { + Chunk = CGF.EmitScalarExpr(Ch); + Chunk = CGF.EmitScalarConversion(Chunk, Ch->getType(), + S.getIterationVariable()->getType()); + } + } + } + return std::make_pair(Chunk, ScheduleKind); +} + +bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { // Emit the loop iteration variable. auto IVExpr = cast(S.getIterationVariable()); auto IVDecl = cast(IVExpr->getDecl()); @@ -525,15 +1036,25 @@ void CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { auto &RT = CGM.getOpenMPRuntime(); + bool HasLastprivateClause; // Check pre-condition. { // Skip the entire loop if we don't meet the precondition. - RegionCounter Cnt = getPGORegionCounter(&S); - auto ThenBlock = createBasicBlock("omp.precond.then"); - auto ContBlock = createBasicBlock("omp.precond.end"); - EmitBranchOnBoolExpr(S.getPreCond(), ThenBlock, ContBlock, Cnt.getCount()); - EmitBlock(ThenBlock); - Cnt.beginRegion(Builder); + // If the condition constant folds and can be elided, avoid emitting the + // whole loop. + bool CondConstant; + llvm::BasicBlock *ContBlock = nullptr; + if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { + if (!CondConstant) + return false; + } else { + auto *ThenBlock = createBasicBlock("omp.precond.then"); + ContBlock = createBasicBlock("omp.precond.end"); + emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock, + getProfileCount(&S)); + EmitBlock(ThenBlock); + incrementProfileCounter(&S); + } // Emit 'then' code. { // Emit helper vars inits. @@ -547,105 +1068,342 @@ void CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { EmitOMPHelperVar(*this, cast(S.getIsLastIterVariable())); OMPPrivateScope LoopScope(*this); + if (EmitOMPFirstprivateClause(S, LoopScope)) { + // Emit implicit barrier to synchronize threads and avoid data races on + // initialization of firstprivate variables. + CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), + OMPD_unknown); + } + EmitOMPPrivateClause(S, LoopScope); + HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope); + EmitOMPReductionClauseInit(S, LoopScope); EmitPrivateLoopCounters(*this, LoopScope, S.counters()); + (void)LoopScope.Privatize(); // Detect the loop schedule kind and chunk. 
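// For the static non-chunked fast path chosen below, the emitted code
// boils down to one init/fini pair around the inner loop (a rough sketch
// of the libomp calls; the exact arguments are built by CGOpenMPRuntime):
//
//   __kmpc_for_static_init_4(&loc, tid, /*kmp_sch_static*/ 34,
//                            &IL, &LB, &UB, &ST, /*incr=*/1, /*chunk=*/1);
//   UB = min(UB, GlobalUB); IV = LB;
//   while (IV <= UB) { BODY; ++IV; }
//   __kmpc_for_static_fini(&loc, tid);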
- auto ScheduleKind = OMPC_SCHEDULE_unknown; - llvm::Value *Chunk = nullptr; - if (auto C = cast_or_null( - S.getSingleClause(OMPC_schedule))) { - ScheduleKind = C->getScheduleKind(); - if (auto Ch = C->getChunkSize()) { - Chunk = EmitScalarExpr(Ch); - Chunk = EmitScalarConversion(Chunk, Ch->getType(), - S.getIterationVariable()->getType()); - } - } + llvm::Value *Chunk; + OpenMPScheduleClauseKind ScheduleKind; + auto ScheduleInfo = + emitScheduleClause(*this, S, /*OuterRegion=*/false); + Chunk = ScheduleInfo.first; + ScheduleKind = ScheduleInfo.second; const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); + const bool Ordered = S.getSingleClause(OMPC_ordered) != nullptr; if (RT.isStaticNonchunked(ScheduleKind, - /* Chunked */ Chunk != nullptr)) { + /* Chunked */ Chunk != nullptr) && + !Ordered) { // OpenMP [2.7.1, Loop Construct, Description, table 2-1] // When no chunk_size is specified, the iteration space is divided into // chunks that are approximately equal in size, and at most one chunk is // distributed to each thread. Note that the size of the chunks is // unspecified in this case. - RT.EmitOMPForInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned, - IL.getAddress(), LB.getAddress(), UB.getAddress(), - ST.getAddress()); + RT.emitForInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned, + Ordered, IL.getAddress(), LB.getAddress(), + UB.getAddress(), ST.getAddress()); // UB = min(UB, GlobalUB); EmitIgnoredExpr(S.getEnsureUpperBound()); // IV = LB; EmitIgnoredExpr(S.getInit()); // while (idx <= UB) { BODY; ++idx; } - EmitOMPInnerLoop(S, LoopScope); + EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), + S.getCond(/*SeparateIter=*/false), S.getInc(), + [&S](CodeGenFunction &CGF) { + CGF.EmitOMPLoopBody(S); + CGF.EmitStopPoint(&S); + }, + [](CodeGenFunction &) {}); // Tell the runtime we are done. - RT.EmitOMPForFinish(*this, S.getLocStart(), ScheduleKind); - } else - ErrorUnsupported(&S, "OpenMP loop with requested schedule"); + RT.emitForStaticFinish(*this, S.getLocStart()); + } else { + // Emit the outer loop, which requests its work chunk [LB..UB] from + // runtime and runs the inner loop to process it. + EmitOMPForOuterLoop(ScheduleKind, S, LoopScope, Ordered, + LB.getAddress(), UB.getAddress(), ST.getAddress(), + IL.getAddress(), Chunk); + } + EmitOMPReductionClauseFinal(S); + // Emit final copy of the lastprivate variables if IsLastIter != 0. + if (HasLastprivateClause) + EmitOMPLastprivateClauseFinal( + S, Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart()))); } // We're now done with the loop, so jump to the continuation block. - EmitBranch(ContBlock); - EmitBlock(ContBlock, true); + if (ContBlock) { + EmitBranch(ContBlock); + EmitBlock(ContBlock, true); + } } + return HasLastprivateClause; } void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { - InlinedOpenMPRegion Region(*this, S.getAssociatedStmt()); - RunCleanupsScope DirectiveScope(*this); - - CGDebugInfo *DI = getDebugInfo(); - if (DI) - DI->EmitLexicalBlockStart(Builder, S.getSourceRange().getBegin()); - - EmitOMPWorksharingLoop(S); + LexicalScope Scope(*this, S.getSourceRange()); + bool HasLastprivates = false; + auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF) { + HasLastprivates = CGF.EmitOMPWorksharingLoop(S); + }; + CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen); // Emit an implicit barrier at the end. 
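// Barrier rule applied just below: emit the implicit barrier unless
// 'nowait' is present, but keep it even with 'nowait' when lastprivates
// exist, since every thread must wait for the last iteration's values to
// be copied out. E.g. (illustrative):
//
//   #pragma omp for nowait lastprivate(x)   // still ends in a barrier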
- CGM.getOpenMPRuntime().EmitOMPBarrierCall(*this, S.getLocStart(), - /*IsExplicit*/ false); - if (DI) - DI->EmitLexicalBlockEnd(Builder, S.getSourceRange().getEnd()); + if (!S.getSingleClause(OMPC_nowait) || HasLastprivates) { + CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for); + } } void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &) { llvm_unreachable("CodeGen for 'omp for simd' is not supported yet."); } -void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &) { - llvm_unreachable("CodeGen for 'omp sections' is not supported yet."); +static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty, + const Twine &Name, + llvm::Value *Init = nullptr) { + auto LVal = CGF.MakeNaturalAlignAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty); + if (Init) + CGF.EmitScalarInit(Init, LVal); + return LVal; } -void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &) { - llvm_unreachable("CodeGen for 'omp section' is not supported yet."); +static OpenMPDirectiveKind emitSections(CodeGenFunction &CGF, + const OMPExecutableDirective &S) { + auto *Stmt = cast(S.getAssociatedStmt())->getCapturedStmt(); + auto *CS = dyn_cast(Stmt); + if (CS && CS->size() > 1) { + bool HasLastprivates = false; + auto &&CodeGen = [&S, CS, &HasLastprivates](CodeGenFunction &CGF) { + auto &C = CGF.CGM.getContext(); + auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); + // Emit helper vars inits. + LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.", + CGF.Builder.getInt32(0)); + auto *GlobalUBVal = CGF.Builder.getInt32(CS->size() - 1); + LValue UB = + createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal); + LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.", + CGF.Builder.getInt32(1)); + LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.", + CGF.Builder.getInt32(0)); + // Loop counter. + LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv."); + OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue); + CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV); + OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue); + CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB); + // Generate condition for loop. + BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue, + OK_Ordinary, S.getLocStart(), + /*fpContractable=*/false); + // Increment for loop counter. + UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, + OK_Ordinary, S.getLocStart()); + auto BodyGen = [CS, &S, &IV](CodeGenFunction &CGF) { + // Iterate through all sections and emit a switch construct: + // switch (IV) { + // case 0: + // ; + // break; + // ... 
+ // case - 1: + // - 1]>; + // break; + // } + // .omp.sections.exit: + auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit"); + auto *SwitchStmt = CGF.Builder.CreateSwitch( + CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB, + CS->size()); + unsigned CaseNumber = 0; + for (auto C = CS->children(); C; ++C, ++CaseNumber) { + auto CaseBB = CGF.createBasicBlock(".omp.sections.case"); + CGF.EmitBlock(CaseBB); + SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB); + CGF.EmitStmt(*C); + CGF.EmitBranch(ExitBB); + } + CGF.EmitBlock(ExitBB, /*IsFinished=*/true); + }; + + CodeGenFunction::OMPPrivateScope LoopScope(CGF); + if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) { + // Emit implicit barrier to synchronize threads and avoid data races on + // initialization of firstprivate variables. + CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(), + OMPD_unknown); + } + CGF.EmitOMPPrivateClause(S, LoopScope); + HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); + CGF.EmitOMPReductionClauseInit(S, LoopScope); + (void)LoopScope.Privatize(); + + // Emit static non-chunked loop. + CGF.CGM.getOpenMPRuntime().emitForInit( + CGF, S.getLocStart(), OMPC_SCHEDULE_static, /*IVSize=*/32, + /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(), + LB.getAddress(), UB.getAddress(), ST.getAddress()); + // UB = min(UB, GlobalUB); + auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart()); + auto *MinUBGlobalUB = CGF.Builder.CreateSelect( + CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal); + CGF.EmitStoreOfScalar(MinUBGlobalUB, UB); + // IV = LB; + CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV); + // while (idx <= UB) { BODY; ++idx; } + CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen, + [](CodeGenFunction &) {}); + // Tell the runtime we are done. + CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocStart()); + CGF.EmitOMPReductionClauseFinal(S); + + // Emit final copy of the lastprivate variables if IsLastIter != 0. + if (HasLastprivates) + CGF.EmitOMPLastprivateClauseFinal( + S, CGF.Builder.CreateIsNotNull( + CGF.EmitLoadOfScalar(IL, S.getLocStart()))); + }; + + CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, CodeGen); + // Emit barrier for lastprivates only if 'sections' directive has 'nowait' + // clause. Otherwise the barrier will be generated by the codegen for the + // directive. + if (HasLastprivates && S.getSingleClause(OMPC_nowait)) { + // Emit implicit barrier to synchronize threads and avoid data races on + // initialization of firstprivate variables. + CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(), + OMPD_unknown); + } + return OMPD_sections; + } + // If only one section is found - no need to generate loop, emit as a single + // region. + bool HasFirstprivates; + // No need to generate reductions for sections with single section region, we + // can use original shared variables for all operations. + bool HasReductions = !S.getClausesOfKind(OMPC_reduction).empty(); + // No need to generate lastprivates for sections with single section region, + // we can use original shared variable for all calculations with barrier at + // the end of the sections. 
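// In this single-section case the construct is lowered like a 'single'
// region (sketch of the runtime protocol; the trailing barrier is added
// separately unless 'nowait' is present):
//
//   if (__kmpc_single(&loc, tid)) {
//     <the only section statement>;
//     __kmpc_end_single(&loc, tid);
//   }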
+  bool HasLastprivates = !S.getClausesOfKind(OMPC_lastprivate).empty();
+  auto &&CodeGen = [Stmt, &S, &HasFirstprivates](CodeGenFunction &CGF) {
+    CodeGenFunction::OMPPrivateScope SingleScope(CGF);
+    HasFirstprivates = CGF.EmitOMPFirstprivateClause(S, SingleScope);
+    CGF.EmitOMPPrivateClause(S, SingleScope);
+    (void)SingleScope.Privatize();
+
+    CGF.EmitStmt(Stmt);
+    CGF.EnsureInsertPoint();
+  };
+  CGF.CGM.getOpenMPRuntime().emitSingleRegion(CGF, CodeGen, S.getLocStart(),
+                                              llvm::None, llvm::None,
+                                              llvm::None, llvm::None);
+  // Emit barrier for firstprivates, lastprivates or reductions only if
+  // 'sections' directive has 'nowait' clause. Otherwise the barrier will be
+  // generated by the codegen for the directive.
+  if ((HasFirstprivates || HasLastprivates || HasReductions) &&
+      S.getSingleClause(OMPC_nowait)) {
+    // Emit implicit barrier to synchronize threads and avoid data races on
+    // initialization of firstprivate variables.
+    CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(),
+                                               OMPD_unknown);
+  }
+  return OMPD_single;
+}
+
+void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
+  LexicalScope Scope(*this, S.getSourceRange());
+  OpenMPDirectiveKind EmittedAs = emitSections(*this, S);
+  // Emit an implicit barrier at the end.
+  if (!S.getSingleClause(OMPC_nowait)) {
+    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), EmittedAs);
+  }
 }

-void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &) {
-  llvm_unreachable("CodeGen for 'omp single' is not supported yet.");
+void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
+  LexicalScope Scope(*this, S.getSourceRange());
+  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
+    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
+    CGF.EnsureInsertPoint();
+  };
+  CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen);
+}
+
+void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
+  llvm::SmallVector<const Expr *, 8> CopyprivateVars;
+  llvm::SmallVector<const Expr *, 8> DestExprs;
+  llvm::SmallVector<const Expr *, 8> SrcExprs;
+  llvm::SmallVector<const Expr *, 8> AssignmentOps;
+  // Check if there are any 'copyprivate' clauses associated with this
+  // 'single' construct.
+  // Build a list of copyprivate variables along with helper expressions
+  // (<source>, <destination>, <destination> = <source> expressions).
+  for (auto &&I = S.getClausesOfKind(OMPC_copyprivate); I; ++I) {
+    auto *C = cast<OMPCopyprivateClause>(*I);
+    CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
+    DestExprs.append(C->destination_exprs().begin(),
+                     C->destination_exprs().end());
+    SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
+    AssignmentOps.append(C->assignment_ops().begin(),
+                         C->assignment_ops().end());
+  }
+  LexicalScope Scope(*this, S.getSourceRange());
+  // Emit code for 'single' region along with 'copyprivate' clauses.
+  bool HasFirstprivates;
+  auto &&CodeGen = [&S, &HasFirstprivates](CodeGenFunction &CGF) {
+    CodeGenFunction::OMPPrivateScope SingleScope(CGF);
+    HasFirstprivates = CGF.EmitOMPFirstprivateClause(S, SingleScope);
+    CGF.EmitOMPPrivateClause(S, SingleScope);
+    (void)SingleScope.Privatize();
+
+    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
+    CGF.EnsureInsertPoint();
+  };
+  CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
+                                          CopyprivateVars, DestExprs, SrcExprs,
+                                          AssignmentOps);
+  // Emit an implicit barrier at the end (to avoid data races on firstprivate
+  // init or if no 'nowait' clause was specified and no 'copyprivate' clause).
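+  // For illustration (assumed user code):
+  //   #pragma omp single copyprivate(a)
+  //   a = init();
+  // The 'copyprivate' broadcast performed by the runtime already synchronizes
+  // all threads, which is why the barrier below is emitted only when the
+  // 'copyprivate' list is empty.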
+  if ((!S.getSingleClause(OMPC_nowait) || HasFirstprivates) &&
+      CopyprivateVars.empty()) {
+    CGM.getOpenMPRuntime().emitBarrierCall(
+        *this, S.getLocStart(),
+        S.getSingleClause(OMPC_nowait) ? OMPD_unknown : OMPD_single);
+  }
 }

 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
-  CGM.getOpenMPRuntime().EmitOMPMasterRegion(*this, [&]() -> void {
-    InlinedOpenMPRegion Region(*this, S.getAssociatedStmt());
-    RunCleanupsScope Scope(*this);
-    EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
-    EnsureInsertPoint();
-  }, S.getLocStart());
+  LexicalScope Scope(*this, S.getSourceRange());
+  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
+    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
+    CGF.EnsureInsertPoint();
+  };
+  CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart());
 }

 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
-  CGM.getOpenMPRuntime().EmitOMPCriticalRegion(
-      *this, S.getDirectiveName().getAsString(), [&]() -> void {
-        InlinedOpenMPRegion Region(*this, S.getAssociatedStmt());
-        RunCleanupsScope Scope(*this);
-        EmitStmt(
-            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
-        EnsureInsertPoint();
-      }, S.getLocStart());
+  LexicalScope Scope(*this, S.getSourceRange());
+  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
+    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
+    CGF.EnsureInsertPoint();
+  };
+  CGM.getOpenMPRuntime().emitCriticalRegion(
+      *this, S.getDirectiveName().getAsString(), CodeGen, S.getLocStart());
 }

-void
-CodeGenFunction::EmitOMPParallelForDirective(const OMPParallelForDirective &) {
-  llvm_unreachable("CodeGen for 'omp parallel for' is not supported yet.");
+void CodeGenFunction::EmitOMPParallelForDirective(
+    const OMPParallelForDirective &S) {
+  // Emit directive as a combined directive that consists of two implicit
+  // directives: 'parallel' with 'for' directive.
+  LexicalScope Scope(*this, S.getSourceRange());
+  (void)emitScheduleClause(*this, S, /*OuterRegion=*/true);
+  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
+    CGF.EmitOMPWorksharingLoop(S);
+    // Emit implicit barrier at the end of parallel region, but this barrier
+    // is at the end of 'for' directive, so emit it as the implicit barrier for
+    // this 'for' directive.
+    CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(),
+                                               OMPD_parallel);
+  };
+  emitCommonOMPParallelDirective(*this, S, CodeGen);
 }

 void CodeGenFunction::EmitOMPParallelForSimdDirective(
@@ -654,45 +1412,600 @@ void CodeGenFunction::EmitOMPParallelForSimdDirective(
 }

 void CodeGenFunction::EmitOMPParallelSectionsDirective(
-    const OMPParallelSectionsDirective &) {
-  llvm_unreachable("CodeGen for 'omp parallel sections' is not supported yet.");
+    const OMPParallelSectionsDirective &S) {
+  // Emit directive as a combined directive that consists of two implicit
+  // directives: 'parallel' with 'sections' directive.
+  LexicalScope Scope(*this, S.getSourceRange());
+  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
+    (void)emitSections(CGF, S);
+    // Emit implicit barrier at the end of parallel region.
+    CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(),
+                                               OMPD_parallel);
+  };
+  emitCommonOMPParallelDirective(*this, S, CodeGen);
 }

-void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &) {
-  llvm_unreachable("CodeGen for 'omp task' is not supported yet.");
+void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
+  // Emit outlined function for task construct.
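+  // For illustration, a construct handled here (assumed user code):
+  //   #pragma omp task firstprivate(a) if(cond) final(fin)
+  //   use(a);
+  // The captured body is emitted as an outlined task function; addresses of
+  // the private/firstprivate copies are provided by the copy function loaded
+  // from the captured parameters below.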
+  LexicalScope Scope(*this, S.getSourceRange());
+  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
+  auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
+  auto *I = CS->getCapturedDecl()->param_begin();
+  auto *PartId = std::next(I);
+  // The first function argument for tasks is a thread id, the second one is a
+  // part id (0 for tied tasks, >=0 for untied task).
+  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
+  // Get list of private variables.
+  llvm::SmallVector<const Expr *, 8> PrivateVars;
+  llvm::SmallVector<const Expr *, 8> PrivateCopies;
+  for (auto &&I = S.getClausesOfKind(OMPC_private); I; ++I) {
+    auto *C = cast<OMPPrivateClause>(*I);
+    auto IRef = C->varlist_begin();
+    for (auto *IInit : C->private_copies()) {
+      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
+      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
+        PrivateVars.push_back(*IRef);
+        PrivateCopies.push_back(IInit);
+      }
+      ++IRef;
+    }
+  }
+  EmittedAsPrivate.clear();
+  // Get list of firstprivate variables.
+  llvm::SmallVector<const Expr *, 8> FirstprivateVars;
+  llvm::SmallVector<const Expr *, 8> FirstprivateCopies;
+  llvm::SmallVector<const Expr *, 8> FirstprivateInits;
+  for (auto &&I = S.getClausesOfKind(OMPC_firstprivate); I; ++I) {
+    auto *C = cast<OMPFirstprivateClause>(*I);
+    auto IRef = C->varlist_begin();
+    auto IElemInitRef = C->inits().begin();
+    for (auto *IInit : C->private_copies()) {
+      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
+      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
+        FirstprivateVars.push_back(*IRef);
+        FirstprivateCopies.push_back(IInit);
+        FirstprivateInits.push_back(*IElemInitRef);
+      }
+      ++IRef, ++IElemInitRef;
+    }
+  }
+  auto &&CodeGen = [PartId, &S, &PrivateVars, &FirstprivateVars](
+      CodeGenFunction &CGF) {
+    // Set proper addresses for generated private copies.
+    auto *CS = cast<CapturedStmt>(S.getAssociatedStmt());
+    OMPPrivateScope Scope(CGF);
+    if (!PrivateVars.empty() || !FirstprivateVars.empty()) {
+      auto *CopyFn = CGF.Builder.CreateAlignedLoad(
+          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3)),
+          CGF.PointerAlignInBytes);
+      auto *PrivatesPtr = CGF.Builder.CreateAlignedLoad(
+          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2)),
+          CGF.PointerAlignInBytes);
+      // Map privates.
+      llvm::SmallVector<std::pair<const VarDecl *, llvm::Value *>, 16>
+          PrivatePtrs;
+      llvm::SmallVector<llvm::Value *, 16> CallArgs;
+      CallArgs.push_back(PrivatesPtr);
+      for (auto *E : PrivateVars) {
+        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
+        auto *PrivatePtr =
+            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()));
+        PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
+        CallArgs.push_back(PrivatePtr);
+      }
+      for (auto *E : FirstprivateVars) {
+        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
+        auto *PrivatePtr =
+            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()));
+        PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
+        CallArgs.push_back(PrivatePtr);
+      }
+      CGF.EmitRuntimeCall(CopyFn, CallArgs);
+      for (auto &&Pair : PrivatePtrs) {
+        auto *Replacement =
+            CGF.Builder.CreateAlignedLoad(Pair.second, CGF.PointerAlignInBytes);
+        Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
+      }
+    }
+    (void)Scope.Privatize();
+    if (*PartId) {
+      // TODO: emit code for untied tasks.
+    }
+    CGF.EmitStmt(CS->getCapturedStmt());
+  };
+  auto OutlinedFn =
+      CGM.getOpenMPRuntime().emitTaskOutlinedFunction(S, *I, CodeGen);
+  // Check if we should emit tied or untied task.
+  bool Tied = !S.getSingleClause(OMPC_untied);
+  // Check if the task is final.
+  llvm::PointerIntPair<llvm::Value *, 1, bool> Final;
+  if (auto *Clause = S.getSingleClause(OMPC_final)) {
+    // If the condition constant folds and can be elided, try to avoid emitting
+    // the condition and the dead arm of the if/else.
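+    // E.g. (illustrative) 'final(1)' folds to a constant and needs no runtime
+    // evaluation, while 'final(n > 10)' is evaluated as a boolean below.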
+    auto *Cond = cast<OMPFinalClause>(Clause)->getCondition();
+    bool CondConstant;
+    if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
+      Final.setInt(CondConstant);
+    else
+      Final.setPointer(EvaluateExprAsBool(Cond));
+  } else {
+    // By default the task is not final.
+    Final.setInt(/*IntVal=*/false);
+  }
+  auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
+  const Expr *IfCond = nullptr;
+  if (auto C = S.getSingleClause(OMPC_if)) {
+    IfCond = cast<OMPIfClause>(C)->getCondition();
+  }
+  CGM.getOpenMPRuntime().emitTaskCall(
+      *this, S.getLocStart(), S, Tied, Final, OutlinedFn, SharedsTy,
+      CapturedStruct, IfCond, PrivateVars, PrivateCopies, FirstprivateVars,
+      FirstprivateCopies, FirstprivateInits);
 }

-void CodeGenFunction::EmitOMPTaskyieldDirective(const OMPTaskyieldDirective &) {
-  llvm_unreachable("CodeGen for 'omp taskyield' is not supported yet.");
+void CodeGenFunction::EmitOMPTaskyieldDirective(
+    const OMPTaskyieldDirective &S) {
+  CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart());
 }

 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
-  CGM.getOpenMPRuntime().EmitOMPBarrierCall(*this, S.getLocStart());
+  CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier);
 }

-void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &) {
-  llvm_unreachable("CodeGen for 'omp taskwait' is not supported yet.");
+void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
+  CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getLocStart());
 }

 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
-  CGM.getOpenMPRuntime().EmitOMPFlush(
-      *this, [&]() -> ArrayRef<const Expr *> {
-        if (auto C = S.getSingleClause(/*K*/ OMPC_flush)) {
-          auto FlushClause = cast<OMPFlushClause>(C);
-          return llvm::makeArrayRef(FlushClause->varlist_begin(),
-                                    FlushClause->varlist_end());
-        }
-        return llvm::None;
-      }(),
-      S.getLocStart());
+  CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> {
+    if (auto C = S.getSingleClause(/*K*/ OMPC_flush)) {
+      auto FlushClause = cast<OMPFlushClause>(C);
+      return llvm::makeArrayRef(FlushClause->varlist_begin(),
+                                FlushClause->varlist_end());
+    }
+    return llvm::None;
+  }(), S.getLocStart());
+}
+
+void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
+  LexicalScope Scope(*this, S.getSourceRange());
+  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
+    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
+    CGF.EnsureInsertPoint();
+  };
+  CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart());
+}
+
+static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
+                                         QualType SrcType, QualType DestType) {
+  assert(CGF.hasScalarEvaluationKind(DestType) &&
+         "DestType must have scalar evaluation kind.");
+  assert(!Val.isAggregate() && "Must be a scalar or complex.");
+  return Val.isScalar()
+             ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType)
+             : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType,
+                                                 DestType);
+}
+
+static CodeGenFunction::ComplexPairTy
+convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
+                      QualType DestType) {
+  assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
+         "DestType must have complex evaluation kind.");
+  CodeGenFunction::ComplexPairTy ComplexVal;
+  if (Val.isScalar()) {
+    // Convert the input element to the element type of the complex.
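+    // E.g. (illustrative) converting a 'float' operand to '_Complex double':
+    // the real part is the converted scalar, and the imaginary part is the
+    // zero value created below.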
+    auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
+    auto ScalarVal =
+        CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestElementType);
+    ComplexVal = CodeGenFunction::ComplexPairTy(
+        ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
+  } else {
+    assert(Val.isComplex() && "Must be a scalar or complex.");
+    auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
+    auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
+    ComplexVal.first = CGF.EmitScalarConversion(
+        Val.getComplexVal().first, SrcElementType, DestElementType);
+    ComplexVal.second = CGF.EmitScalarConversion(
+        Val.getComplexVal().second, SrcElementType, DestElementType);
+  }
+  return ComplexVal;
+}
+
+static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst,
+                                  LValue LVal, RValue RVal) {
+  if (LVal.isGlobalReg()) {
+    CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
+  } else {
+    CGF.EmitAtomicStore(RVal, LVal, IsSeqCst ? llvm::SequentiallyConsistent
+                                             : llvm::Monotonic,
+                        LVal.isVolatile(), /*IsInit=*/false);
+  }
+}
+
+static void emitSimpleStore(CodeGenFunction &CGF, LValue LVal, RValue RVal,
+                            QualType RValTy) {
+  switch (CGF.getEvaluationKind(LVal.getType())) {
+  case TEK_Scalar:
+    CGF.EmitStoreThroughLValue(
+        RValue::get(convertToScalarValue(CGF, RVal, RValTy, LVal.getType())),
+        LVal);
+    break;
+  case TEK_Complex:
+    CGF.EmitStoreOfComplex(
+        convertToComplexValue(CGF, RVal, RValTy, LVal.getType()), LVal,
+        /*isInit=*/false);
+    break;
+  case TEK_Aggregate:
+    llvm_unreachable("Must be a scalar or complex.");
+  }
+}
+
+static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
+                                  const Expr *X, const Expr *V,
+                                  SourceLocation Loc) {
+  // v = x;
+  assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
+  assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
+  LValue XLValue = CGF.EmitLValue(X);
+  LValue VLValue = CGF.EmitLValue(V);
+  RValue Res = XLValue.isGlobalReg()
+                   ? CGF.EmitLoadOfLValue(XLValue, Loc)
+                   : CGF.EmitAtomicLoad(XLValue, Loc,
+                                        IsSeqCst ? llvm::SequentiallyConsistent
+                                                 : llvm::Monotonic,
+                                        XLValue.isVolatile());
+  // OpenMP, 2.12.6, atomic Construct
+  // Any atomic construct with a seq_cst clause forces the atomically
+  // performed operation to include an implicit flush operation without a
+  // list.
+  if (IsSeqCst)
+    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
+  emitSimpleStore(CGF, VLValue, Res, X->getType().getNonReferenceType());
+}
+
+static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
+                                   const Expr *X, const Expr *E,
+                                   SourceLocation Loc) {
+  // x = expr;
+  assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
+  emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
+  // OpenMP, 2.12.6, atomic Construct
+  // Any atomic construct with a seq_cst clause forces the atomically
+  // performed operation to include an implicit flush operation without a
+  // list.
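+  // For illustration (assumed user code):
+  //   #pragma omp atomic write seq_cst
+  //   x = expr;
+  // The store above is emitted atomically, and 'seq_cst' additionally
+  // triggers the implicit flush below.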
+  if (IsSeqCst)
+    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
+}
+
+static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
+                                                RValue Update,
+                                                BinaryOperatorKind BO,
+                                                llvm::AtomicOrdering AO,
+                                                bool IsXLHSInRHSPart) {
+  auto &Context = CGF.CGM.getContext();
+  // Allow atomicrmw only if 'x' and 'update' are integer values, the lvalue
+  // for the 'x' expression is simple, and atomic is allowed for the given
+  // type on the target platform.
+  if (BO == BO_Comma || !Update.isScalar() ||
+      !Update.getScalarVal()->getType()->isIntegerTy() ||
+      !X.isSimple() || (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
+                        (Update.getScalarVal()->getType() !=
+                         X.getAddress()->getType()->getPointerElementType())) ||
+      !X.getAddress()->getType()->getPointerElementType()->isIntegerTy() ||
+      !Context.getTargetInfo().hasBuiltinAtomic(
+          Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
+    return std::make_pair(false, RValue::get(nullptr));
+
+  llvm::AtomicRMWInst::BinOp RMWOp;
+  switch (BO) {
+  case BO_Add:
+    RMWOp = llvm::AtomicRMWInst::Add;
+    break;
+  case BO_Sub:
+    if (!IsXLHSInRHSPart)
+      return std::make_pair(false, RValue::get(nullptr));
+    RMWOp = llvm::AtomicRMWInst::Sub;
+    break;
+  case BO_And:
+    RMWOp = llvm::AtomicRMWInst::And;
+    break;
+  case BO_Or:
+    RMWOp = llvm::AtomicRMWInst::Or;
+    break;
+  case BO_Xor:
+    RMWOp = llvm::AtomicRMWInst::Xor;
+    break;
+  case BO_LT:
+    RMWOp = X.getType()->hasSignedIntegerRepresentation()
+                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
+                                   : llvm::AtomicRMWInst::Max)
+                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
+                                   : llvm::AtomicRMWInst::UMax);
+    break;
+  case BO_GT:
+    RMWOp = X.getType()->hasSignedIntegerRepresentation()
+                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
+                                   : llvm::AtomicRMWInst::Min)
+                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
+                                   : llvm::AtomicRMWInst::UMin);
+    break;
+  case BO_Assign:
+    RMWOp = llvm::AtomicRMWInst::Xchg;
+    break;
+  case BO_Mul:
+  case BO_Div:
+  case BO_Rem:
+  case BO_Shl:
+  case BO_Shr:
+  case BO_LAnd:
+  case BO_LOr:
+    return std::make_pair(false, RValue::get(nullptr));
+  case BO_PtrMemD:
+  case BO_PtrMemI:
+  case BO_LE:
+  case BO_GE:
+  case BO_EQ:
+  case BO_NE:
+  case BO_AddAssign:
+  case BO_SubAssign:
+  case BO_AndAssign:
+  case BO_OrAssign:
+  case BO_XorAssign:
+  case BO_MulAssign:
+  case BO_DivAssign:
+  case BO_RemAssign:
+  case BO_ShlAssign:
+  case BO_ShrAssign:
+  case BO_Comma:
+    llvm_unreachable("Unsupported atomic update operation");
+  }
+  auto *UpdateVal = Update.getScalarVal();
+  if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
+    UpdateVal = CGF.Builder.CreateIntCast(
+        IC, X.getAddress()->getType()->getPointerElementType(),
+        X.getType()->hasSignedIntegerRepresentation());
+  }
+  auto *Res = CGF.Builder.CreateAtomicRMW(RMWOp, X.getAddress(), UpdateVal, AO);
+  return std::make_pair(true, RValue::get(Res));
+}
+
+std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
+    LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
+    llvm::AtomicOrdering AO, SourceLocation Loc,
+    const llvm::function_ref<RValue(RValue)> &CommonGen) {
+  // Update expressions are allowed to have the following forms:
+  // x binop= expr; -> xrval binop expr;
+  // x++, ++x -> xrval + 1;
+  // x--, --x -> xrval - 1;
+  // x = x binop expr; -> xrval binop expr;
+  // x = expr Op x; -> expr binop xrval;
+  auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
+  if (!Res.first) {
+    if (X.isGlobalReg()) {
+      // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
+      // 'xrval'.
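+      // E.g. (illustrative) 'x += expr' on a plain integer 'x' is normally
+      // handled by the single 'atomicrmw' emitted above; a global register
+      // 'x' is updated here non-atomically, and all remaining cases fall
+      // through to the compare-and-swap loop below.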
+      EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
+    } else {
+      // Perform compare-and-swap procedure.
+      EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
+    }
+  }
+  return Res;
+}
+
+static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
+                                    const Expr *X, const Expr *E,
+                                    const Expr *UE, bool IsXLHSInRHSPart,
+                                    SourceLocation Loc) {
+  assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
+         "Update expr in 'atomic update' must be a binary operator.");
+  auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
+  // Update expressions are allowed to have the following forms:
+  // x binop= expr; -> xrval binop expr;
+  // x++, ++x -> xrval + 1;
+  // x--, --x -> xrval - 1;
+  // x = x binop expr; -> xrval binop expr;
+  // x = expr Op x; -> expr binop xrval;
+  assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
+  LValue XLValue = CGF.EmitLValue(X);
+  RValue ExprRValue = CGF.EmitAnyExpr(E);
+  auto AO = IsSeqCst ? llvm::SequentiallyConsistent : llvm::Monotonic;
+  auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
+  auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
+  auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
+  auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
+  auto Gen =
+      [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue {
+        CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
+        CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
+        return CGF.EmitAnyExpr(UE);
+      };
+  (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
+      XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
+  // OpenMP, 2.12.6, atomic Construct
+  // Any atomic construct with a seq_cst clause forces the atomically
+  // performed operation to include an implicit flush operation without a
+  // list.
+  if (IsSeqCst)
+    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
+}
+
+static RValue convertToType(CodeGenFunction &CGF, RValue Value,
+                            QualType SourceType, QualType ResType) {
+  switch (CGF.getEvaluationKind(ResType)) {
+  case TEK_Scalar:
+    return RValue::get(convertToScalarValue(CGF, Value, SourceType, ResType));
+  case TEK_Complex: {
+    auto Res = convertToComplexValue(CGF, Value, SourceType, ResType);
+    return RValue::getComplex(Res.first, Res.second);
+  }
+  case TEK_Aggregate:
+    break;
+  }
+  llvm_unreachable("Must be a scalar or complex.");
+}
+
+static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
+                                     bool IsPostfixUpdate, const Expr *V,
+                                     const Expr *X, const Expr *E,
+                                     const Expr *UE, bool IsXLHSInRHSPart,
+                                     SourceLocation Loc) {
+  assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
+  assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
+  RValue NewVVal;
+  LValue VLValue = CGF.EmitLValue(V);
+  LValue XLValue = CGF.EmitLValue(X);
+  RValue ExprRValue = CGF.EmitAnyExpr(E);
+  auto AO = IsSeqCst ? llvm::SequentiallyConsistent : llvm::Monotonic;
+  QualType NewVValType;
+  if (UE) {
+    // 'x' is updated with some additional value.
+    assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
+           "Update expr in 'atomic capture' must be a binary operator.");
+    auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
+    // Update expressions are allowed to have the following forms:
+    // x binop= expr; -> xrval binop expr;
+    // x++, ++x -> xrval + 1;
+    // x--, --x -> xrval - 1;
+    // x = x binop expr; -> xrval binop expr;
+    // x = expr Op x; -> expr binop xrval;
+    auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
+    auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
+    auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
+    NewVValType = XRValExpr->getType();
+    auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
+    auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
+                  IsSeqCst, IsPostfixUpdate](RValue XRValue) -> RValue {
+      CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
+      CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
+      RValue Res = CGF.EmitAnyExpr(UE);
+      NewVVal = IsPostfixUpdate ? XRValue : Res;
+      return Res;
+    };
+    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
+        XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
+    if (Res.first) {
+      // 'atomicrmw' instruction was generated.
+      if (IsPostfixUpdate) {
+        // Use old value from 'atomicrmw'.
+        NewVVal = Res.second;
+      } else {
+        // 'atomicrmw' does not provide new value, so evaluate it using old
+        // value of 'x'.
+        CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
+        CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
+        NewVVal = CGF.EmitAnyExpr(UE);
+      }
+    }
+  } else {
+    // 'x' is simply rewritten with some 'expr'.
+    NewVValType = X->getType().getNonReferenceType();
+    ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
+                               X->getType().getNonReferenceType());
+    auto &&Gen = [&CGF, &NewVVal, ExprRValue](RValue XRValue) -> RValue {
+      NewVVal = XRValue;
+      return ExprRValue;
+    };
+    // Try to perform atomicrmw xchg, otherwise simple exchange.
+    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
+        XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
+        Loc, Gen);
+    if (Res.first) {
+      // 'atomicrmw' instruction was generated.
+      NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
+    }
+  }
+  // Emit post-update store to 'v' of old/new 'x' value.
+  emitSimpleStore(CGF, VLValue, NewVVal, NewVValType);
+  // OpenMP, 2.12.6, atomic Construct
+  // Any atomic construct with a seq_cst clause forces the atomically
+  // performed operation to include an implicit flush operation without a
+  // list.
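+  // E.g. (illustrative) under '#pragma omp atomic capture', 'v = x++;'
+  // stores the old value of 'x' into 'v' (postfix update), while 'v = ++x;'
+  // stores the new value, matching the NewVVal selection above.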
+  if (IsSeqCst)
+    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
+}
+
+static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
+                              bool IsSeqCst, bool IsPostfixUpdate,
+                              const Expr *X, const Expr *V, const Expr *E,
+                              const Expr *UE, bool IsXLHSInRHSPart,
+                              SourceLocation Loc) {
+  switch (Kind) {
+  case OMPC_read:
+    EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
+    break;
+  case OMPC_write:
+    EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc);
+    break;
+  case OMPC_unknown:
+  case OMPC_update:
+    EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc);
+    break;
+  case OMPC_capture:
+    EmitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE,
+                             IsXLHSInRHSPart, Loc);
+    break;
+  case OMPC_if:
+  case OMPC_final:
+  case OMPC_num_threads:
+  case OMPC_private:
+  case OMPC_firstprivate:
+  case OMPC_lastprivate:
+  case OMPC_reduction:
+  case OMPC_safelen:
+  case OMPC_collapse:
+  case OMPC_default:
+  case OMPC_seq_cst:
+  case OMPC_shared:
+  case OMPC_linear:
+  case OMPC_aligned:
+  case OMPC_copyin:
+  case OMPC_copyprivate:
+  case OMPC_flush:
+  case OMPC_proc_bind:
+  case OMPC_schedule:
+  case OMPC_ordered:
+  case OMPC_nowait:
+  case OMPC_untied:
+  case OMPC_threadprivate:
+  case OMPC_mergeable:
+    llvm_unreachable("Clause is not allowed in 'omp atomic'.");
+  }
+}
+
+void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
+  bool IsSeqCst = S.getSingleClause(/*K=*/OMPC_seq_cst);
+  OpenMPClauseKind Kind = OMPC_unknown;
+  for (auto *C : S.clauses()) {
+    // Find first clause (skip seq_cst clause, if it is first).
+    if (C->getClauseKind() != OMPC_seq_cst) {
+      Kind = C->getClauseKind();
+      break;
+    }
+  }
+
+  const auto *CS =
+      S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
+  if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) {
+    enterFullExpression(EWC);
+  }
+  // Processing for statements under 'atomic capture'.
+  if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) {
+    for (const auto *C : Compound->body()) {
+      if (const auto *EWC = dyn_cast<ExprWithCleanups>(C)) {
+        enterFullExpression(EWC);
+      }
+    }
+  }
+
+  LexicalScope Scope(*this, S.getSourceRange());
+  auto &&CodeGen = [&S, Kind, IsSeqCst](CodeGenFunction &CGF) {
+    EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(),
+                      S.getV(), S.getExpr(), S.getUpdateExpr(),
+                      S.isXLHSInRHSPart(), S.getLocStart());
+  };
+  CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen);
 }

 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &) {
@@ -702,4 +2015,3 @@ void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &) {
 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &) {
   llvm_unreachable("CodeGen for 'omp teams' is not supported yet.");
 }
-
-- 
cgit v1.2.3