diff options
Diffstat (limited to 'contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp')
-rw-r--r-- | contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp | 1040 |
1 files changed, 778 insertions, 262 deletions
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp index e5f507aa41bd..bcd2ac51929d 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -20,21 +20,195 @@ using namespace clang; using namespace CodeGen; +void CodeGenFunction::GenerateOpenMPCapturedVars( + const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) { + const RecordDecl *RD = S.getCapturedRecordDecl(); + auto CurField = RD->field_begin(); + auto CurCap = S.captures().begin(); + for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(), + E = S.capture_init_end(); + I != E; ++I, ++CurField, ++CurCap) { + if (CurField->hasCapturedVLAType()) { + auto VAT = CurField->getCapturedVLAType(); + auto *Val = VLASizeMap[VAT->getSizeExpr()]; + CapturedVars.push_back(Val); + } else if (CurCap->capturesThis()) + CapturedVars.push_back(CXXThisValue); + else if (CurCap->capturesVariableByCopy()) + CapturedVars.push_back( + EmitLoadOfLValue(EmitLValue(*I), SourceLocation()).getScalarVal()); + else { + assert(CurCap->capturesVariable() && "Expected capture by reference."); + CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer()); + } + } +} + +static Address castValueFromUintptr(CodeGenFunction &CGF, QualType DstType, + StringRef Name, LValue AddrLV, + bool isReferenceType = false) { + ASTContext &Ctx = CGF.getContext(); + + auto *CastedPtr = CGF.EmitScalarConversion( + AddrLV.getAddress().getPointer(), Ctx.getUIntPtrType(), + Ctx.getPointerType(DstType), SourceLocation()); + auto TmpAddr = + CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType)) + .getAddress(); + + // If we are dealing with references we need to return the address of the + // reference instead of the reference of the value. + if (isReferenceType) { + QualType RefType = Ctx.getLValueReferenceType(DstType); + auto *RefVal = TmpAddr.getPointer(); + TmpAddr = CGF.CreateMemTemp(RefType, Twine(Name) + ".ref"); + auto TmpLVal = CGF.MakeAddrLValue(TmpAddr, RefType); + CGF.EmitScalarInit(RefVal, TmpLVal); + } + + return TmpAddr; +} + +llvm::Function * +CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { + assert( + CapturedStmtInfo && + "CapturedStmtInfo should be set when generating the captured function"); + const CapturedDecl *CD = S.getCapturedDecl(); + const RecordDecl *RD = S.getCapturedRecordDecl(); + assert(CD->hasBody() && "missing CapturedDecl body"); + + // Build the argument list. + ASTContext &Ctx = CGM.getContext(); + FunctionArgList Args; + Args.append(CD->param_begin(), + std::next(CD->param_begin(), CD->getContextParamPosition())); + auto I = S.captures().begin(); + for (auto *FD : RD->fields()) { + QualType ArgType = FD->getType(); + IdentifierInfo *II = nullptr; + VarDecl *CapVar = nullptr; + + // If this is a capture by copy and the type is not a pointer, the outlined + // function argument type should be uintptr and the value properly casted to + // uintptr. This is necessary given that the runtime library is only able to + // deal with pointers. We can pass in the same way the VLA type sizes to the + // outlined function. + if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) || + I->capturesVariableArrayType()) + ArgType = Ctx.getUIntPtrType(); + + if (I->capturesVariable() || I->capturesVariableByCopy()) { + CapVar = I->getCapturedVar(); + II = CapVar->getIdentifier(); + } else if (I->capturesThis()) + II = &getContext().Idents.get("this"); + else { + assert(I->capturesVariableArrayType()); + II = &getContext().Idents.get("vla"); + } + if (ArgType->isVariablyModifiedType()) + ArgType = getContext().getVariableArrayDecayedType(ArgType); + Args.push_back(ImplicitParamDecl::Create(getContext(), nullptr, + FD->getLocation(), II, ArgType)); + ++I; + } + Args.append( + std::next(CD->param_begin(), CD->getContextParamPosition() + 1), + CD->param_end()); + + // Create the function declaration. + FunctionType::ExtInfo ExtInfo; + const CGFunctionInfo &FuncInfo = + CGM.getTypes().arrangeFreeFunctionDeclaration(Ctx.VoidTy, Args, ExtInfo, + /*IsVariadic=*/false); + llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo); + + llvm::Function *F = llvm::Function::Create( + FuncLLVMTy, llvm::GlobalValue::InternalLinkage, + CapturedStmtInfo->getHelperName(), &CGM.getModule()); + CGM.SetInternalFunctionAttributes(CD, F, FuncInfo); + if (CD->isNothrow()) + F->addFnAttr(llvm::Attribute::NoUnwind); + + // Generate the function. + StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, CD->getLocation(), + CD->getBody()->getLocStart()); + unsigned Cnt = CD->getContextParamPosition(); + I = S.captures().begin(); + for (auto *FD : RD->fields()) { + // If we are capturing a pointer by copy we don't need to do anything, just + // use the value that we get from the arguments. + if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) { + setAddrOfLocalVar(I->getCapturedVar(), GetAddrOfLocalVar(Args[Cnt])); + ++Cnt, ++I; + continue; + } + + LValue ArgLVal = + MakeAddrLValue(GetAddrOfLocalVar(Args[Cnt]), Args[Cnt]->getType(), + AlignmentSource::Decl); + if (FD->hasCapturedVLAType()) { + LValue CastedArgLVal = + MakeAddrLValue(castValueFromUintptr(*this, FD->getType(), + Args[Cnt]->getName(), ArgLVal), + FD->getType(), AlignmentSource::Decl); + auto *ExprArg = + EmitLoadOfLValue(CastedArgLVal, SourceLocation()).getScalarVal(); + auto VAT = FD->getCapturedVLAType(); + VLASizeMap[VAT->getSizeExpr()] = ExprArg; + } else if (I->capturesVariable()) { + auto *Var = I->getCapturedVar(); + QualType VarTy = Var->getType(); + Address ArgAddr = ArgLVal.getAddress(); + if (!VarTy->isReferenceType()) { + ArgAddr = EmitLoadOfReference( + ArgAddr, ArgLVal.getType()->castAs<ReferenceType>()); + } + setAddrOfLocalVar( + Var, Address(ArgAddr.getPointer(), getContext().getDeclAlign(Var))); + } else if (I->capturesVariableByCopy()) { + assert(!FD->getType()->isAnyPointerType() && + "Not expecting a captured pointer."); + auto *Var = I->getCapturedVar(); + QualType VarTy = Var->getType(); + setAddrOfLocalVar(I->getCapturedVar(), + castValueFromUintptr(*this, FD->getType(), + Args[Cnt]->getName(), ArgLVal, + VarTy->isReferenceType())); + } else { + // If 'this' is captured, load it into CXXThisValue. + assert(I->capturesThis()); + CXXThisValue = + EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation()).getScalarVal(); + } + ++Cnt, ++I; + } + + PGO.assignRegionCounters(GlobalDecl(CD), F); + CapturedStmtInfo->EmitBody(*this, CD->getBody()); + FinishFunction(CD->getBodyRBrace()); + + return F; +} + //===----------------------------------------------------------------------===// // OpenMP Directive Emission //===----------------------------------------------------------------------===// void CodeGenFunction::EmitOMPAggregateAssign( - llvm::Value *DestAddr, llvm::Value *SrcAddr, QualType OriginalType, - const llvm::function_ref<void(llvm::Value *, llvm::Value *)> &CopyGen) { + Address DestAddr, Address SrcAddr, QualType OriginalType, + const llvm::function_ref<void(Address, Address)> &CopyGen) { // Perform element-by-element initialization. QualType ElementTy; - auto SrcBegin = SrcAddr; - auto DestBegin = DestAddr; + + // Drill down to the base element type on both arrays. auto ArrayTy = OriginalType->getAsArrayTypeUnsafe(); - auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestBegin); + auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr); + SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); + + auto SrcBegin = SrcAddr.getPointer(); + auto DestBegin = DestAddr.getPointer(); // Cast from pointer to array type to pointer to single element. - SrcBegin = Builder.CreatePointerBitCastOrAddrSpaceCast(SrcBegin, - DestBegin->getType()); auto DestEnd = Builder.CreateGEP(DestBegin, NumElements); // The basic structure here is a while-do loop. auto BodyBB = createBasicBlock("omp.arraycpy.body"); @@ -46,77 +220,144 @@ void CodeGenFunction::EmitOMPAggregateAssign( // Enter the loop body, making that address the current address. auto EntryBB = Builder.GetInsertBlock(); EmitBlock(BodyBB); - auto SrcElementCurrent = - Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast"); - SrcElementCurrent->addIncoming(SrcBegin, EntryBB); - auto DestElementCurrent = Builder.CreatePHI(DestBegin->getType(), 2, - "omp.arraycpy.destElementPast"); - DestElementCurrent->addIncoming(DestBegin, EntryBB); + + CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy); + + llvm::PHINode *SrcElementPHI = + Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast"); + SrcElementPHI->addIncoming(SrcBegin, EntryBB); + Address SrcElementCurrent = + Address(SrcElementPHI, + SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); + + llvm::PHINode *DestElementPHI = + Builder.CreatePHI(DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); + DestElementPHI->addIncoming(DestBegin, EntryBB); + Address DestElementCurrent = + Address(DestElementPHI, + DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); // Emit copy. CopyGen(DestElementCurrent, SrcElementCurrent); // Shift the address forward by one element. auto DestElementNext = Builder.CreateConstGEP1_32( - DestElementCurrent, /*Idx0=*/1, "omp.arraycpy.dest.element"); + DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); auto SrcElementNext = Builder.CreateConstGEP1_32( - SrcElementCurrent, /*Idx0=*/1, "omp.arraycpy.src.element"); + SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); // Check whether we've reached the end. auto Done = Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); Builder.CreateCondBr(Done, DoneBB, BodyBB); - DestElementCurrent->addIncoming(DestElementNext, Builder.GetInsertBlock()); - SrcElementCurrent->addIncoming(SrcElementNext, Builder.GetInsertBlock()); + DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock()); + SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock()); // Done. EmitBlock(DoneBB, /*IsFinished=*/true); } -void CodeGenFunction::EmitOMPCopy(CodeGenFunction &CGF, - QualType OriginalType, llvm::Value *DestAddr, - llvm::Value *SrcAddr, const VarDecl *DestVD, +/// \brief Emit initialization of arrays of complex types. +/// \param DestAddr Address of the array. +/// \param Type Type of array. +/// \param Init Initial expression of array. +static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, + QualType Type, const Expr *Init) { + // Perform element-by-element initialization. + QualType ElementTy; + + // Drill down to the base element type on both arrays. + auto ArrayTy = Type->getAsArrayTypeUnsafe(); + auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); + DestAddr = + CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType()); + + auto DestBegin = DestAddr.getPointer(); + // Cast from pointer to array type to pointer to single element. + auto DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements); + // The basic structure here is a while-do loop. + auto BodyBB = CGF.createBasicBlock("omp.arrayinit.body"); + auto DoneBB = CGF.createBasicBlock("omp.arrayinit.done"); + auto IsEmpty = + CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty"); + CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); + + // Enter the loop body, making that address the current address. + auto EntryBB = CGF.Builder.GetInsertBlock(); + CGF.EmitBlock(BodyBB); + + CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); + + llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI( + DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); + DestElementPHI->addIncoming(DestBegin, EntryBB); + Address DestElementCurrent = + Address(DestElementPHI, + DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); + + // Emit copy. + { + CodeGenFunction::RunCleanupsScope InitScope(CGF); + CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(), + /*IsInitializer=*/false); + } + + // Shift the address forward by one element. + auto DestElementNext = CGF.Builder.CreateConstGEP1_32( + DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); + // Check whether we've reached the end. + auto Done = + CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); + CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); + DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock()); + + // Done. + CGF.EmitBlock(DoneBB, /*IsFinished=*/true); +} + +void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr, + Address SrcAddr, const VarDecl *DestVD, const VarDecl *SrcVD, const Expr *Copy) { if (OriginalType->isArrayType()) { auto *BO = dyn_cast<BinaryOperator>(Copy); if (BO && BO->getOpcode() == BO_Assign) { // Perform simple memcpy for simple copying. - CGF.EmitAggregateAssign(DestAddr, SrcAddr, OriginalType); + EmitAggregateAssign(DestAddr, SrcAddr, OriginalType); } else { // For arrays with complex element types perform element by element // copying. - CGF.EmitOMPAggregateAssign( + EmitOMPAggregateAssign( DestAddr, SrcAddr, OriginalType, - [&CGF, Copy, SrcVD, DestVD](llvm::Value *DestElement, - llvm::Value *SrcElement) { + [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) { // Working with the single array element, so have to remap // destination and source variables to corresponding array // elements. - CodeGenFunction::OMPPrivateScope Remap(CGF); - Remap.addPrivate(DestVD, [DestElement]() -> llvm::Value *{ + CodeGenFunction::OMPPrivateScope Remap(*this); + Remap.addPrivate(DestVD, [DestElement]() -> Address { return DestElement; }); Remap.addPrivate( - SrcVD, [SrcElement]() -> llvm::Value *{ return SrcElement; }); + SrcVD, [SrcElement]() -> Address { return SrcElement; }); (void)Remap.Privatize(); - CGF.EmitIgnoredExpr(Copy); + EmitIgnoredExpr(Copy); }); } } else { // Remap pseudo source variable to private copy. - CodeGenFunction::OMPPrivateScope Remap(CGF); - Remap.addPrivate(SrcVD, [SrcAddr]() -> llvm::Value *{ return SrcAddr; }); - Remap.addPrivate(DestVD, [DestAddr]() -> llvm::Value *{ return DestAddr; }); + CodeGenFunction::OMPPrivateScope Remap(*this); + Remap.addPrivate(SrcVD, [SrcAddr]() -> Address { return SrcAddr; }); + Remap.addPrivate(DestVD, [DestAddr]() -> Address { return DestAddr; }); (void)Remap.Privatize(); // Emit copying of the whole variable. - CGF.EmitIgnoredExpr(Copy); + EmitIgnoredExpr(Copy); } } bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) { + if (!HaveInsertPoint()) + return false; llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate; - for (auto &&I = D.getClausesOfKind(OMPC_firstprivate); I; ++I) { - auto *C = cast<OMPFirstprivateClause>(*I); + for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) { auto IRef = C->varlist_begin(); auto InitsRef = C->inits().begin(); for (auto IInit : C->private_copies()) { @@ -131,13 +372,13 @@ bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D, /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup( OrigVD) != nullptr, (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc()); - auto *OriginalAddr = EmitLValue(&DRE).getAddress(); + Address OriginalAddr = EmitLValue(&DRE).getAddress(); QualType Type = OrigVD->getType(); if (Type->isArrayType()) { // Emit VarDecl with copy init for arrays. // Get the address of the original variable captured in current // captured region. - IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value *{ + IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address { auto Emission = EmitAutoVarAlloca(*VD); auto *Init = VD->getInit(); if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) { @@ -147,12 +388,12 @@ bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D, } else { EmitOMPAggregateAssign( Emission.getAllocatedAddress(), OriginalAddr, Type, - [this, VDInit, Init](llvm::Value *DestElement, - llvm::Value *SrcElement) { + [this, VDInit, Init](Address DestElement, + Address SrcElement) { // Clean up any temporaries needed by the initialization. RunCleanupsScope InitScope(*this); // Emit initialization for single element. - LocalDeclMap[VDInit] = SrcElement; + setAddrOfLocalVar(VDInit, SrcElement); EmitAnyExprToMem(Init, DestElement, Init->getType().getQualifiers(), /*IsInitializer*/ false); @@ -163,12 +404,12 @@ bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D, return Emission.getAllocatedAddress(); }); } else { - IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value *{ + IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address { // Emit private VarDecl with copy init. // Remap temp VDInit variable to the address of the original // variable // (for proper handling of captured global variables). - LocalDeclMap[VDInit] = OriginalAddr; + setAddrOfLocalVar(VDInit, OriginalAddr); EmitDecl(*VD); LocalDeclMap.erase(VDInit); return GetAddrOfLocalVar(VD); @@ -188,16 +429,17 @@ bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D, void CodeGenFunction::EmitOMPPrivateClause( const OMPExecutableDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) { + if (!HaveInsertPoint()) + return; llvm::DenseSet<const VarDecl *> EmittedAsPrivate; - for (auto &&I = D.getClausesOfKind(OMPC_private); I; ++I) { - auto *C = cast<OMPPrivateClause>(*I); + for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) { auto IRef = C->varlist_begin(); for (auto IInit : C->private_copies()) { auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl()); bool IsRegistered = - PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value *{ + PrivateScope.addPrivate(OrigVD, [&]() -> Address { // Emit private VarDecl with copy init. EmitDecl(*VD); return GetAddrOfLocalVar(VD); @@ -212,14 +454,15 @@ void CodeGenFunction::EmitOMPPrivateClause( } bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) { + if (!HaveInsertPoint()) + return false; // threadprivate_var1 = master_threadprivate_var1; // operator=(threadprivate_var2, master_threadprivate_var2); // ... // __kmpc_barrier(&loc, global_tid); llvm::DenseSet<const VarDecl *> CopiedVars; llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr; - for (auto &&I = D.getClausesOfKind(OMPC_copyin); I; ++I) { - auto *C = cast<OMPCopyinClause>(*I); + for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) { auto IRef = C->varlist_begin(); auto ISrcRef = C->source_exprs().begin(); auto IDestRef = C->destination_exprs().begin(); @@ -231,7 +474,7 @@ bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) { // Get the address of the master variable. If we are emitting code with // TLS support, the address is passed from the master as field in the // captured declaration. - llvm::Value *MasterAddr; + Address MasterAddr = Address::invalid(); if (getLangOpts().OpenMPUseTLS && getContext().getTargetInfo().isTLSSupported()) { assert(CapturedStmtInfo->lookup(VD) && @@ -239,12 +482,15 @@ bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) { DeclRefExpr DRE(const_cast<VarDecl *>(VD), true, (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc()); MasterAddr = EmitLValue(&DRE).getAddress(); + LocalDeclMap.erase(VD); } else { - MasterAddr = VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD) - : CGM.GetAddrOfGlobal(VD); + MasterAddr = + Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD) + : CGM.GetAddrOfGlobal(VD), + getContext().getDeclAlign(VD)); } // Get the address of the threadprivate variable. - auto *PrivateAddr = EmitLValue(*IRef).getAddress(); + Address PrivateAddr = EmitLValue(*IRef).getAddress(); if (CopiedVars.size() == 1) { // At first check if current thread is a master thread. If it is, no // need to copy data. @@ -252,15 +498,14 @@ bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) { CopyEnd = createBasicBlock("copyin.not.master.end"); Builder.CreateCondBr( Builder.CreateICmpNE( - Builder.CreatePtrToInt(MasterAddr, CGM.IntPtrTy), - Builder.CreatePtrToInt(PrivateAddr, CGM.IntPtrTy)), + Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy), + Builder.CreatePtrToInt(PrivateAddr.getPointer(), CGM.IntPtrTy)), CopyBegin, CopyEnd); EmitBlock(CopyBegin); } auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl()); auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl()); - EmitOMPCopy(*this, Type, PrivateAddr, MasterAddr, DestVD, SrcVD, - AssignOp); + EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp); } ++IRef; ++ISrcRef; @@ -277,11 +522,12 @@ bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) { bool CodeGenFunction::EmitOMPLastprivateClauseInit( const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) { + if (!HaveInsertPoint()) + return false; bool HasAtLeastOneLastprivate = false; llvm::DenseSet<const VarDecl *> AlreadyEmittedVars; - for (auto &&I = D.getClausesOfKind(OMPC_lastprivate); I; ++I) { + for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) { HasAtLeastOneLastprivate = true; - auto *C = cast<OMPLastprivateClause>(*I); auto IRef = C->varlist_begin(); auto IDestRef = C->destination_exprs().begin(); for (auto *IInit : C->private_copies()) { @@ -290,7 +536,7 @@ bool CodeGenFunction::EmitOMPLastprivateClauseInit( auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) { auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl()); - PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> llvm::Value *{ + PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> Address { DeclRefExpr DRE( const_cast<VarDecl *>(OrigVD), /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup( @@ -304,7 +550,7 @@ bool CodeGenFunction::EmitOMPLastprivateClauseInit( if (IInit) { auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl()); bool IsRegistered = - PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value *{ + PrivateScope.addPrivate(OrigVD, [&]() -> Address { // Emit private VarDecl with copy init. EmitDecl(*VD); return GetAddrOfLocalVar(VD); @@ -322,6 +568,8 @@ bool CodeGenFunction::EmitOMPLastprivateClauseInit( void CodeGenFunction::EmitOMPLastprivateClauseFinal( const OMPExecutableDirective &D, llvm::Value *IsLastIterCond) { + if (!HaveInsertPoint()) + return; // Emit following code: // if (<IsLastIterCond>) { // orig_var1 = private_orig_var1; @@ -359,8 +607,7 @@ void CodeGenFunction::EmitOMPLastprivateClauseFinal( { llvm::DenseSet<const VarDecl *> AlreadyEmittedVars; bool FirstLCV = true; - for (auto &&I = D.getClausesOfKind(OMPC_lastprivate); I; ++I) { - auto *C = cast<OMPLastprivateClause>(*I); + for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) { auto IRef = C->varlist_begin(); auto ISrcRef = C->source_exprs().begin(); auto IDestRef = C->destination_exprs().begin(); @@ -385,11 +632,14 @@ void CodeGenFunction::EmitOMPLastprivateClauseFinal( auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl()); auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl()); // Get the address of the original variable. - auto *OriginalAddr = GetAddrOfLocalVar(DestVD); + Address OriginalAddr = GetAddrOfLocalVar(DestVD); // Get the address of the private variable. - auto *PrivateAddr = GetAddrOfLocalVar(PrivateVD); - EmitOMPCopy(*this, Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, - AssignOp); + Address PrivateAddr = GetAddrOfLocalVar(PrivateVD); + if (auto RefTy = PrivateVD->getType()->getAs<ReferenceType>()) + PrivateAddr = + Address(Builder.CreateLoad(PrivateAddr), + getNaturalTypeAlignment(RefTy->getPointeeType())); + EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp); } ++IRef; ++ISrcRef; @@ -405,46 +655,174 @@ void CodeGenFunction::EmitOMPLastprivateClauseFinal( void CodeGenFunction::EmitOMPReductionClauseInit( const OMPExecutableDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) { - for (auto &&I = D.getClausesOfKind(OMPC_reduction); I; ++I) { - auto *C = cast<OMPReductionClause>(*I); + if (!HaveInsertPoint()) + return; + for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { auto ILHS = C->lhs_exprs().begin(); auto IRHS = C->rhs_exprs().begin(); + auto IPriv = C->privates().begin(); for (auto IRef : C->varlists()) { - auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl()); auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); - auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); - // Store the address of the original variable associated with the LHS - // implicit variable. - PrivateScope.addPrivate(LHSVD, [this, OrigVD, IRef]() -> llvm::Value *{ - DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), - CapturedStmtInfo->lookup(OrigVD) != nullptr, - IRef->getType(), VK_LValue, IRef->getExprLoc()); - return EmitLValue(&DRE).getAddress(); - }); - // Emit reduction copy. - bool IsRegistered = - PrivateScope.addPrivate(OrigVD, [this, PrivateVD]() -> llvm::Value *{ - // Emit private VarDecl with reduction init. - EmitDecl(*PrivateVD); - return GetAddrOfLocalVar(PrivateVD); - }); - assert(IsRegistered && "private var already registered as private"); - // Silence the warning about unused variable. - (void)IsRegistered; - ++ILHS, ++IRHS; + auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); + auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl()); + if (auto *OASE = dyn_cast<OMPArraySectionExpr>(IRef)) { + auto *Base = OASE->getBase()->IgnoreParenImpCasts(); + while (auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base)) + Base = TempOASE->getBase()->IgnoreParenImpCasts(); + while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) + Base = TempASE->getBase()->IgnoreParenImpCasts(); + auto *DE = cast<DeclRefExpr>(Base); + auto *OrigVD = cast<VarDecl>(DE->getDecl()); + auto OASELValueLB = EmitOMPArraySectionExpr(OASE); + auto OASELValueUB = + EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false); + auto OriginalBaseLValue = EmitLValue(DE); + auto BaseLValue = OriginalBaseLValue; + auto *Zero = Builder.getInt64(/*C=*/0); + llvm::SmallVector<llvm::Value *, 4> Indexes; + Indexes.push_back(Zero); + auto *ItemTy = + OASELValueLB.getPointer()->getType()->getPointerElementType(); + auto *Ty = BaseLValue.getPointer()->getType()->getPointerElementType(); + while (Ty != ItemTy) { + Indexes.push_back(Zero); + Ty = Ty->getPointerElementType(); + } + BaseLValue = MakeAddrLValue( + Address(Builder.CreateInBoundsGEP(BaseLValue.getPointer(), Indexes), + OASELValueLB.getAlignment()), + OASELValueLB.getType(), OASELValueLB.getAlignmentSource()); + // Store the address of the original variable associated with the LHS + // implicit variable. + PrivateScope.addPrivate(LHSVD, [this, OASELValueLB]() -> Address { + return OASELValueLB.getAddress(); + }); + // Emit reduction copy. + bool IsRegistered = PrivateScope.addPrivate( + OrigVD, [this, PrivateVD, BaseLValue, OASELValueLB, OASELValueUB, + OriginalBaseLValue]() -> Address { + // Emit VarDecl with copy init for arrays. + // Get the address of the original variable captured in current + // captured region. + auto *Size = Builder.CreatePtrDiff(OASELValueUB.getPointer(), + OASELValueLB.getPointer()); + Size = Builder.CreateNUWAdd( + Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1)); + CodeGenFunction::OpaqueValueMapping OpaqueMap( + *this, cast<OpaqueValueExpr>( + getContext() + .getAsVariableArrayType(PrivateVD->getType()) + ->getSizeExpr()), + RValue::get(Size)); + EmitVariablyModifiedType(PrivateVD->getType()); + auto Emission = EmitAutoVarAlloca(*PrivateVD); + auto Addr = Emission.getAllocatedAddress(); + auto *Init = PrivateVD->getInit(); + EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(), Init); + EmitAutoVarCleanups(Emission); + // Emit private VarDecl with reduction init. + auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(), + OASELValueLB.getPointer()); + auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset); + Ptr = Builder.CreatePointerBitCastOrAddrSpaceCast( + Ptr, OriginalBaseLValue.getPointer()->getType()); + return Address(Ptr, OriginalBaseLValue.getAlignment()); + }); + assert(IsRegistered && "private var already registered as private"); + // Silence the warning about unused variable. + (void)IsRegistered; + PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address { + return GetAddrOfLocalVar(PrivateVD); + }); + } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(IRef)) { + auto *Base = ASE->getBase()->IgnoreParenImpCasts(); + while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) + Base = TempASE->getBase()->IgnoreParenImpCasts(); + auto *DE = cast<DeclRefExpr>(Base); + auto *OrigVD = cast<VarDecl>(DE->getDecl()); + auto ASELValue = EmitLValue(ASE); + auto OriginalBaseLValue = EmitLValue(DE); + auto BaseLValue = OriginalBaseLValue; + auto *Zero = Builder.getInt64(/*C=*/0); + llvm::SmallVector<llvm::Value *, 4> Indexes; + Indexes.push_back(Zero); + auto *ItemTy = + ASELValue.getPointer()->getType()->getPointerElementType(); + auto *Ty = BaseLValue.getPointer()->getType()->getPointerElementType(); + while (Ty != ItemTy) { + Indexes.push_back(Zero); + Ty = Ty->getPointerElementType(); + } + BaseLValue = MakeAddrLValue( + Address(Builder.CreateInBoundsGEP(BaseLValue.getPointer(), Indexes), + ASELValue.getAlignment()), + ASELValue.getType(), ASELValue.getAlignmentSource()); + // Store the address of the original variable associated with the LHS + // implicit variable. + PrivateScope.addPrivate(LHSVD, [this, ASELValue]() -> Address { + return ASELValue.getAddress(); + }); + // Emit reduction copy. + bool IsRegistered = PrivateScope.addPrivate( + OrigVD, [this, PrivateVD, BaseLValue, ASELValue, + OriginalBaseLValue]() -> Address { + // Emit private VarDecl with reduction init. + EmitDecl(*PrivateVD); + auto Addr = GetAddrOfLocalVar(PrivateVD); + auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(), + ASELValue.getPointer()); + auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset); + Ptr = Builder.CreatePointerBitCastOrAddrSpaceCast( + Ptr, OriginalBaseLValue.getPointer()->getType()); + return Address(Ptr, OriginalBaseLValue.getAlignment()); + }); + assert(IsRegistered && "private var already registered as private"); + // Silence the warning about unused variable. + (void)IsRegistered; + PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address { + return GetAddrOfLocalVar(PrivateVD); + }); + } else { + auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl()); + // Store the address of the original variable associated with the LHS + // implicit variable. + PrivateScope.addPrivate(LHSVD, [this, OrigVD, IRef]() -> Address { + DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), + CapturedStmtInfo->lookup(OrigVD) != nullptr, + IRef->getType(), VK_LValue, IRef->getExprLoc()); + return EmitLValue(&DRE).getAddress(); + }); + // Emit reduction copy. + bool IsRegistered = + PrivateScope.addPrivate(OrigVD, [this, PrivateVD]() -> Address { + // Emit private VarDecl with reduction init. + EmitDecl(*PrivateVD); + return GetAddrOfLocalVar(PrivateVD); + }); + assert(IsRegistered && "private var already registered as private"); + // Silence the warning about unused variable. + (void)IsRegistered; + PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address { + return GetAddrOfLocalVar(PrivateVD); + }); + } + ++ILHS, ++IRHS, ++IPriv; } } } void CodeGenFunction::EmitOMPReductionClauseFinal( const OMPExecutableDirective &D) { + if (!HaveInsertPoint()) + return; + llvm::SmallVector<const Expr *, 8> Privates; llvm::SmallVector<const Expr *, 8> LHSExprs; llvm::SmallVector<const Expr *, 8> RHSExprs; llvm::SmallVector<const Expr *, 8> ReductionOps; bool HasAtLeastOneReduction = false; - for (auto &&I = D.getClausesOfKind(OMPC_reduction); I; ++I) { + for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { HasAtLeastOneReduction = true; - auto *C = cast<OMPReductionClause>(*I); + Privates.append(C->privates().begin(), C->privates().end()); LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); @@ -453,8 +831,8 @@ void CodeGenFunction::EmitOMPReductionClauseFinal( // Emit nowait reduction if nowait clause is present or directive is a // parallel directive (it always has implicit barrier). CGM.getOpenMPRuntime().emitReduction( - *this, D.getLocEnd(), LHSExprs, RHSExprs, ReductionOps, - D.getSingleClause(OMPC_nowait) || + *this, D.getLocEnd(), Privates, LHSExprs, RHSExprs, ReductionOps, + D.getSingleClause<OMPNowaitClause>() || isOpenMPParallelDirective(D.getDirectiveKind()) || D.getDirectiveKind() == OMPD_simd, D.getDirectiveKind() == OMPD_simd); @@ -466,29 +844,32 @@ static void emitCommonOMPParallelDirective(CodeGenFunction &CGF, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); - auto CapturedStruct = CGF.GenerateCapturedStmtArgument(*CS); + llvm::SmallVector<llvm::Value *, 16> CapturedVars; + CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction( S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); - if (auto C = S.getSingleClause(OMPC_num_threads)) { + if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) { CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); - auto NumThreadsClause = cast<OMPNumThreadsClause>(C); auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(), /*IgnoreResultAssign*/ true); CGF.CGM.getOpenMPRuntime().emitNumThreadsClause( CGF, NumThreads, NumThreadsClause->getLocStart()); } - if (auto *C = S.getSingleClause(OMPC_proc_bind)) { + if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) { CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); - auto *ProcBindClause = cast<OMPProcBindClause>(C); CGF.CGM.getOpenMPRuntime().emitProcBindClause( CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart()); } const Expr *IfCond = nullptr; - if (auto C = S.getSingleClause(OMPC_if)) { - IfCond = cast<OMPIfClause>(C)->getCondition(); + for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { + if (C->getNameModifier() == OMPD_unknown || + C->getNameModifier() == OMPD_parallel) { + IfCond = C->getCondition(); + break; + } } CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn, - CapturedStruct, IfCond); + CapturedVars, IfCond); } void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { @@ -503,17 +884,15 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { // initialization of firstprivate variables or propagation master's thread // values of threadprivate variables to local instances of that variables // of all other implicit threads. - CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(), - OMPD_unknown); + CGF.CGM.getOpenMPRuntime().emitBarrierCall( + CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, + /*ForceSimpleCall=*/true); } CGF.EmitOMPPrivateClause(S, PrivateScope); CGF.EmitOMPReductionClauseInit(S, PrivateScope); (void)PrivateScope.Privatize(); CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); CGF.EmitOMPReductionClauseFinal(S); - // Emit implicit barrier at the end of the 'parallel' directive. - CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(), - OMPD_unknown); }; emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen); } @@ -526,8 +905,7 @@ void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D, EmitIgnoredExpr(I); } // Update the linear variables. - for (auto &&I = D.getClausesOfKind(OMPC_linear); I; ++I) { - auto *C = cast<OMPLinearClause>(*I); + for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { for (auto U : C->updates()) { EmitIgnoredExpr(U); } @@ -595,9 +973,10 @@ void CodeGenFunction::EmitOMPInnerLoop( } void CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) { + if (!HaveInsertPoint()) + return; // Emit inits for the linear variables. - for (auto &&I = D.getClausesOfKind(OMPC_linear); I; ++I) { - auto *C = cast<OMPLinearClause>(*I); + for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { for (auto Init : C->inits()) { auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl()); auto *OrigVD = cast<VarDecl>( @@ -608,8 +987,7 @@ void CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) { VD->getInit()->getExprLoc()); AutoVarEmission Emission = EmitAutoVarAlloca(*VD); EmitExprAsInit(&DRE, VD, - MakeAddrLValue(Emission.getAllocatedAddress(), - VD->getType(), Emission.Alignment), + MakeAddrLValue(Emission.getAllocatedAddress(), VD->getType()), /*capturedByInit=*/false); EmitAutoVarCleanups(Emission); } @@ -626,19 +1004,20 @@ void CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) { static void emitLinearClauseFinal(CodeGenFunction &CGF, const OMPLoopDirective &D) { + if (!CGF.HaveInsertPoint()) + return; // Emit the final values of the linear variables. - for (auto &&I = D.getClausesOfKind(OMPC_linear); I; ++I) { - auto *C = cast<OMPLinearClause>(*I); + for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { auto IC = C->varlist_begin(); for (auto F : C->finals()) { auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl()); DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr, (*IC)->getType(), VK_LValue, (*IC)->getExprLoc()); - auto *OrigAddr = CGF.EmitLValue(&DRE).getAddress(); + Address OrigAddr = CGF.EmitLValue(&DRE).getAddress(); CodeGenFunction::OMPPrivateScope VarScope(CGF); VarScope.addPrivate(OrigVD, - [OrigAddr]() -> llvm::Value *{ return OrigAddr; }); + [OrigAddr]() -> Address { return OrigAddr; }); (void)VarScope.Privatize(); CGF.EmitIgnoredExpr(F); ++IC; @@ -648,8 +1027,9 @@ static void emitLinearClauseFinal(CodeGenFunction &CGF, static void emitAlignedClause(CodeGenFunction &CGF, const OMPExecutableDirective &D) { - for (auto &&I = D.getClausesOfKind(OMPC_aligned); I; ++I) { - auto *Clause = cast<OMPAlignedClause>(*I); + if (!CGF.HaveInsertPoint()) + return; + for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) { unsigned ClauseAlignment = 0; if (auto AlignmentExpr = Clause->getAlignment()) { auto AlignmentCI = @@ -680,24 +1060,36 @@ static void emitAlignedClause(CodeGenFunction &CGF, static void emitPrivateLoopCounters(CodeGenFunction &CGF, CodeGenFunction::OMPPrivateScope &LoopScope, - ArrayRef<Expr *> Counters) { + ArrayRef<Expr *> Counters, + ArrayRef<Expr *> PrivateCounters) { + if (!CGF.HaveInsertPoint()) + return; + auto I = PrivateCounters.begin(); for (auto *E : Counters) { - auto VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); - (void)LoopScope.addPrivate(VD, [&]() -> llvm::Value *{ + auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); + auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()); + Address Addr = Address::invalid(); + (void)LoopScope.addPrivate(PrivateVD, [&]() -> Address { // Emit var without initialization. - auto VarEmission = CGF.EmitAutoVarAlloca(*VD); + auto VarEmission = CGF.EmitAutoVarAlloca(*PrivateVD); CGF.EmitAutoVarCleanups(VarEmission); - return VarEmission.getAllocatedAddress(); + Addr = VarEmission.getAllocatedAddress(); + return Addr; }); + (void)LoopScope.addPrivate(VD, [&]() -> Address { return Addr; }); + ++I; } } static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S, const Expr *Cond, llvm::BasicBlock *TrueBlock, llvm::BasicBlock *FalseBlock, uint64_t TrueCount) { + if (!CGF.HaveInsertPoint()) + return; { CodeGenFunction::OMPPrivateScope PreCondScope(CGF); - emitPrivateLoopCounters(CGF, PreCondScope, S.counters()); + emitPrivateLoopCounters(CGF, PreCondScope, S.counters(), + S.private_counters()); (void)PreCondScope.Privatize(); // Get initial values of real counters. for (auto I : S.inits()) { @@ -711,31 +1103,45 @@ static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S, static void emitPrivateLinearVars(CodeGenFunction &CGF, const OMPExecutableDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) { - for (auto &&I = D.getClausesOfKind(OMPC_linear); I; ++I) { - auto *C = cast<OMPLinearClause>(*I); + if (!CGF.HaveInsertPoint()) + return; + for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { + auto CurPrivate = C->privates().begin(); for (auto *E : C->varlists()) { - auto VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); - bool IsRegistered = PrivateScope.addPrivate(VD, [&]()->llvm::Value * { - // Emit var without initialization. - auto VarEmission = CGF.EmitAutoVarAlloca(*VD); - CGF.EmitAutoVarCleanups(VarEmission); - return VarEmission.getAllocatedAddress(); + auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); + auto *PrivateVD = + cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl()); + bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> Address { + // Emit private VarDecl with copy init. + CGF.EmitVarDecl(*PrivateVD); + return CGF.GetAddrOfLocalVar(PrivateVD); }); assert(IsRegistered && "linear var already registered as private"); // Silence the warning about unused variable. (void)IsRegistered; + ++CurPrivate; } } } -static void emitSafelenClause(CodeGenFunction &CGF, - const OMPExecutableDirective &D) { - if (auto *C = - cast_or_null<OMPSafelenClause>(D.getSingleClause(OMPC_safelen))) { +static void emitSimdlenSafelenClause(CodeGenFunction &CGF, + const OMPExecutableDirective &D) { + if (!CGF.HaveInsertPoint()) + return; + if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) { + RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(), + /*ignoreResult=*/true); + llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal()); + CGF.LoopStack.setVectorizeWidth(Val->getZExtValue()); + // In presence of finite 'safelen', it may be unsafe to mark all + // the memory instructions parallel, because loop-carried + // dependences of 'safelen' iterations are possible. + CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>()); + } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) { RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(), /*ignoreResult=*/true); llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal()); - CGF.LoopStack.setVectorizerWidth(Val->getZExtValue()); + CGF.LoopStack.setVectorizeWidth(Val->getZExtValue()); // In presence of finite 'safelen', it may be unsafe to mark all // the memory instructions parallel, because loop-carried // dependences of 'safelen' iterations are possible. @@ -746,22 +1152,24 @@ static void emitSafelenClause(CodeGenFunction &CGF, void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D) { // Walk clauses and process safelen/lastprivate. LoopStack.setParallel(); - LoopStack.setVectorizerEnable(true); - emitSafelenClause(*this, D); + LoopStack.setVectorizeEnable(true); + emitSimdlenSafelenClause(*this, D); } void CodeGenFunction::EmitOMPSimdFinal(const OMPLoopDirective &D) { + if (!HaveInsertPoint()) + return; auto IC = D.counters().begin(); for (auto F : D.finals()) { auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl()); - if (LocalDeclMap.lookup(OrigVD) || CapturedStmtInfo->lookup(OrigVD)) { + if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD)) { DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), CapturedStmtInfo->lookup(OrigVD) != nullptr, (*IC)->getType(), VK_LValue, (*IC)->getExprLoc()); - auto *OrigAddr = EmitLValue(&DRE).getAddress(); + Address OrigAddr = EmitLValue(&DRE).getAddress(); OMPPrivateScope VarScope(*this); VarScope.addPrivate(OrigVD, - [OrigAddr]() -> llvm::Value *{ return OrigAddr; }); + [OrigAddr]() -> Address { return OrigAddr; }); (void)VarScope.Privatize(); EmitIgnoredExpr(F); } @@ -817,7 +1225,8 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { bool HasLastprivateClause; { OMPPrivateScope LoopScope(CGF); - emitPrivateLoopCounters(CGF, LoopScope, S.counters()); + emitPrivateLoopCounters(CGF, LoopScope, S.counters(), + S.private_counters()); emitPrivateLinearVars(CGF, S, LoopScope); CGF.EmitOMPPrivateClause(S, LoopScope); CGF.EmitOMPReductionClauseInit(S, LoopScope); @@ -849,9 +1258,9 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { void CodeGenFunction::EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind, const OMPLoopDirective &S, OMPPrivateScope &LoopScope, - bool Ordered, llvm::Value *LB, - llvm::Value *UB, llvm::Value *ST, - llvm::Value *IL, llvm::Value *Chunk) { + bool Ordered, Address LB, + Address UB, Address ST, + Address IL, llvm::Value *Chunk) { auto &RT = CGM.getOpenMPRuntime(); // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime). @@ -915,11 +1324,14 @@ void CodeGenFunction::EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind, const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); - RT.emitForInit( - *this, S.getLocStart(), ScheduleKind, IVSize, IVSigned, Ordered, IL, LB, - (DynamicOrOrdered ? EmitAnyExpr(S.getLastIteration()).getScalarVal() - : UB), - ST, Chunk); + if (DynamicOrOrdered) { + llvm::Value *UBVal = EmitScalarExpr(S.getLastIteration()); + RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, + IVSize, IVSigned, Ordered, UBVal, Chunk); + } else { + RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, + IVSize, IVSigned, Ordered, IL, LB, UB, ST, Chunk); + } auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end"); @@ -1019,8 +1431,7 @@ emitScheduleClause(CodeGenFunction &CGF, const OMPLoopDirective &S, // Detect the loop schedule kind and chunk. auto ScheduleKind = OMPC_SCHEDULE_unknown; llvm::Value *Chunk = nullptr; - if (auto *C = - cast_or_null<OMPScheduleClause>(S.getSingleClause(OMPC_schedule))) { + if (const auto *C = S.getSingleClause<OMPScheduleClause>()) { ScheduleKind = C->getScheduleKind(); if (const auto *Ch = C->getChunkSize()) { if (auto *ImpRef = cast_or_null<DeclRefExpr>(C->getHelperChunkSize())) { @@ -1029,8 +1440,8 @@ emitScheduleClause(CodeGenFunction &CGF, const OMPLoopDirective &S, CGF.EmitVarDecl(*ImpVar); CGF.EmitStoreThroughLValue( CGF.EmitAnyExpr(Ch), - CGF.MakeNaturalAlignAddrLValue(CGF.GetAddrOfLocalVar(ImpVar), - ImpVar->getType())); + CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(ImpVar), + ImpVar->getType())); } else { Ch = ImpRef; } @@ -1038,7 +1449,8 @@ emitScheduleClause(CodeGenFunction &CGF, const OMPLoopDirective &S, if (!C->getHelperChunkSize() || !OuterRegion) { Chunk = CGF.EmitScalarExpr(Ch); Chunk = CGF.EmitScalarConversion(Chunk, Ch->getType(), - S.getIterationVariable()->getType()); + S.getIterationVariable()->getType(), + S.getLocStart()); } } } @@ -1100,13 +1512,15 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { if (EmitOMPFirstprivateClause(S, LoopScope)) { // Emit implicit barrier to synchronize threads and avoid data races on // initialization of firstprivate variables. - CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), - OMPD_unknown); + CGM.getOpenMPRuntime().emitBarrierCall( + *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, + /*ForceSimpleCall=*/true); } EmitOMPPrivateClause(S, LoopScope); HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope); EmitOMPReductionClauseInit(S, LoopScope); - emitPrivateLoopCounters(*this, LoopScope, S.counters()); + emitPrivateLoopCounters(*this, LoopScope, S.counters(), + S.private_counters()); emitPrivateLinearVars(*this, S, LoopScope); (void)LoopScope.Privatize(); @@ -1119,7 +1533,7 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { ScheduleKind = ScheduleInfo.second; const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); - const bool Ordered = S.getSingleClause(OMPC_ordered) != nullptr; + const bool Ordered = S.getSingleClause<OMPOrderedClause>() != nullptr; if (RT.isStaticNonchunked(ScheduleKind, /* Chunked */ Chunk != nullptr) && !Ordered) { @@ -1131,9 +1545,10 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { // chunks that are approximately equal in size, and at most one chunk is // distributed to each thread. Note that the size of the chunks is // unspecified in this case. - RT.emitForInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned, - Ordered, IL.getAddress(), LB.getAddress(), - UB.getAddress(), ST.getAddress()); + RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, + IVSize, IVSigned, Ordered, + IL.getAddress(), LB.getAddress(), + UB.getAddress(), ST.getAddress()); auto LoopExit = getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); // UB = min(UB, GlobalUB); EmitIgnoredExpr(S.getEnsureUpperBound()); @@ -1181,10 +1596,11 @@ void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF) { HasLastprivates = CGF.EmitOMPWorksharingLoop(S); }; - CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen); + CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen, + S.hasCancel()); // Emit an implicit barrier at the end. - if (!S.getSingleClause(OMPC_nowait) || HasLastprivates) { + if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) { CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for); } } @@ -1198,7 +1614,7 @@ void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) { CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); // Emit an implicit barrier at the end. - if (!S.getSingleClause(OMPC_nowait) || HasLastprivates) { + if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) { CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for); } } @@ -1206,7 +1622,7 @@ void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) { static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty, const Twine &Name, llvm::Value *Init = nullptr) { - auto LVal = CGF.MakeNaturalAlignAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty); + auto LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty); if (Init) CGF.EmitScalarInit(Init, LVal); return LVal; @@ -1276,8 +1692,9 @@ CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) { // Emit implicit barrier to synchronize threads and avoid data races on // initialization of firstprivate variables. - CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(), - OMPD_unknown); + CGF.CGM.getOpenMPRuntime().emitBarrierCall( + CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, + /*ForceSimpleCall=*/true); } CGF.EmitOMPPrivateClause(S, LoopScope); HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); @@ -1285,7 +1702,7 @@ CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { (void)LoopScope.Privatize(); // Emit static non-chunked loop. - CGF.CGM.getOpenMPRuntime().emitForInit( + CGF.CGM.getOpenMPRuntime().emitForStaticInit( CGF, S.getLocStart(), OMPC_SCHEDULE_static, /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(), LB.getAddress(), UB.getAddress(), ST.getAddress()); @@ -1310,11 +1727,17 @@ CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { CGF.EmitLoadOfScalar(IL, S.getLocStart()))); }; - CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen); + bool HasCancel = false; + if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S)) + HasCancel = OSD->hasCancel(); + else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S)) + HasCancel = OPSD->hasCancel(); + CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen, + HasCancel); // Emit barrier for lastprivates only if 'sections' directive has 'nowait' // clause. Otherwise the barrier will be generated by the codegen for the // directive. - if (HasLastprivates && S.getSingleClause(OMPC_nowait)) { + if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) { // Emit implicit barrier to synchronize threads and avoid data races on // initialization of firstprivate variables. CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), @@ -1327,11 +1750,11 @@ CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { bool HasFirstprivates; // No need to generate reductions for sections with single section region, we // can use original shared variables for all operations. - bool HasReductions = !S.getClausesOfKind(OMPC_reduction).empty(); + bool HasReductions = S.hasClausesOfKind<OMPReductionClause>(); // No need to generate lastprivates for sections with single section region, // we can use original shared variable for all calculations with barrier at // the end of the sections. - bool HasLastprivates = !S.getClausesOfKind(OMPC_lastprivate).empty(); + bool HasLastprivates = S.hasClausesOfKind<OMPLastprivateClause>(); auto &&CodeGen = [Stmt, &S, &HasFirstprivates](CodeGenFunction &CGF) { CodeGenFunction::OMPPrivateScope SingleScope(CGF); HasFirstprivates = CGF.EmitOMPFirstprivateClause(S, SingleScope); @@ -1347,10 +1770,12 @@ CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { // 'sections' directive has 'nowait' clause. Otherwise the barrier will be // generated by the codegen for the directive. if ((HasFirstprivates || HasLastprivates || HasReductions) && - S.getSingleClause(OMPC_nowait)) { + S.getSingleClause<OMPNowaitClause>()) { // Emit implicit barrier to synchronize threads and avoid data races on // initialization of firstprivate variables. - CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_unknown); + CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_unknown, + /*EmitChecks=*/false, + /*ForceSimpleCall=*/true); } return OMPD_single; } @@ -1359,7 +1784,7 @@ void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) { LexicalScope Scope(*this, S.getSourceRange()); OpenMPDirectiveKind EmittedAs = EmitSections(S); // Emit an implicit barrier at the end. - if (!S.getSingleClause(OMPC_nowait)) { + if (!S.getSingleClause<OMPNowaitClause>()) { CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), EmittedAs); } } @@ -1368,9 +1793,9 @@ void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) { LexicalScope Scope(*this, S.getSourceRange()); auto &&CodeGen = [&S](CodeGenFunction &CGF) { CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); - CGF.EnsureInsertPoint(); }; - CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen); + CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen, + S.hasCancel()); } void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { @@ -1383,8 +1808,7 @@ void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { // construct. // Build a list of copyprivate variables along with helper expressions // (<source>, <destination>, <destination>=<source> expressions) - for (auto &&I = S.getClausesOfKind(OMPC_copyprivate); I; ++I) { - auto *C = cast<OMPCopyprivateClause>(*I); + for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) { CopyprivateVars.append(C->varlists().begin(), C->varlists().end()); DestExprs.append(C->destination_exprs().begin(), C->destination_exprs().end()); @@ -1402,18 +1826,17 @@ void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { (void)SingleScope.Privatize(); CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); - CGF.EnsureInsertPoint(); }; CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(), CopyprivateVars, DestExprs, SrcExprs, AssignmentOps); // Emit an implicit barrier at the end (to avoid data race on firstprivate // init or if no 'nowait' clause was specified and no 'copyprivate' clause). - if ((!S.getSingleClause(OMPC_nowait) || HasFirstprivates) && + if ((!S.getSingleClause<OMPNowaitClause>() || HasFirstprivates) && CopyprivateVars.empty()) { CGM.getOpenMPRuntime().emitBarrierCall( *this, S.getLocStart(), - S.getSingleClause(OMPC_nowait) ? OMPD_unknown : OMPD_single); + S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single); } } @@ -1421,7 +1844,6 @@ void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) { LexicalScope Scope(*this, S.getSourceRange()); auto &&CodeGen = [&S](CodeGenFunction &CGF) { CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); - CGF.EnsureInsertPoint(); }; CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart()); } @@ -1430,10 +1852,13 @@ void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) { LexicalScope Scope(*this, S.getSourceRange()); auto &&CodeGen = [&S](CodeGenFunction &CGF) { CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); - CGF.EnsureInsertPoint(); }; - CGM.getOpenMPRuntime().emitCriticalRegion( - *this, S.getDirectiveName().getAsString(), CodeGen, S.getLocStart()); + Expr *Hint = nullptr; + if (auto *HintClause = S.getSingleClause<OMPHintClause>()) + Hint = HintClause->getHint(); + CGM.getOpenMPRuntime().emitCriticalRegion(*this, + S.getDirectiveName().getAsString(), + CodeGen, S.getLocStart(), Hint); } void CodeGenFunction::EmitOMPParallelForDirective( @@ -1444,11 +1869,6 @@ void CodeGenFunction::EmitOMPParallelForDirective( (void)emitScheduleClause(*this, S, /*OuterRegion=*/true); auto &&CodeGen = [&S](CodeGenFunction &CGF) { CGF.EmitOMPWorksharingLoop(S); - // Emit implicit barrier at the end of parallel region, but this barrier - // is at the end of 'for' directive, so emit it as the implicit barrier for - // this 'for' directive. - CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(), - OMPD_parallel); }; emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen); } @@ -1461,11 +1881,6 @@ void CodeGenFunction::EmitOMPParallelForSimdDirective( (void)emitScheduleClause(*this, S, /*OuterRegion=*/true); auto &&CodeGen = [&S](CodeGenFunction &CGF) { CGF.EmitOMPWorksharingLoop(S); - // Emit implicit barrier at the end of parallel region, but this barrier - // is at the end of 'for' directive, so emit it as the implicit barrier for - // this 'for' directive. - CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(), - OMPD_parallel); }; emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen); } @@ -1477,9 +1892,6 @@ void CodeGenFunction::EmitOMPParallelSectionsDirective( LexicalScope Scope(*this, S.getSourceRange()); auto &&CodeGen = [&S](CodeGenFunction &CGF) { (void)CGF.EmitSections(S); - // Emit implicit barrier at the end of parallel region. - CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(), - OMPD_parallel); }; emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen); } @@ -1497,8 +1909,7 @@ void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { // Get list of private variables. llvm::SmallVector<const Expr *, 8> PrivateVars; llvm::SmallVector<const Expr *, 8> PrivateCopies; - for (auto &&I = S.getClausesOfKind(OMPC_private); I; ++I) { - auto *C = cast<OMPPrivateClause>(*I); + for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { auto IRef = C->varlist_begin(); for (auto *IInit : C->private_copies()) { auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); @@ -1514,8 +1925,7 @@ void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { llvm::SmallVector<const Expr *, 8> FirstprivateVars; llvm::SmallVector<const Expr *, 8> FirstprivateCopies; llvm::SmallVector<const Expr *, 8> FirstprivateInits; - for (auto &&I = S.getClausesOfKind(OMPC_firstprivate); I; ++I) { - auto *C = cast<OMPFirstprivateClause>(*I); + for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { auto IRef = C->varlist_begin(); auto IElemInitRef = C->inits().begin(); for (auto *IInit : C->private_copies()) { @@ -1531,8 +1941,7 @@ void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { // Build list of dependences. llvm::SmallVector<std::pair<OpenMPDependClauseKind, const Expr *>, 8> Dependences; - for (auto &&I = S.getClausesOfKind(OMPC_depend); I; ++I) { - auto *C = cast<OMPDependClause>(*I); + for (const auto *C : S.getClausesOfKind<OMPDependClause>()) { for (auto *IRef : C->varlists()) { Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef)); } @@ -1543,35 +1952,33 @@ void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { auto *CS = cast<CapturedStmt>(S.getAssociatedStmt()); OMPPrivateScope Scope(CGF); if (!PrivateVars.empty() || !FirstprivateVars.empty()) { - auto *CopyFn = CGF.Builder.CreateAlignedLoad( - CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3)), - CGF.PointerAlignInBytes); - auto *PrivatesPtr = CGF.Builder.CreateAlignedLoad( - CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2)), - CGF.PointerAlignInBytes); + auto *CopyFn = CGF.Builder.CreateLoad( + CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3))); + auto *PrivatesPtr = CGF.Builder.CreateLoad( + CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2))); // Map privates. - llvm::SmallVector<std::pair<const VarDecl *, llvm::Value *>, 16> + llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs; llvm::SmallVector<llvm::Value *, 16> CallArgs; CallArgs.push_back(PrivatesPtr); for (auto *E : PrivateVars) { auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); - auto *PrivatePtr = + Address PrivatePtr = CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType())); PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); - CallArgs.push_back(PrivatePtr); + CallArgs.push_back(PrivatePtr.getPointer()); } for (auto *E : FirstprivateVars) { auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); - auto *PrivatePtr = + Address PrivatePtr = CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType())); PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); - CallArgs.push_back(PrivatePtr); + CallArgs.push_back(PrivatePtr.getPointer()); } CGF.EmitRuntimeCall(CopyFn, CallArgs); for (auto &&Pair : PrivatePtrs) { - auto *Replacement = - CGF.Builder.CreateAlignedLoad(Pair.second, CGF.PointerAlignInBytes); + Address Replacement(CGF.Builder.CreateLoad(Pair.second), + CGF.getContext().getDeclAlign(Pair.first)); Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; }); } } @@ -1584,13 +1991,13 @@ void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { auto OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( S, *I, OMPD_task, CodeGen); // Check if we should emit tied or untied task. - bool Tied = !S.getSingleClause(OMPC_untied); + bool Tied = !S.getSingleClause<OMPUntiedClause>(); // Check if the task is final llvm::PointerIntPair<llvm::Value *, 1, bool> Final; - if (auto *Clause = S.getSingleClause(OMPC_final)) { + if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) { // If the condition constant folds and can be elided, try to avoid emitting // the condition and the dead arm of the if/else. - auto *Cond = cast<OMPFinalClause>(Clause)->getCondition(); + auto *Cond = Clause->getCondition(); bool CondConstant; if (ConstantFoldsToSimpleInteger(Cond, CondConstant)) Final.setInt(CondConstant); @@ -1602,8 +2009,12 @@ void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { } auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); const Expr *IfCond = nullptr; - if (auto C = S.getSingleClause(OMPC_if)) { - IfCond = cast<OMPIfClause>(C)->getCondition(); + for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { + if (C->getNameModifier() == OMPD_unknown || + C->getNameModifier() == OMPD_task) { + IfCond = C->getCondition(); + break; + } } CGM.getOpenMPRuntime().emitTaskCall( *this, S.getLocStart(), S, Tied, Final, OutlinedFn, SharedsTy, @@ -1629,15 +2040,13 @@ void CodeGenFunction::EmitOMPTaskgroupDirective( LexicalScope Scope(*this, S.getSourceRange()); auto &&CodeGen = [&S](CodeGenFunction &CGF) { CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); - CGF.EnsureInsertPoint(); }; CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart()); } void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) { CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> { - if (auto C = S.getSingleClause(/*K*/ OMPC_flush)) { - auto FlushClause = cast<OMPFlushClause>(C); + if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) { return llvm::makeArrayRef(FlushClause->varlist_begin(), FlushClause->varlist_end()); } @@ -1645,37 +2054,65 @@ void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) { }(), S.getLocStart()); } +void CodeGenFunction::EmitOMPDistributeDirective( + const OMPDistributeDirective &S) { + llvm_unreachable("CodeGen for 'omp distribute' is not supported yet."); +} + +static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM, + const CapturedStmt *S) { + CodeGenFunction CGF(CGM, /*suppressNewContext=*/true); + CodeGenFunction::CGCapturedStmtInfo CapStmtInfo; + CGF.CapturedStmtInfo = &CapStmtInfo; + auto *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S); + Fn->addFnAttr(llvm::Attribute::NoInline); + return Fn; +} + void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) { + if (!S.getAssociatedStmt()) + return; LexicalScope Scope(*this, S.getSourceRange()); - auto &&CodeGen = [&S](CodeGenFunction &CGF) { - CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); - CGF.EnsureInsertPoint(); + auto *C = S.getSingleClause<OMPSIMDClause>(); + auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF) { + if (C) { + auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); + llvm::SmallVector<llvm::Value *, 16> CapturedVars; + CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); + auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS); + CGF.EmitNounwindRuntimeCall(OutlinedFn, CapturedVars); + } else { + CGF.EmitStmt( + cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); + } }; - CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart()); + CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C); } static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val, - QualType SrcType, QualType DestType) { + QualType SrcType, QualType DestType, + SourceLocation Loc) { assert(CGF.hasScalarEvaluationKind(DestType) && "DestType must have scalar evaluation kind."); assert(!Val.isAggregate() && "Must be a scalar or complex."); return Val.isScalar() - ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType) + ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType, + Loc) : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType, - DestType); + DestType, Loc); } static CodeGenFunction::ComplexPairTy convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType, - QualType DestType) { + QualType DestType, SourceLocation Loc) { assert(CGF.getEvaluationKind(DestType) == TEK_Complex && "DestType must have complex evaluation kind."); CodeGenFunction::ComplexPairTy ComplexVal; if (Val.isScalar()) { // Convert the input element to the element type of the complex. auto DestElementType = DestType->castAs<ComplexType>()->getElementType(); - auto ScalarVal = - CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestElementType); + auto ScalarVal = CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, + DestElementType, Loc); ComplexVal = CodeGenFunction::ComplexPairTy( ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType())); } else { @@ -1683,9 +2120,9 @@ convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType, auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType(); auto DestElementType = DestType->castAs<ComplexType>()->getElementType(); ComplexVal.first = CGF.EmitScalarConversion( - Val.getComplexVal().first, SrcElementType, DestElementType); + Val.getComplexVal().first, SrcElementType, DestElementType, Loc); ComplexVal.second = CGF.EmitScalarConversion( - Val.getComplexVal().second, SrcElementType, DestElementType); + Val.getComplexVal().second, SrcElementType, DestElementType, Loc); } return ComplexVal; } @@ -1702,16 +2139,16 @@ static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst, } static void emitSimpleStore(CodeGenFunction &CGF, LValue LVal, RValue RVal, - QualType RValTy) { + QualType RValTy, SourceLocation Loc) { switch (CGF.getEvaluationKind(LVal.getType())) { case TEK_Scalar: - CGF.EmitStoreThroughLValue( - RValue::get(convertToScalarValue(CGF, RVal, RValTy, LVal.getType())), - LVal); + CGF.EmitStoreThroughLValue(RValue::get(convertToScalarValue( + CGF, RVal, RValTy, LVal.getType(), Loc)), + LVal); break; case TEK_Complex: CGF.EmitStoreOfComplex( - convertToComplexValue(CGF, RVal, RValTy, LVal.getType()), LVal, + convertToComplexValue(CGF, RVal, RValTy, LVal.getType(), Loc), LVal, /*isInit=*/false); break; case TEK_Aggregate: @@ -1739,7 +2176,7 @@ static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst, // list. if (IsSeqCst) CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); - emitSimpleStore(CGF,VLValue, Res, X->getType().getNonReferenceType()); + emitSimpleStore(CGF, VLValue, Res, X->getType().getNonReferenceType(), Loc); } static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst, @@ -1769,8 +2206,8 @@ static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X, !Update.getScalarVal()->getType()->isIntegerTy() || !X.isSimple() || (!isa<llvm::ConstantInt>(Update.getScalarVal()) && (Update.getScalarVal()->getType() != - X.getAddress()->getType()->getPointerElementType())) || - !X.getAddress()->getType()->getPointerElementType()->isIntegerTy() || + X.getAddress().getElementType())) || + !X.getAddress().getElementType()->isIntegerTy() || !Context.getTargetInfo().hasBuiltinAtomic( Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment()))) return std::make_pair(false, RValue::get(nullptr)); @@ -1841,10 +2278,10 @@ static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X, auto *UpdateVal = Update.getScalarVal(); if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) { UpdateVal = CGF.Builder.CreateIntCast( - IC, X.getAddress()->getType()->getPointerElementType(), + IC, X.getAddress().getElementType(), X.getType()->hasSignedIntegerRepresentation()); } - auto *Res = CGF.Builder.CreateAtomicRMW(RMWOp, X.getAddress(), UpdateVal, AO); + auto *Res = CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO); return std::make_pair(true, RValue::get(Res)); } @@ -1910,12 +2347,14 @@ static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst, } static RValue convertToType(CodeGenFunction &CGF, RValue Value, - QualType SourceType, QualType ResType) { + QualType SourceType, QualType ResType, + SourceLocation Loc) { switch (CGF.getEvaluationKind(ResType)) { case TEK_Scalar: - return RValue::get(convertToScalarValue(CGF, Value, SourceType, ResType)); + return RValue::get( + convertToScalarValue(CGF, Value, SourceType, ResType, Loc)); case TEK_Complex: { - auto Res = convertToComplexValue(CGF, Value, SourceType, ResType); + auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc); return RValue::getComplex(Res.first, Res.second); } case TEK_Aggregate: @@ -1980,7 +2419,7 @@ static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst, // 'x' is simply rewritten with some 'expr'. NewVValType = X->getType().getNonReferenceType(); ExprRValue = convertToType(CGF, ExprRValue, E->getType(), - X->getType().getNonReferenceType()); + X->getType().getNonReferenceType(), Loc); auto &&Gen = [&CGF, &NewVVal, ExprRValue](RValue XRValue) -> RValue { NewVVal = XRValue; return ExprRValue; @@ -1995,7 +2434,7 @@ static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst, } } // Emit post-update store to 'v' of old/new 'x' value. - emitSimpleStore(CGF, VLValue, NewVVal, NewVValType); + emitSimpleStore(CGF, VLValue, NewVVal, NewVValType, Loc); // OpenMP, 2.12.6, atomic Construct // Any atomic construct with a seq_cst clause forces the atomically // performed operation to include an implicit flush operation without a @@ -2032,6 +2471,7 @@ static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, case OMPC_lastprivate: case OMPC_reduction: case OMPC_safelen: + case OMPC_simdlen: case OMPC_collapse: case OMPC_default: case OMPC_seq_cst: @@ -2049,12 +2489,23 @@ static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, case OMPC_threadprivate: case OMPC_depend: case OMPC_mergeable: + case OMPC_device: + case OMPC_threads: + case OMPC_simd: + case OMPC_map: + case OMPC_num_teams: + case OMPC_thread_limit: + case OMPC_priority: + case OMPC_grainsize: + case OMPC_nogroup: + case OMPC_num_tasks: + case OMPC_hint: llvm_unreachable("Clause is not allowed in 'omp atomic'."); } } void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) { - bool IsSeqCst = S.getSingleClause(/*K=*/OMPC_seq_cst); + bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>(); OpenMPClauseKind Kind = OMPC_unknown; for (auto *C : S.clauses()) { // Find first clause (skip seq_cst clause, if it is first). @@ -2079,7 +2530,8 @@ void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) { } LexicalScope Scope(*this, S.getSourceRange()); - auto &&CodeGen = [&S, Kind, IsSeqCst](CodeGenFunction &CGF) { + auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF) { + CGF.EmitStopPoint(CS); EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(), S.getV(), S.getExpr(), S.getUpdateExpr(), S.isXLHSInRHSPart(), S.getLocStart()); @@ -2087,8 +2539,37 @@ void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) { CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen); } -void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &) { - llvm_unreachable("CodeGen for 'omp target' is not supported yet."); +void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) { + LexicalScope Scope(*this, S.getSourceRange()); + const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt()); + + llvm::SmallVector<llvm::Value *, 16> CapturedVars; + GenerateOpenMPCapturedVars(CS, CapturedVars); + + // Emit target region as a standalone region. + auto &&CodeGen = [&CS](CodeGenFunction &CGF) { + CGF.EmitStmt(CS.getCapturedStmt()); + }; + + // Obtain the target region outlined function. + llvm::Value *Fn = + CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, CodeGen); + + // Check if we have any if clause associated with the directive. + const Expr *IfCond = nullptr; + + if (auto *C = S.getSingleClause<OMPIfClause>()) { + IfCond = C->getCondition(); + } + + // Check if we have any device clause associated with the directive. + const Expr *Device = nullptr; + if (auto *C = S.getSingleClause<OMPDeviceClause>()) { + Device = C->getDevice(); + } + + CGM.getOpenMPRuntime().emitTargetCall(*this, S, Fn, IfCond, Device, + CapturedVars); } void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &) { @@ -2102,7 +2583,15 @@ void CodeGenFunction::EmitOMPCancellationPointDirective( } void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) { - CGM.getOpenMPRuntime().emitCancelCall(*this, S.getLocStart(), + const Expr *IfCond = nullptr; + for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { + if (C->getNameModifier() == OMPD_unknown || + C->getNameModifier() == OMPD_cancel) { + IfCond = C->getCondition(); + break; + } + } + CGM.getOpenMPRuntime().emitCancelCall(*this, S.getLocStart(), IfCond, S.getCancelRegion()); } @@ -2110,8 +2599,35 @@ CodeGenFunction::JumpDest CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) { if (Kind == OMPD_parallel || Kind == OMPD_task) return ReturnBlock; - else if (Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections) - return BreakContinueStack.empty() ? JumpDest() - : BreakContinueStack.back().BreakBlock; - return JumpDest(); + assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections || + Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for); + return BreakContinueStack.back().BreakBlock; +} + +// Generate the instructions for '#pragma omp target data' directive. +void CodeGenFunction::EmitOMPTargetDataDirective( + const OMPTargetDataDirective &S) { + // emit the code inside the construct for now + auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); + CGM.getOpenMPRuntime().emitInlinedDirective( + *this, OMPD_target_data, + [&CS](CodeGenFunction &CGF) { CGF.EmitStmt(CS->getCapturedStmt()); }); } + +void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) { + // emit the code inside the construct for now + auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); + CGM.getOpenMPRuntime().emitInlinedDirective( + *this, OMPD_taskloop, + [&CS](CodeGenFunction &CGF) { CGF.EmitStmt(CS->getCapturedStmt()); }); +} + +void CodeGenFunction::EmitOMPTaskLoopSimdDirective( + const OMPTaskLoopSimdDirective &S) { + // emit the code inside the construct for now + auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); + CGM.getOpenMPRuntime().emitInlinedDirective( + *this, OMPD_taskloop_simd, + [&CS](CodeGenFunction &CGF) { CGF.EmitStmt(CS->getCapturedStmt()); }); +} + |