diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2017-05-29 16:25:46 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2017-05-29 16:25:46 +0000 |
commit | b5aee35cc5d62f11d98539f62e4fe63f0ac9edc6 (patch) | |
tree | 3e6ab962dbc73cfe1445a60d2eb4dfba7c939a22 /lib/CodeGen | |
parent | aa803409c3bd3930126db630c29f63d42f255153 (diff) | |
download | src-test2-b5aee35cc5d62f11d98539f62e4fe63f0ac9edc6.tar.gz src-test2-b5aee35cc5d62f11d98539f62e4fe63f0ac9edc6.zip |
Notes
Diffstat (limited to 'lib/CodeGen')
-rw-r--r-- | lib/CodeGen/CGBuiltin.cpp | 144 | ||||
-rw-r--r-- | lib/CodeGen/CGCoroutine.cpp | 313 | ||||
-rw-r--r-- | lib/CodeGen/CGExpr.cpp | 31 | ||||
-rw-r--r-- | lib/CodeGen/CGOpenMPRuntime.cpp | 15 | ||||
-rw-r--r-- | lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp | 15 | ||||
-rw-r--r-- | lib/CodeGen/CodeGenModule.cpp | 25 | ||||
-rw-r--r-- | lib/CodeGen/TargetInfo.cpp | 30 |
7 files changed, 502 insertions, 71 deletions
diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp index e2d5f8f870de..2134fb9e03e4 100644 --- a/lib/CodeGen/CGBuiltin.cpp +++ b/lib/CodeGen/CGBuiltin.cpp @@ -7332,39 +7332,42 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, AVX512PF, AVX512VBMI, AVX512IFMA, + AVX512VPOPCNTDQ, MAX }; - X86Features Feature = StringSwitch<X86Features>(FeatureStr) - .Case("cmov", X86Features::CMOV) - .Case("mmx", X86Features::MMX) - .Case("popcnt", X86Features::POPCNT) - .Case("sse", X86Features::SSE) - .Case("sse2", X86Features::SSE2) - .Case("sse3", X86Features::SSE3) - .Case("ssse3", X86Features::SSSE3) - .Case("sse4.1", X86Features::SSE4_1) - .Case("sse4.2", X86Features::SSE4_2) - .Case("avx", X86Features::AVX) - .Case("avx2", X86Features::AVX2) - .Case("sse4a", X86Features::SSE4_A) - .Case("fma4", X86Features::FMA4) - .Case("xop", X86Features::XOP) - .Case("fma", X86Features::FMA) - .Case("avx512f", X86Features::AVX512F) - .Case("bmi", X86Features::BMI) - .Case("bmi2", X86Features::BMI2) - .Case("aes", X86Features::AES) - .Case("pclmul", X86Features::PCLMUL) - .Case("avx512vl", X86Features::AVX512VL) - .Case("avx512bw", X86Features::AVX512BW) - .Case("avx512dq", X86Features::AVX512DQ) - .Case("avx512cd", X86Features::AVX512CD) - .Case("avx512er", X86Features::AVX512ER) - .Case("avx512pf", X86Features::AVX512PF) - .Case("avx512vbmi", X86Features::AVX512VBMI) - .Case("avx512ifma", X86Features::AVX512IFMA) - .Default(X86Features::MAX); + X86Features Feature = + StringSwitch<X86Features>(FeatureStr) + .Case("cmov", X86Features::CMOV) + .Case("mmx", X86Features::MMX) + .Case("popcnt", X86Features::POPCNT) + .Case("sse", X86Features::SSE) + .Case("sse2", X86Features::SSE2) + .Case("sse3", X86Features::SSE3) + .Case("ssse3", X86Features::SSSE3) + .Case("sse4.1", X86Features::SSE4_1) + .Case("sse4.2", X86Features::SSE4_2) + .Case("avx", X86Features::AVX) + .Case("avx2", X86Features::AVX2) + .Case("sse4a", X86Features::SSE4_A) + .Case("fma4", X86Features::FMA4) + .Case("xop", X86Features::XOP) + .Case("fma", X86Features::FMA) + .Case("avx512f", X86Features::AVX512F) + .Case("bmi", X86Features::BMI) + .Case("bmi2", X86Features::BMI2) + .Case("aes", X86Features::AES) + .Case("pclmul", X86Features::PCLMUL) + .Case("avx512vl", X86Features::AVX512VL) + .Case("avx512bw", X86Features::AVX512BW) + .Case("avx512dq", X86Features::AVX512DQ) + .Case("avx512cd", X86Features::AVX512CD) + .Case("avx512er", X86Features::AVX512ER) + .Case("avx512pf", X86Features::AVX512PF) + .Case("avx512vbmi", X86Features::AVX512VBMI) + .Case("avx512ifma", X86Features::AVX512IFMA) + .Case("avx512vpopcntdq", X86Features::AVX512VPOPCNTDQ) + .Default(X86Features::MAX); assert(Feature != X86Features::MAX && "Invalid feature!"); // Matching the struct layout from the compiler-rt/libgcc structure that is @@ -7517,7 +7520,12 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_storesd128_mask: { return EmitX86MaskedStore(*this, Ops, 16); } - + case X86::BI__builtin_ia32_vpopcntd_512: + case X86::BI__builtin_ia32_vpopcntq_512: { + llvm::Type *ResultType = ConvertType(E->getType()); + llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType); + return Builder.CreateCall(F, Ops); + } case X86::BI__builtin_ia32_cvtmask2b128: case X86::BI__builtin_ia32_cvtmask2b256: case X86::BI__builtin_ia32_cvtmask2b512: @@ -8442,6 +8450,80 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, Ops); } } + + case PPC::BI__builtin_vsx_xxpermdi: { + ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]); + assert(ArgCI && "Third arg must be constant integer!"); + + unsigned Index = ArgCI->getZExtValue(); + Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2)); + Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2)); + + // Element zero comes from the first input vector and element one comes from + // the second. The element indices within each vector are numbered in big + // endian order so the shuffle mask must be adjusted for this on little + // endian platforms (i.e. index is complemented and source vector reversed). + unsigned ElemIdx0; + unsigned ElemIdx1; + if (getTarget().isLittleEndian()) { + ElemIdx0 = (~Index & 1) + 2; + ElemIdx1 = (~Index & 2) >> 1; + } else { // BigEndian + ElemIdx0 = (Index & 2) >> 1; + ElemIdx1 = 2 + (Index & 1); + } + + Constant *ShuffleElts[2] = {ConstantInt::get(Int32Ty, ElemIdx0), + ConstantInt::get(Int32Ty, ElemIdx1)}; + Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts); + + Value *ShuffleCall = + Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask); + QualType BIRetType = E->getType(); + auto RetTy = ConvertType(BIRetType); + return Builder.CreateBitCast(ShuffleCall, RetTy); + } + + case PPC::BI__builtin_vsx_xxsldwi: { + ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]); + assert(ArgCI && "Third argument must be a compile time constant"); + unsigned Index = ArgCI->getZExtValue() & 0x3; + Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4)); + Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int32Ty, 4)); + + // Create a shuffle mask + unsigned ElemIdx0; + unsigned ElemIdx1; + unsigned ElemIdx2; + unsigned ElemIdx3; + if (getTarget().isLittleEndian()) { + // Little endian element N comes from element 8+N-Index of the + // concatenated wide vector (of course, using modulo arithmetic on + // the total number of elements). + ElemIdx0 = (8 - Index) % 8; + ElemIdx1 = (9 - Index) % 8; + ElemIdx2 = (10 - Index) % 8; + ElemIdx3 = (11 - Index) % 8; + } else { + // Big endian ElemIdx<N> = Index + N + ElemIdx0 = Index; + ElemIdx1 = Index + 1; + ElemIdx2 = Index + 2; + ElemIdx3 = Index + 3; + } + + Constant *ShuffleElts[4] = {ConstantInt::get(Int32Ty, ElemIdx0), + ConstantInt::get(Int32Ty, ElemIdx1), + ConstantInt::get(Int32Ty, ElemIdx2), + ConstantInt::get(Int32Ty, ElemIdx3)}; + + Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts); + Value *ShuffleCall = + Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask); + QualType BIRetType = E->getType(); + auto RetTy = ConvertType(BIRetType); + return Builder.CreateBitCast(ShuffleCall, RetTy); + } } } diff --git a/lib/CodeGen/CGCoroutine.cpp b/lib/CodeGen/CGCoroutine.cpp index 0ef680ef6609..c468c1bb4b5f 100644 --- a/lib/CodeGen/CGCoroutine.cpp +++ b/lib/CodeGen/CGCoroutine.cpp @@ -11,9 +11,11 @@ // //===----------------------------------------------------------------------===// +#include "CGCleanup.h" #include "CodeGenFunction.h" #include "llvm/ADT/ScopeExit.h" #include "clang/AST/StmtCXX.h" +#include "clang/AST/StmtVisitor.h" using namespace clang; using namespace CodeGen; @@ -57,6 +59,15 @@ struct clang::CodeGen::CGCoroData { // builtin. llvm::CallInst *CoroId = nullptr; + // Stores the llvm.coro.begin emitted in the function so that we can replace + // all coro.frame intrinsics with direct SSA value of coro.begin that returns + // the address of the coroutine frame of the current coroutine. + llvm::CallInst *CoroBegin = nullptr; + + // Stores the last emitted coro.free for the deallocate expressions, we use it + // to wrap dealloc code with if(auto mem = coro.free) dealloc(mem). + llvm::CallInst *LastCoroFree = nullptr; + // If coro.id came from the builtin, remember the expression to give better // diagnostic. If CoroIdExpr is nullptr, the coro.id was created by // EmitCoroutineBody. @@ -142,6 +153,20 @@ static RValue emitSuspendExpression(CodeGenFunction &CGF, CGCoroData &Coro, AwaitKind Kind, AggValueSlot aggSlot, bool ignoreResult) { auto *E = S.getCommonExpr(); + + // FIXME: rsmith 5/22/2017. Does it still make sense for us to have a + // UO_Coawait at all? As I recall, the only purpose it ever had was to + // represent a dependent co_await expression that couldn't yet be resolved to + // a CoawaitExpr. But now we have (and need!) a separate DependentCoawaitExpr + // node to store unqualified lookup results, it seems that the UnaryOperator + // portion of the representation serves no purpose (and as seen in this patch, + // it's getting in the way). Can we remove it? + + // Skip passthrough operator co_await (present when awaiting on an LValue). + if (auto *UO = dyn_cast<UnaryOperator>(E)) + if (UO->getOpcode() == UO_Coawait) + E = UO->getSubExpr(); + auto Binder = CodeGenFunction::OpaqueValueMappingData::bind(CGF, S.getOpaqueValue(), E); auto UnbindOnExit = llvm::make_scope_exit([&] { Binder.unbind(CGF); }); @@ -215,7 +240,67 @@ void CodeGenFunction::EmitCoreturnStmt(CoreturnStmt const &S) { EmitBranchThroughCleanup(CurCoro.Data->FinalJD); } -// For WinEH exception representation backend need to know what funclet coro.end +// Hunts for the parameter reference in the parameter copy/move declaration. +namespace { +struct GetParamRef : public StmtVisitor<GetParamRef> { +public: + DeclRefExpr *Expr = nullptr; + GetParamRef() {} + void VisitDeclRefExpr(DeclRefExpr *E) { + assert(Expr == nullptr && "multilple declref in param move"); + Expr = E; + } + void VisitStmt(Stmt *S) { + for (auto *C : S->children()) { + if (C) + Visit(C); + } + } +}; +} + +// This class replaces references to parameters to their copies by changing +// the addresses in CGF.LocalDeclMap and restoring back the original values in +// its destructor. + +namespace { + struct ParamReferenceReplacerRAII { + CodeGenFunction::DeclMapTy SavedLocals; + CodeGenFunction::DeclMapTy& LocalDeclMap; + + ParamReferenceReplacerRAII(CodeGenFunction::DeclMapTy &LocalDeclMap) + : LocalDeclMap(LocalDeclMap) {} + + void addCopy(DeclStmt const *PM) { + // Figure out what param it refers to. + + assert(PM->isSingleDecl()); + VarDecl const*VD = static_cast<VarDecl const*>(PM->getSingleDecl()); + Expr const *InitExpr = VD->getInit(); + GetParamRef Visitor; + Visitor.Visit(const_cast<Expr*>(InitExpr)); + assert(Visitor.Expr); + auto *DREOrig = cast<DeclRefExpr>(Visitor.Expr); + auto *PD = DREOrig->getDecl(); + + auto it = LocalDeclMap.find(PD); + assert(it != LocalDeclMap.end() && "parameter is not found"); + SavedLocals.insert({ PD, it->second }); + + auto copyIt = LocalDeclMap.find(VD); + assert(copyIt != LocalDeclMap.end() && "parameter copy is not found"); + it->second = copyIt->getSecond(); + } + + ~ParamReferenceReplacerRAII() { + for (auto&& SavedLocal : SavedLocals) { + LocalDeclMap.insert({SavedLocal.first, SavedLocal.second}); + } + } + }; +} + +// For WinEH exception representation backend needs to know what funclet coro.end // belongs to. That information is passed in a funclet bundle. static SmallVector<llvm::OperandBundleDef, 1> getBundlesForCoroEnd(CodeGenFunction &CGF) { @@ -257,24 +342,135 @@ namespace { struct CallCoroDelete final : public EHScopeStack::Cleanup { Stmt *Deallocate; - // TODO: Wrap deallocate in if(coro.free(...)) Deallocate. + // Emit "if (coro.free(CoroId, CoroBegin)) Deallocate;" + + // Note: That deallocation will be emitted twice: once for a normal exit and + // once for exceptional exit. This usage is safe because Deallocate does not + // contain any declarations. The SubStmtBuilder::makeNewAndDeleteExpr() + // builds a single call to a deallocation function which is safe to emit + // multiple times. void Emit(CodeGenFunction &CGF, Flags) override { - // Note: That deallocation will be emitted twice: once for a normal exit and - // once for exceptional exit. This usage is safe because Deallocate does not - // contain any declarations. The SubStmtBuilder::makeNewAndDeleteExpr() - // builds a single call to a deallocation function which is safe to emit - // multiple times. + // Remember the current point, as we are going to emit deallocation code + // first to get to coro.free instruction that is an argument to a delete + // call. + BasicBlock *SaveInsertBlock = CGF.Builder.GetInsertBlock(); + + auto *FreeBB = CGF.createBasicBlock("coro.free"); + CGF.EmitBlock(FreeBB); CGF.EmitStmt(Deallocate); + + auto *AfterFreeBB = CGF.createBasicBlock("after.coro.free"); + CGF.EmitBlock(AfterFreeBB); + + // We should have captured coro.free from the emission of deallocate. + auto *CoroFree = CGF.CurCoro.Data->LastCoroFree; + if (!CoroFree) { + CGF.CGM.Error(Deallocate->getLocStart(), + "Deallocation expressoin does not refer to coro.free"); + return; + } + + // Get back to the block we were originally and move coro.free there. + auto *InsertPt = SaveInsertBlock->getTerminator(); + CoroFree->moveBefore(InsertPt); + CGF.Builder.SetInsertPoint(InsertPt); + + // Add if (auto *mem = coro.free) Deallocate; + auto *NullPtr = llvm::ConstantPointerNull::get(CGF.Int8PtrTy); + auto *Cond = CGF.Builder.CreateICmpNE(CoroFree, NullPtr); + CGF.Builder.CreateCondBr(Cond, FreeBB, AfterFreeBB); + + // No longer need old terminator. + InsertPt->eraseFromParent(); + CGF.Builder.SetInsertPoint(AfterFreeBB); } explicit CallCoroDelete(Stmt *DeallocStmt) : Deallocate(DeallocStmt) {} }; } +namespace { +struct GetReturnObjectManager { + CodeGenFunction &CGF; + CGBuilderTy &Builder; + const CoroutineBodyStmt &S; + + Address GroActiveFlag; + CodeGenFunction::AutoVarEmission GroEmission; + + GetReturnObjectManager(CodeGenFunction &CGF, const CoroutineBodyStmt &S) + : CGF(CGF), Builder(CGF.Builder), S(S), GroActiveFlag(Address::invalid()), + GroEmission(CodeGenFunction::AutoVarEmission::invalid()) {} + + // The gro variable has to outlive coroutine frame and coroutine promise, but, + // it can only be initialized after coroutine promise was created, thus, we + // split its emission in two parts. EmitGroAlloca emits an alloca and sets up + // cleanups. Later when coroutine promise is available we initialize the gro + // and sets the flag that the cleanup is now active. + + void EmitGroAlloca() { + auto *GroDeclStmt = dyn_cast<DeclStmt>(S.getResultDecl()); + if (!GroDeclStmt) { + // If get_return_object returns void, no need to do an alloca. + return; + } + + auto *GroVarDecl = cast<VarDecl>(GroDeclStmt->getSingleDecl()); + + // Set GRO flag that it is not initialized yet + GroActiveFlag = + CGF.CreateTempAlloca(Builder.getInt1Ty(), CharUnits::One(), "gro.active"); + Builder.CreateStore(Builder.getFalse(), GroActiveFlag); + + GroEmission = CGF.EmitAutoVarAlloca(*GroVarDecl); + + // Remember the top of EHStack before emitting the cleanup. + auto old_top = CGF.EHStack.stable_begin(); + CGF.EmitAutoVarCleanups(GroEmission); + auto top = CGF.EHStack.stable_begin(); + + // Make the cleanup conditional on gro.active + for (auto b = CGF.EHStack.find(top), e = CGF.EHStack.find(old_top); + b != e; b++) { + if (auto *Cleanup = dyn_cast<EHCleanupScope>(&*b)) { + assert(!Cleanup->hasActiveFlag() && "cleanup already has active flag?"); + Cleanup->setActiveFlag(GroActiveFlag); + Cleanup->setTestFlagInEHCleanup(); + Cleanup->setTestFlagInNormalCleanup(); + } + } + } + + void EmitGroInit() { + if (!GroActiveFlag.isValid()) { + // No Gro variable was allocated. Simply emit the call to + // get_return_object. + CGF.EmitStmt(S.getResultDecl()); + return; + } + + CGF.EmitAutoVarInit(GroEmission); + Builder.CreateStore(Builder.getTrue(), GroActiveFlag); + } +}; +} + +static void emitBodyAndFallthrough(CodeGenFunction &CGF, + const CoroutineBodyStmt &S, Stmt *Body) { + CGF.EmitStmt(Body); + const bool CanFallthrough = CGF.Builder.GetInsertBlock(); + if (CanFallthrough) + if (Stmt *OnFallthrough = S.getFallthroughHandler()) + CGF.EmitStmt(OnFallthrough); +} + void CodeGenFunction::EmitCoroutineBody(const CoroutineBodyStmt &S) { auto *NullPtr = llvm::ConstantPointerNull::get(Builder.getInt8PtrTy()); auto &TI = CGM.getContext().getTargetInfo(); unsigned NewAlign = TI.getNewAlign() / TI.getCharWidth(); + auto *EntryBB = Builder.GetInsertBlock(); + auto *AllocBB = createBasicBlock("coro.alloc"); + auto *InitBB = createBasicBlock("coro.init"); auto *FinalBB = createBasicBlock("coro.final"); auto *RetBB = createBasicBlock("coro.ret"); @@ -284,12 +480,20 @@ void CodeGenFunction::EmitCoroutineBody(const CoroutineBodyStmt &S) { createCoroData(*this, CurCoro, CoroId); CurCoro.Data->SuspendBB = RetBB; + // Backend is allowed to elide memory allocations, to help it, emit + // auto mem = coro.alloc() ? 0 : ... allocation code ...; + auto *CoroAlloc = Builder.CreateCall( + CGM.getIntrinsic(llvm::Intrinsic::coro_alloc), {CoroId}); + + Builder.CreateCondBr(CoroAlloc, AllocBB, InitBB); + + EmitBlock(AllocBB); auto *AllocateCall = EmitScalarExpr(S.getAllocate()); + auto *AllocOrInvokeContBB = Builder.GetInsertBlock(); // Handle allocation failure if 'ReturnStmtOnAllocFailure' was provided. if (auto *RetOnAllocFailure = S.getReturnStmtOnAllocFailure()) { auto *RetOnFailureBB = createBasicBlock("coro.ret.on.failure"); - auto *InitBB = createBasicBlock("coro.init"); // See if allocation was successful. auto *NullPtr = llvm::ConstantPointerNull::get(Int8PtrTy); @@ -299,40 +503,96 @@ void CodeGenFunction::EmitCoroutineBody(const CoroutineBodyStmt &S) { // If not, return OnAllocFailure object. EmitBlock(RetOnFailureBB); EmitStmt(RetOnAllocFailure); - - EmitBlock(InitBB); } + else { + Builder.CreateBr(InitBB); + } + + EmitBlock(InitBB); + + // Pass the result of the allocation to coro.begin. + auto *Phi = Builder.CreatePHI(VoidPtrTy, 2); + Phi->addIncoming(NullPtr, EntryBB); + Phi->addIncoming(AllocateCall, AllocOrInvokeContBB); + auto *CoroBegin = Builder.CreateCall( + CGM.getIntrinsic(llvm::Intrinsic::coro_begin), {CoroId, Phi}); + CurCoro.Data->CoroBegin = CoroBegin; + + GetReturnObjectManager GroManager(*this, S); + GroManager.EmitGroAlloca(); CurCoro.Data->CleanupJD = getJumpDestInCurrentScope(RetBB); { + ParamReferenceReplacerRAII ParamReplacer(LocalDeclMap); CodeGenFunction::RunCleanupsScope ResumeScope(*this); EHStack.pushCleanup<CallCoroDelete>(NormalAndEHCleanup, S.getDeallocate()); + // Create parameter copies. We do it before creating a promise, since an + // evolution of coroutine TS may allow promise constructor to observe + // parameter copies. + for (auto *PM : S.getParamMoves()) { + EmitStmt(PM); + ParamReplacer.addCopy(cast<DeclStmt>(PM)); + // TODO: if(CoroParam(...)) need to surround ctor and dtor + // for the copy, so that llvm can elide it if the copy is + // not needed. + } + EmitStmt(S.getPromiseDeclStmt()); + Address PromiseAddr = GetAddrOfLocalVar(S.getPromiseDecl()); + auto *PromiseAddrVoidPtr = + new llvm::BitCastInst(PromiseAddr.getPointer(), VoidPtrTy, "", CoroId); + // Update CoroId to refer to the promise. We could not do it earlier because + // promise local variable was not emitted yet. + CoroId->setArgOperand(1, PromiseAddrVoidPtr); + + // Now we have the promise, initialize the GRO + GroManager.EmitGroInit(); + EHStack.pushCleanup<CallCoroEnd>(EHCleanup); + CurCoro.Data->CurrentAwaitKind = AwaitKind::Init; + EmitStmt(S.getInitSuspendStmt()); CurCoro.Data->FinalJD = getJumpDestInCurrentScope(FinalBB); - // FIXME: Emit initial suspend and more before the body. - CurCoro.Data->CurrentAwaitKind = AwaitKind::Normal; - EmitStmt(S.getBody()); + + if (auto *OnException = S.getExceptionHandler()) { + auto Loc = S.getLocStart(); + CXXCatchStmt Catch(Loc, /*exDecl=*/nullptr, OnException); + auto *TryStmt = CXXTryStmt::Create(getContext(), Loc, S.getBody(), &Catch); + + EnterCXXTryStmt(*TryStmt); + emitBodyAndFallthrough(*this, S, TryStmt->getTryBlock()); + ExitCXXTryStmt(*TryStmt); + } + else { + emitBodyAndFallthrough(*this, S, S.getBody()); + } // See if we need to generate final suspend. const bool CanFallthrough = Builder.GetInsertBlock(); const bool HasCoreturns = CurCoro.Data->CoreturnCount > 0; if (CanFallthrough || HasCoreturns) { EmitBlock(FinalBB); - // FIXME: Emit final suspend. + CurCoro.Data->CurrentAwaitKind = AwaitKind::Final; + EmitStmt(S.getFinalSuspendStmt()); + } + else { + // We don't need FinalBB. Emit it to make sure the block is deleted. + EmitBlock(FinalBB, /*IsFinished=*/true); } } EmitBlock(RetBB); + // Emit coro.end before getReturnStmt (and parameter destructors), since + // resume and destroy parts of the coroutine should not include them. llvm::Function *CoroEnd = CGM.getIntrinsic(llvm::Intrinsic::coro_end); Builder.CreateCall(CoroEnd, {NullPtr, Builder.getFalse()}); - // FIXME: Emit return for the coroutine return object. + if (Stmt *Ret = S.getReturnStmt()) + EmitStmt(Ret); } // Emit coroutine intrinsic and patch up arguments of the token type. @@ -342,6 +602,17 @@ RValue CodeGenFunction::EmitCoroutineIntrinsic(const CallExpr *E, switch (IID) { default: break; + // The coro.frame builtin is replaced with an SSA value of the coro.begin + // intrinsic. + case llvm::Intrinsic::coro_frame: { + if (CurCoro.Data && CurCoro.Data->CoroBegin) { + return RValue::get(CurCoro.Data->CoroBegin); + } + CGM.Error(E->getLocStart(), "this builtin expect that __builtin_coro_begin " + "has been used earlier in this function"); + auto NullPtr = llvm::ConstantPointerNull::get(Builder.getInt8PtrTy()); + return RValue::get(NullPtr); + } // The following three intrinsics take a token parameter referring to a token // returned by earlier call to @llvm.coro.id. Since we cannot represent it in // builtins, we patch it up here. @@ -368,10 +639,22 @@ RValue CodeGenFunction::EmitCoroutineIntrinsic(const CallExpr *E, llvm::Value *F = CGM.getIntrinsic(IID); llvm::CallInst *Call = Builder.CreateCall(F, Args); + // Note: The following code is to enable to emit coro.id and coro.begin by + // hand to experiment with coroutines in C. // If we see @llvm.coro.id remember it in the CoroData. We will update // coro.alloc, coro.begin and coro.free intrinsics to refer to it. if (IID == llvm::Intrinsic::coro_id) { createCoroData(*this, CurCoro, Call, E); } + else if (IID == llvm::Intrinsic::coro_begin) { + if (CurCoro.Data) + CurCoro.Data->CoroBegin = Call; + } + else if (IID == llvm::Intrinsic::coro_free) { + // Remember the last coro_free as we need it to build the conditional + // deletion of the coroutine frame. + if (CurCoro.Data) + CurCoro.Data->LastCoroFree = Call; + } return RValue::get(Call); } diff --git a/lib/CodeGen/CGExpr.cpp b/lib/CodeGen/CGExpr.cpp index a6d5dd85c234..b918a663ce5c 100644 --- a/lib/CodeGen/CGExpr.cpp +++ b/lib/CodeGen/CGExpr.cpp @@ -1432,11 +1432,12 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile, Load->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node); } if (TBAAInfo) { - llvm::MDNode *TBAAPath = CGM.getTBAAStructTagInfo(TBAABaseType, TBAAInfo, - TBAAOffset); - if (TBAAPath) - CGM.DecorateInstructionWithTBAA(Load, TBAAPath, - false /*ConvertTypeToTag*/); + bool MayAlias = BaseInfo.getMayAlias(); + llvm::MDNode *TBAA = MayAlias + ? CGM.getTBAAInfo(getContext().CharTy) + : CGM.getTBAAStructTagInfo(TBAABaseType, TBAAInfo, TBAAOffset); + if (TBAA) + CGM.DecorateInstructionWithTBAA(Load, TBAA, MayAlias); } if (EmitScalarRangeCheck(Load, Ty, Loc)) { @@ -1522,11 +1523,12 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr, Store->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node); } if (TBAAInfo) { - llvm::MDNode *TBAAPath = CGM.getTBAAStructTagInfo(TBAABaseType, TBAAInfo, - TBAAOffset); - if (TBAAPath) - CGM.DecorateInstructionWithTBAA(Store, TBAAPath, - false /*ConvertTypeToTag*/); + bool MayAlias = BaseInfo.getMayAlias(); + llvm::MDNode *TBAA = MayAlias + ? CGM.getTBAAInfo(getContext().CharTy) + : CGM.getTBAAStructTagInfo(TBAABaseType, TBAAInfo, TBAAOffset); + if (TBAA) + CGM.DecorateInstructionWithTBAA(Store, TBAA, MayAlias); } } @@ -3535,6 +3537,11 @@ LValue CodeGenFunction::EmitLValueForField(LValue base, getFieldAlignmentSource(BaseInfo.getAlignmentSource()); LValueBaseInfo FieldBaseInfo(fieldAlignSource, BaseInfo.getMayAlias()); + const RecordDecl *rec = field->getParent(); + if (rec->isUnion() || rec->hasAttr<MayAliasAttr>()) + FieldBaseInfo.setMayAlias(true); + bool mayAlias = FieldBaseInfo.getMayAlias(); + if (field->isBitField()) { const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(field->getParent()); @@ -3556,11 +3563,7 @@ LValue CodeGenFunction::EmitLValueForField(LValue base, return LValue::MakeBitfield(Addr, Info, fieldType, FieldBaseInfo); } - const RecordDecl *rec = field->getParent(); QualType type = field->getType(); - - bool mayAlias = rec->hasAttr<MayAliasAttr>(); - Address addr = base.getAddress(); unsigned cvr = base.getVRQualifiers(); bool TBAAPath = CGM.getCodeGenOpts().StructPathTBAA; diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp index 601d662e55e6..63300e25d37e 100644 --- a/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/lib/CodeGen/CGOpenMPRuntime.cpp @@ -760,6 +760,7 @@ emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, IsCombiner ? ".omp_combiner." : ".omp_initializer.", &CGM.getModule()); CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo); Fn->removeFnAttr(llvm::Attribute::NoInline); + Fn->removeFnAttr(llvm::Attribute::OptimizeNone); Fn->addFnAttr(llvm::Attribute::AlwaysInline); CodeGenFunction CGF(CGM); // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions. @@ -2903,6 +2904,19 @@ CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() { Desc); CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc); }); + if (CGM.supportsCOMDAT()) { + // It is sufficient to call registration function only once, so create a + // COMDAT group for registration/unregistration functions and associated + // data. That would reduce startup time and code size. Registration + // function serves as a COMDAT group key. + auto ComdatKey = M.getOrInsertComdat(RegFn->getName()); + RegFn->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); + RegFn->setVisibility(llvm::GlobalValue::HiddenVisibility); + RegFn->setComdat(ComdatKey); + UnRegFn->setComdat(ComdatKey); + DeviceImages->setComdat(ComdatKey); + Desc->setComdat(ComdatKey); + } return RegFn; } @@ -3502,6 +3516,7 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskPrivatesMap, TaskPrivatesMapFnInfo); TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); + TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); CodeGenFunction CGF(CGM); CGF.disableDebugInfo(); diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index c3391d087b75..bbedac962d55 100644 --- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -861,6 +861,7 @@ llvm::Value *CGOpenMPRuntimeNVPTX::emitTeamsOutlinedFunction( D, ThreadIDVar, InnermostKind, CodeGen); llvm::Function *OutlinedFun = cast<llvm::Function>(OutlinedFunVal); OutlinedFun->removeFnAttr(llvm::Attribute::NoInline); + OutlinedFun->removeFnAttr(llvm::Attribute::OptimizeNone); OutlinedFun->addFnAttr(llvm::Attribute::AlwaysInline); return OutlinedFun; @@ -1243,10 +1244,10 @@ static void emitReductionListCopy( /// local = local @ remote /// else /// local = remote -llvm::Value *emitReduceScratchpadFunction(CodeGenModule &CGM, - ArrayRef<const Expr *> Privates, - QualType ReductionArrayTy, - llvm::Value *ReduceFn) { +static llvm::Value * +emitReduceScratchpadFunction(CodeGenModule &CGM, + ArrayRef<const Expr *> Privates, + QualType ReductionArrayTy, llvm::Value *ReduceFn) { auto &C = CGM.getContext(); auto Int32Ty = C.getIntTypeForBitwidth(32, /* Signed */ true); @@ -1372,9 +1373,9 @@ llvm::Value *emitReduceScratchpadFunction(CodeGenModule &CGM, /// for elem in Reduce List: /// scratchpad[elem_id][index] = elem /// -llvm::Value *emitCopyToScratchpad(CodeGenModule &CGM, - ArrayRef<const Expr *> Privates, - QualType ReductionArrayTy) { +static llvm::Value *emitCopyToScratchpad(CodeGenModule &CGM, + ArrayRef<const Expr *> Privates, + QualType ReductionArrayTy) { auto &C = CGM.getContext(); auto Int32Ty = C.getIntTypeForBitwidth(32, /* Signed */ true); diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp index 9d0f802ece07..e4e5fce02279 100644 --- a/lib/CodeGen/CodeGenModule.cpp +++ b/lib/CodeGen/CodeGenModule.cpp @@ -400,8 +400,11 @@ void CodeGenModule::Release() { } if (OpenMPRuntime) if (llvm::Function *OpenMPRegistrationFunction = - OpenMPRuntime->emitRegistrationFunction()) - AddGlobalCtor(OpenMPRegistrationFunction, 0); + OpenMPRuntime->emitRegistrationFunction()) { + auto ComdatKey = OpenMPRegistrationFunction->hasComdat() ? + OpenMPRegistrationFunction : nullptr; + AddGlobalCtor(OpenMPRegistrationFunction, 0, ComdatKey); + } if (PGOReader) { getModule().setProfileSummary(PGOReader->getSummary().getMD(VMContext)); if (PGOStats.hasDiagnostics()) @@ -904,7 +907,16 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D, return; } - if (D->hasAttr<OptimizeNoneAttr>()) { + // Track whether we need to add the optnone LLVM attribute, + // starting with the default for this optimization level. + bool ShouldAddOptNone = + !CodeGenOpts.DisableO0ImplyOptNone && CodeGenOpts.OptimizationLevel == 0; + // We can't add optnone in the following cases, it won't pass the verifier. + ShouldAddOptNone &= !D->hasAttr<MinSizeAttr>(); + ShouldAddOptNone &= !F->hasFnAttribute(llvm::Attribute::AlwaysInline); + ShouldAddOptNone &= !D->hasAttr<AlwaysInlineAttr>(); + + if (ShouldAddOptNone || D->hasAttr<OptimizeNoneAttr>()) { B.addAttribute(llvm::Attribute::OptimizeNone); // OptimizeNone implies noinline; we should not be inlining such functions. @@ -958,7 +970,8 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D, // function. if (!D->hasAttr<OptimizeNoneAttr>()) { if (D->hasAttr<ColdAttr>()) { - B.addAttribute(llvm::Attribute::OptimizeForSize); + if (!ShouldAddOptNone) + B.addAttribute(llvm::Attribute::OptimizeForSize); B.addAttribute(llvm::Attribute::Cold); } @@ -1508,6 +1521,10 @@ bool CodeGenModule::imbueXRayAttrs(llvm::Function *Fn, SourceLocation Loc, case ImbueAttr::ALWAYS: Fn->addFnAttr("function-instrument", "xray-always"); break; + case ImbueAttr::ALWAYS_ARG1: + Fn->addFnAttr("function-instrument", "xray-always"); + Fn->addFnAttr("xray-log-args", "1"); + break; case ImbueAttr::NEVER: Fn->addFnAttr("function-instrument", "xray-never"); break; diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp index 83aa1fdf5722..d0ba74119b7d 100644 --- a/lib/CodeGen/TargetInfo.cpp +++ b/lib/CodeGen/TargetInfo.cpp @@ -4821,6 +4821,9 @@ private: bool isSwiftErrorInRegister() const override { return true; } + + bool isLegalVectorTypeForSwift(CharUnits totalSize, llvm::Type *eltTy, + unsigned elts) const override; }; class AArch64TargetCodeGenInfo : public TargetCodeGenInfo { @@ -4994,6 +4997,17 @@ bool AArch64ABIInfo::isIllegalVectorType(QualType Ty) const { return false; } +bool AArch64ABIInfo::isLegalVectorTypeForSwift(CharUnits totalSize, + llvm::Type *eltTy, + unsigned elts) const { + if (!llvm::isPowerOf2_32(elts)) + return false; + if (totalSize.getQuantity() != 8 && + (totalSize.getQuantity() != 16 || elts == 1)) + return false; + return true; +} + bool AArch64ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const { // Homogeneous aggregates for AAPCS64 must have base types of a floating // point type or a short-vector type. This is the same as the 32-bit ABI, @@ -5382,6 +5396,8 @@ private: bool isSwiftErrorInRegister() const override { return true; } + bool isLegalVectorTypeForSwift(CharUnits totalSize, llvm::Type *eltTy, + unsigned elts) const override; }; class ARMTargetCodeGenInfo : public TargetCodeGenInfo { @@ -5894,6 +5910,20 @@ bool ARMABIInfo::isIllegalVectorType(QualType Ty) const { return false; } +bool ARMABIInfo::isLegalVectorTypeForSwift(CharUnits vectorSize, + llvm::Type *eltTy, + unsigned numElts) const { + if (!llvm::isPowerOf2_32(numElts)) + return false; + unsigned size = getDataLayout().getTypeStoreSizeInBits(eltTy); + if (size > 64) + return false; + if (vectorSize.getQuantity() != 8 && + (vectorSize.getQuantity() != 16 || numElts == 1)) + return false; + return true; +} + bool ARMABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const { // Homogeneous aggregates for AAPCS-VFP must have base types of float, // double, or 64-bit or 128-bit vectors. |