Diffstat (limited to 'lib/Transforms/Coroutines')
-rw-r--r-- | lib/Transforms/Coroutines/CoroCleanup.cpp | 7
-rw-r--r-- | lib/Transforms/Coroutines/CoroEarly.cpp | 26
-rw-r--r-- | lib/Transforms/Coroutines/CoroElide.cpp | 2
-rw-r--r-- | lib/Transforms/Coroutines/CoroFrame.cpp | 652
-rw-r--r-- | lib/Transforms/Coroutines/CoroInstr.h | 205
-rw-r--r-- | lib/Transforms/Coroutines/CoroInternal.h | 162
-rw-r--r-- | lib/Transforms/Coroutines/CoroSplit.cpp | 1166
-rw-r--r-- | lib/Transforms/Coroutines/Coroutines.cpp | 342
8 files changed, 2151 insertions, 411 deletions
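Before the per-file hunks, a note for orientation: the structural change running through this diff is that coro::Shape (see the CoroInternal.h hunk below) now carries an ABI tag (Switch, Retcon, or RetconOnce) together with per-lowering state, and the passes dispatch on that tag instead of assuming the switch-based lowering everywhere. A minimal, hypothetical sketch of that dispatch pattern, using only names added below (the helper function itself is invented for illustration):

// Illustrative sketch only; not part of the patch. frameNeedsHeapAllocation
// is a hypothetical helper, but every name it touches comes from the
// CoroInternal.h hunk in this diff.
#include "CoroInternal.h"
using namespace llvm;

static bool frameNeedsHeapAllocation(const coro::Shape &Shape) {
  switch (Shape.ABI) {
  case coro::ABI::Switch:
    // The switch lowering always builds a frame; eliding the heap
    // allocation is left to the separate CoroElide pass.
    return true;
  case coro::ABI::Retcon:
  case coro::ABI::RetconOnce:
    // Returned-continuation lowerings can place the frame directly in the
    // caller-provided storage when buildFrameType found that it fits.
    return !Shape.RetconLowering.IsFrameInlineInStorage;
  }
  llvm_unreachable("Unknown coro::ABI enum");
}

The same switch-on-ABI shape recurs throughout the CoroFrame.cpp and CoroSplit.cpp hunks below (for example in getResumeFunctionType and replaceFallthroughCoroEnd).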
diff --git a/lib/Transforms/Coroutines/CoroCleanup.cpp b/lib/Transforms/Coroutines/CoroCleanup.cpp index 1fb0a114d0c7..c3e05577f044 100644 --- a/lib/Transforms/Coroutines/CoroCleanup.cpp +++ b/lib/Transforms/Coroutines/CoroCleanup.cpp @@ -73,6 +73,8 @@ bool Lowerer::lowerRemainingCoroIntrinsics(Function &F) { II->replaceAllUsesWith(ConstantInt::getTrue(Context)); break; case Intrinsic::coro_id: + case Intrinsic::coro_id_retcon: + case Intrinsic::coro_id_retcon_once: II->replaceAllUsesWith(ConstantTokenNone::get(Context)); break; case Intrinsic::coro_subfn_addr: @@ -111,8 +113,9 @@ struct CoroCleanup : FunctionPass { bool doInitialization(Module &M) override { if (coro::declaresIntrinsics(M, {"llvm.coro.alloc", "llvm.coro.begin", "llvm.coro.subfn.addr", "llvm.coro.free", - "llvm.coro.id"})) - L = llvm::make_unique<Lowerer>(M); + "llvm.coro.id", "llvm.coro.id.retcon", + "llvm.coro.id.retcon.once"})) + L = std::make_unique<Lowerer>(M); return false; } diff --git a/lib/Transforms/Coroutines/CoroEarly.cpp b/lib/Transforms/Coroutines/CoroEarly.cpp index 692697d6f32e..55993d33ee4e 100644 --- a/lib/Transforms/Coroutines/CoroEarly.cpp +++ b/lib/Transforms/Coroutines/CoroEarly.cpp @@ -91,13 +91,14 @@ void Lowerer::lowerCoroDone(IntrinsicInst *II) { Value *Operand = II->getArgOperand(0); // ResumeFnAddr is the first pointer sized element of the coroutine frame. + static_assert(coro::Shape::SwitchFieldIndex::Resume == 0, + "resume function not at offset zero"); auto *FrameTy = Int8Ptr; PointerType *FramePtrTy = FrameTy->getPointerTo(); Builder.SetInsertPoint(II); auto *BCI = Builder.CreateBitCast(Operand, FramePtrTy); - auto *Gep = Builder.CreateConstInBoundsGEP1_32(FrameTy, BCI, 0); - auto *Load = Builder.CreateLoad(FrameTy, Gep); + auto *Load = Builder.CreateLoad(BCI); auto *Cond = Builder.CreateICmpEQ(Load, NullPtr); II->replaceAllUsesWith(Cond); @@ -189,6 +190,10 @@ bool Lowerer::lowerEarlyIntrinsics(Function &F) { } } break; + case Intrinsic::coro_id_retcon: + case Intrinsic::coro_id_retcon_once: + F.addFnAttr(CORO_PRESPLIT_ATTR, PREPARED_FOR_SPLIT); + break; case Intrinsic::coro_resume: lowerResumeOrDestroy(CS, CoroSubFnInst::ResumeIndex); break; @@ -231,11 +236,18 @@ struct CoroEarly : public FunctionPass { // This pass has work to do only if we find intrinsics we are going to lower // in the module. 
bool doInitialization(Module &M) override { - if (coro::declaresIntrinsics( - M, {"llvm.coro.id", "llvm.coro.destroy", "llvm.coro.done", - "llvm.coro.end", "llvm.coro.noop", "llvm.coro.free", - "llvm.coro.promise", "llvm.coro.resume", "llvm.coro.suspend"})) - L = llvm::make_unique<Lowerer>(M); + if (coro::declaresIntrinsics(M, {"llvm.coro.id", + "llvm.coro.id.retcon", + "llvm.coro.id.retcon.once", + "llvm.coro.destroy", + "llvm.coro.done", + "llvm.coro.end", + "llvm.coro.noop", + "llvm.coro.free", + "llvm.coro.promise", + "llvm.coro.resume", + "llvm.coro.suspend"})) + L = std::make_unique<Lowerer>(M); return false; } diff --git a/lib/Transforms/Coroutines/CoroElide.cpp b/lib/Transforms/Coroutines/CoroElide.cpp index 6707aa1c827d..aca77119023b 100644 --- a/lib/Transforms/Coroutines/CoroElide.cpp +++ b/lib/Transforms/Coroutines/CoroElide.cpp @@ -286,7 +286,7 @@ struct CoroElide : FunctionPass { bool doInitialization(Module &M) override { if (coro::declaresIntrinsics(M, {"llvm.coro.id"})) - L = llvm::make_unique<Lowerer>(M); + L = std::make_unique<Lowerer>(M); return false; } diff --git a/lib/Transforms/Coroutines/CoroFrame.cpp b/lib/Transforms/Coroutines/CoroFrame.cpp index 58bf22bee29b..2c42cf8a6d25 100644 --- a/lib/Transforms/Coroutines/CoroFrame.cpp +++ b/lib/Transforms/Coroutines/CoroFrame.cpp @@ -18,6 +18,7 @@ #include "CoroInternal.h" #include "llvm/ADT/BitVector.h" +#include "llvm/Analysis/PtrUseVisitor.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/CFG.h" @@ -28,6 +29,7 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/circular_raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/PromoteMemToReg.h" using namespace llvm; @@ -120,6 +122,15 @@ struct SuspendCrossingInfo { return false; BasicBlock *UseBB = I->getParent(); + + // As a special case, treat uses by an llvm.coro.suspend.retcon + // as if they were uses in the suspend's single predecessor: the + // uses conceptually occur before the suspend. + if (isa<CoroSuspendRetconInst>(I)) { + UseBB = UseBB->getSinglePredecessor(); + assert(UseBB && "should have split coro.suspend into its own block"); + } + return hasPathCrossingSuspendPoint(DefBB, UseBB); } @@ -128,7 +139,17 @@ struct SuspendCrossingInfo { } bool isDefinitionAcrossSuspend(Instruction &I, User *U) const { - return isDefinitionAcrossSuspend(I.getParent(), U); + auto *DefBB = I.getParent(); + + // As a special case, treat values produced by an llvm.coro.suspend.* + // as if they were defined in the single successor: the uses + // conceptually occur after the suspend. + if (isa<AnyCoroSuspendInst>(I)) { + DefBB = DefBB->getSingleSuccessor(); + assert(DefBB && "should have split coro.suspend into its own block"); + } + + return isDefinitionAcrossSuspend(DefBB, U); } }; } // end anonymous namespace @@ -183,9 +204,10 @@ SuspendCrossingInfo::SuspendCrossingInfo(Function &F, coro::Shape &Shape) B.Suspend = true; B.Kills |= B.Consumes; }; - for (CoroSuspendInst *CSI : Shape.CoroSuspends) { + for (auto *CSI : Shape.CoroSuspends) { markSuspendBlock(CSI); - markSuspendBlock(CSI->getCoroSave()); + if (auto *Save = CSI->getCoroSave()) + markSuspendBlock(Save); } // Iterate propagating consumes and kills until they stop changing. @@ -261,11 +283,13 @@ SuspendCrossingInfo::SuspendCrossingInfo(Function &F, coro::Shape &Shape) // We build up the list of spills for every case where a use is separated // from the definition by a suspend point. 
+static const unsigned InvalidFieldIndex = ~0U; + namespace { class Spill { Value *Def = nullptr; Instruction *User = nullptr; - unsigned FieldNo = 0; + unsigned FieldNo = InvalidFieldIndex; public: Spill(Value *Def, llvm::User *U) : Def(Def), User(cast<Instruction>(U)) {} @@ -280,11 +304,11 @@ public: // the definition the first time they encounter it. Consider refactoring // SpillInfo into two arrays to normalize the spill representation. unsigned fieldIndex() const { - assert(FieldNo && "Accessing unassigned field"); + assert(FieldNo != InvalidFieldIndex && "Accessing unassigned field"); return FieldNo; } void setFieldIndex(unsigned FieldNumber) { - assert(!FieldNo && "Reassigning field number"); + assert(FieldNo == InvalidFieldIndex && "Reassigning field number"); FieldNo = FieldNumber; } }; @@ -376,18 +400,30 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape, SmallString<32> Name(F.getName()); Name.append(".Frame"); StructType *FrameTy = StructType::create(C, Name); - auto *FramePtrTy = FrameTy->getPointerTo(); - auto *FnTy = FunctionType::get(Type::getVoidTy(C), FramePtrTy, - /*isVarArg=*/false); - auto *FnPtrTy = FnTy->getPointerTo(); - - // Figure out how wide should be an integer type storing the suspend index. - unsigned IndexBits = std::max(1U, Log2_64_Ceil(Shape.CoroSuspends.size())); - Type *PromiseType = Shape.PromiseAlloca - ? Shape.PromiseAlloca->getType()->getElementType() - : Type::getInt1Ty(C); - SmallVector<Type *, 8> Types{FnPtrTy, FnPtrTy, PromiseType, - Type::getIntNTy(C, IndexBits)}; + SmallVector<Type *, 8> Types; + + AllocaInst *PromiseAlloca = Shape.getPromiseAlloca(); + + if (Shape.ABI == coro::ABI::Switch) { + auto *FramePtrTy = FrameTy->getPointerTo(); + auto *FnTy = FunctionType::get(Type::getVoidTy(C), FramePtrTy, + /*IsVarArg=*/false); + auto *FnPtrTy = FnTy->getPointerTo(); + + // Figure out how wide should be an integer type storing the suspend index. + unsigned IndexBits = std::max(1U, Log2_64_Ceil(Shape.CoroSuspends.size())); + Type *PromiseType = PromiseAlloca + ? PromiseAlloca->getType()->getElementType() + : Type::getInt1Ty(C); + Type *IndexType = Type::getIntNTy(C, IndexBits); + Types.push_back(FnPtrTy); + Types.push_back(FnPtrTy); + Types.push_back(PromiseType); + Types.push_back(IndexType); + } else { + assert(PromiseAlloca == nullptr && "lowering doesn't support promises"); + } + Value *CurrentDef = nullptr; Padder.addTypes(Types); @@ -399,7 +435,7 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape, CurrentDef = S.def(); // PromiseAlloca was already added to Types array earlier. - if (CurrentDef == Shape.PromiseAlloca) + if (CurrentDef == PromiseAlloca) continue; uint64_t Count = 1; @@ -430,9 +466,80 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape, } FrameTy->setBody(Types); + switch (Shape.ABI) { + case coro::ABI::Switch: + break; + + // Remember whether the frame is inline in the storage. + case coro::ABI::Retcon: + case coro::ABI::RetconOnce: { + auto &Layout = F.getParent()->getDataLayout(); + auto Id = Shape.getRetconCoroId(); + Shape.RetconLowering.IsFrameInlineInStorage + = (Layout.getTypeAllocSize(FrameTy) <= Id->getStorageSize() && + Layout.getABITypeAlignment(FrameTy) <= Id->getStorageAlignment()); + break; + } + } + return FrameTy; } +// We use a pointer use visitor to discover if there are any writes into an +// alloca that dominates CoroBegin. 
If that is the case, insertSpills will copy +// the value from the alloca into the coroutine frame spill slot corresponding +// to that alloca. +namespace { +struct AllocaUseVisitor : PtrUseVisitor<AllocaUseVisitor> { + using Base = PtrUseVisitor<AllocaUseVisitor>; + AllocaUseVisitor(const DataLayout &DL, const DominatorTree &DT, + const CoroBeginInst &CB) + : PtrUseVisitor(DL), DT(DT), CoroBegin(CB) {} + + // We are only interested in uses that dominate coro.begin. + void visit(Instruction &I) { + if (DT.dominates(&I, &CoroBegin)) + Base::visit(I); + } + // We need to provide this overload as PtrUseVisitor uses a pointer based + // visiting function. + void visit(Instruction *I) { return visit(*I); } + + void visitLoadInst(LoadInst &) {} // Good. Nothing to do. + + // If the use is an operand, the pointer escaped and anything can write into + // that memory. If the use is the pointer, we are definitely writing into the + // alloca and therefore we need to copy. + void visitStoreInst(StoreInst &SI) { PI.setAborted(&SI); } + + // Any other instruction that is not filtered out by PtrUseVisitor, will + // result in the copy. + void visitInstruction(Instruction &I) { PI.setAborted(&I); } + +private: + const DominatorTree &DT; + const CoroBeginInst &CoroBegin; +}; +} // namespace +static bool mightWriteIntoAllocaPtr(AllocaInst &A, const DominatorTree &DT, + const CoroBeginInst &CB) { + const DataLayout &DL = A.getModule()->getDataLayout(); + AllocaUseVisitor Visitor(DL, DT, CB); + auto PtrI = Visitor.visitPtr(A); + if (PtrI.isEscaped() || PtrI.isAborted()) { + auto *PointerEscapingInstr = PtrI.getEscapingInst() + ? PtrI.getEscapingInst() + : PtrI.getAbortingInst(); + if (PointerEscapingInstr) { + LLVM_DEBUG( + dbgs() << "AllocaInst copy was triggered by instruction: " + << *PointerEscapingInstr << "\n"); + } + return true; + } + return false; +} + // We need to make room to insert a spill after initial PHIs, but before // catchswitch instruction. Placing it before violates the requirement that // catchswitch, like all other EHPads must be the first nonPHI in a block. @@ -476,7 +583,7 @@ static Instruction *splitBeforeCatchSwitch(CatchSwitchInst *CatchSwitch) { // whatever // // -static Instruction *insertSpills(SpillInfo &Spills, coro::Shape &Shape) { +static Instruction *insertSpills(const SpillInfo &Spills, coro::Shape &Shape) { auto *CB = Shape.CoroBegin; LLVMContext &C = CB->getContext(); IRBuilder<> Builder(CB->getNextNode()); @@ -484,11 +591,14 @@ static Instruction *insertSpills(SpillInfo &Spills, coro::Shape &Shape) { PointerType *FramePtrTy = FrameTy->getPointerTo(); auto *FramePtr = cast<Instruction>(Builder.CreateBitCast(CB, FramePtrTy, "FramePtr")); + DominatorTree DT(*CB->getFunction()); Value *CurrentValue = nullptr; BasicBlock *CurrentBlock = nullptr; Value *CurrentReload = nullptr; - unsigned Index = 0; // Proper field number will be read from field definition. + + // Proper field number will be read from field definition. + unsigned Index = InvalidFieldIndex; // We need to keep track of any allocas that need "spilling" // since they will live in the coroutine frame now, all access to them @@ -496,9 +606,11 @@ static Instruction *insertSpills(SpillInfo &Spills, coro::Shape &Shape) { // we remember allocas and their indices to be handled once we processed // all the spills. 
SmallVector<std::pair<AllocaInst *, unsigned>, 4> Allocas; - // Promise alloca (if present) has a fixed field number (Shape::PromiseField) - if (Shape.PromiseAlloca) - Allocas.emplace_back(Shape.PromiseAlloca, coro::Shape::PromiseField); + // Promise alloca (if present) has a fixed field number. + if (auto *PromiseAlloca = Shape.getPromiseAlloca()) { + assert(Shape.ABI == coro::ABI::Switch); + Allocas.emplace_back(PromiseAlloca, coro::Shape::SwitchFieldIndex::Promise); + } // Create a GEP with the given index into the coroutine frame for the original // value Orig. Appends an extra 0 index for array-allocas, preserving the @@ -526,7 +638,7 @@ static Instruction *insertSpills(SpillInfo &Spills, coro::Shape &Shape) { // Create a load instruction to reload the spilled value from the coroutine // frame. auto CreateReload = [&](Instruction *InsertBefore) { - assert(Index && "accessing unassigned field number"); + assert(Index != InvalidFieldIndex && "accessing unassigned field number"); Builder.SetInsertPoint(InsertBefore); auto *G = GetFramePointer(Index, CurrentValue); @@ -558,29 +670,45 @@ static Instruction *insertSpills(SpillInfo &Spills, coro::Shape &Shape) { // coroutine frame. Instruction *InsertPt = nullptr; - if (isa<Argument>(CurrentValue)) { + if (auto Arg = dyn_cast<Argument>(CurrentValue)) { // For arguments, we will place the store instruction right after // the coroutine frame pointer instruction, i.e. bitcast of // coro.begin from i8* to %f.frame*. InsertPt = FramePtr->getNextNode(); + + // If we're spilling an Argument, make sure we clear 'nocapture' + // from the coroutine function. + Arg->getParent()->removeParamAttr(Arg->getArgNo(), + Attribute::NoCapture); + } else if (auto *II = dyn_cast<InvokeInst>(CurrentValue)) { // If we are spilling the result of the invoke instruction, split the // normal edge and insert the spill in the new block. auto NewBB = SplitEdge(II->getParent(), II->getNormalDest()); InsertPt = NewBB->getTerminator(); - } else if (dyn_cast<PHINode>(CurrentValue)) { + } else if (isa<PHINode>(CurrentValue)) { // Skip the PHINodes and EH pads instructions. BasicBlock *DefBlock = cast<Instruction>(E.def())->getParent(); if (auto *CSI = dyn_cast<CatchSwitchInst>(DefBlock->getTerminator())) InsertPt = splitBeforeCatchSwitch(CSI); else InsertPt = &*DefBlock->getFirstInsertionPt(); + } else if (auto CSI = dyn_cast<AnyCoroSuspendInst>(CurrentValue)) { + // Don't spill immediately after a suspend; splitting assumes + // that the suspend will be followed by a branch. + InsertPt = CSI->getParent()->getSingleSuccessor()->getFirstNonPHI(); } else { + auto *I = cast<Instruction>(E.def()); + assert(!I->isTerminator() && "unexpected terminator"); // For all other values, the spill is placed immediately after // the definition. - assert(!cast<Instruction>(E.def())->isTerminator() && - "unexpected terminator"); - InsertPt = cast<Instruction>(E.def())->getNextNode(); + if (DT.dominates(CB, I)) { + InsertPt = I->getNextNode(); + } else { + // Unless, it is not dominated by CoroBegin, then it will be + // inserted immediately after CoroFrame is computed. 
+ InsertPt = FramePtr->getNextNode(); + } } Builder.SetInsertPoint(InsertPt); @@ -613,21 +741,53 @@ static Instruction *insertSpills(SpillInfo &Spills, coro::Shape &Shape) { } BasicBlock *FramePtrBB = FramePtr->getParent(); - Shape.AllocaSpillBlock = - FramePtrBB->splitBasicBlock(FramePtr->getNextNode(), "AllocaSpillBB"); - Shape.AllocaSpillBlock->splitBasicBlock(&Shape.AllocaSpillBlock->front(), - "PostSpill"); - Builder.SetInsertPoint(&Shape.AllocaSpillBlock->front()); + auto SpillBlock = + FramePtrBB->splitBasicBlock(FramePtr->getNextNode(), "AllocaSpillBB"); + SpillBlock->splitBasicBlock(&SpillBlock->front(), "PostSpill"); + Shape.AllocaSpillBlock = SpillBlock; // If we found any allocas, replace all of their remaining uses with Geps. + // Note: we cannot do it indiscriminately as some of the uses may not be + // dominated by CoroBegin. + bool MightNeedToCopy = false; + Builder.SetInsertPoint(&Shape.AllocaSpillBlock->front()); + SmallVector<Instruction *, 4> UsersToUpdate; for (auto &P : Allocas) { - auto *G = GetFramePointer(P.second, P.first); + AllocaInst *const A = P.first; + UsersToUpdate.clear(); + for (User *U : A->users()) { + auto *I = cast<Instruction>(U); + if (DT.dominates(CB, I)) + UsersToUpdate.push_back(I); + else + MightNeedToCopy = true; + } + if (!UsersToUpdate.empty()) { + auto *G = GetFramePointer(P.second, A); + G->takeName(A); + for (Instruction *I : UsersToUpdate) + I->replaceUsesOfWith(A, G); + } + } + // If we discovered such uses not dominated by CoroBegin, see if any of them + // preceed coro begin and have instructions that can modify the + // value of the alloca and therefore would require a copying the value into + // the spill slot in the coroutine frame. + if (MightNeedToCopy) { + Builder.SetInsertPoint(FramePtr->getNextNode()); + + for (auto &P : Allocas) { + AllocaInst *const A = P.first; + if (mightWriteIntoAllocaPtr(*A, DT, *CB)) { + if (A->isArrayAllocation()) + report_fatal_error( + "Coroutines cannot handle copying of array allocas yet"); - // We are not using ReplaceInstWithInst(P.first, cast<Instruction>(G)) here, - // as we are changing location of the instruction. - G->takeName(P.first); - P.first->replaceAllUsesWith(G); - P.first->eraseFromParent(); + auto *G = GetFramePointer(P.second, A); + auto *Value = Builder.CreateLoad(A); + Builder.CreateStore(Value, G); + } + } } return FramePtr; } @@ -829,52 +989,6 @@ static void rewriteMaterializableInstructions(IRBuilder<> &IRB, } } -// Move early uses of spilled variable after CoroBegin. -// For example, if a parameter had address taken, we may end up with the code -// like: -// define @f(i32 %n) { -// %n.addr = alloca i32 -// store %n, %n.addr -// ... -// call @coro.begin -// we need to move the store after coro.begin -static void moveSpillUsesAfterCoroBegin(Function &F, SpillInfo const &Spills, - CoroBeginInst *CoroBegin) { - DominatorTree DT(F); - SmallVector<Instruction *, 8> NeedsMoving; - - Value *CurrentValue = nullptr; - - for (auto const &E : Spills) { - if (CurrentValue == E.def()) - continue; - - CurrentValue = E.def(); - - for (User *U : CurrentValue->users()) { - Instruction *I = cast<Instruction>(U); - if (!DT.dominates(CoroBegin, I)) { - LLVM_DEBUG(dbgs() << "will move: " << *I << "\n"); - - // TODO: Make this more robust. Currently if we run into a situation - // where simple instruction move won't work we panic and - // report_fatal_error. 
- for (User *UI : I->users()) { - if (!DT.dominates(CoroBegin, cast<Instruction>(UI))) - report_fatal_error("cannot move instruction since its users are not" - " dominated by CoroBegin"); - } - - NeedsMoving.push_back(I); - } - } - } - - Instruction *InsertPt = CoroBegin->getNextNode(); - for (Instruction *I : NeedsMoving) - I->moveBefore(InsertPt); -} - // Splits the block at a particular instruction unless it is the first // instruction in the block with a single predecessor. static BasicBlock *splitBlockIfNotFirst(Instruction *I, const Twine &Name) { @@ -895,21 +1009,337 @@ static void splitAround(Instruction *I, const Twine &Name) { splitBlockIfNotFirst(I->getNextNode(), "After" + Name); } +static bool isSuspendBlock(BasicBlock *BB) { + return isa<AnyCoroSuspendInst>(BB->front()); +} + +typedef SmallPtrSet<BasicBlock*, 8> VisitedBlocksSet; + +/// Does control flow starting at the given block ever reach a suspend +/// instruction before reaching a block in VisitedOrFreeBBs? +static bool isSuspendReachableFrom(BasicBlock *From, + VisitedBlocksSet &VisitedOrFreeBBs) { + // Eagerly try to add this block to the visited set. If it's already + // there, stop recursing; this path doesn't reach a suspend before + // either looping or reaching a freeing block. + if (!VisitedOrFreeBBs.insert(From).second) + return false; + + // We assume that we'll already have split suspends into their own blocks. + if (isSuspendBlock(From)) + return true; + + // Recurse on the successors. + for (auto Succ : successors(From)) { + if (isSuspendReachableFrom(Succ, VisitedOrFreeBBs)) + return true; + } + + return false; +} + +/// Is the given alloca "local", i.e. bounded in lifetime to not cross a +/// suspend point? +static bool isLocalAlloca(CoroAllocaAllocInst *AI) { + // Seed the visited set with all the basic blocks containing a free + // so that we won't pass them up. + VisitedBlocksSet VisitedOrFreeBBs; + for (auto User : AI->users()) { + if (auto FI = dyn_cast<CoroAllocaFreeInst>(User)) + VisitedOrFreeBBs.insert(FI->getParent()); + } + + return !isSuspendReachableFrom(AI->getParent(), VisitedOrFreeBBs); +} + +/// After we split the coroutine, will the given basic block be along +/// an obvious exit path for the resumption function? +static bool willLeaveFunctionImmediatelyAfter(BasicBlock *BB, + unsigned depth = 3) { + // If we've bottomed out our depth count, stop searching and assume + // that the path might loop back. + if (depth == 0) return false; + + // If this is a suspend block, we're about to exit the resumption function. + if (isSuspendBlock(BB)) return true; + + // Recurse into the successors. + for (auto Succ : successors(BB)) { + if (!willLeaveFunctionImmediatelyAfter(Succ, depth - 1)) + return false; + } + + // If none of the successors leads back in a loop, we're on an exit/abort. + return true; +} + +static bool localAllocaNeedsStackSave(CoroAllocaAllocInst *AI) { + // Look for a free that isn't sufficiently obviously followed by + // either a suspend or a termination, i.e. something that will leave + // the coro resumption frame. + for (auto U : AI->users()) { + auto FI = dyn_cast<CoroAllocaFreeInst>(U); + if (!FI) continue; + + if (!willLeaveFunctionImmediatelyAfter(FI->getParent())) + return true; + } + + // If we never found one, we don't need a stack save. + return false; +} + +/// Turn each of the given local allocas into a normal (dynamic) alloca +/// instruction. 
+static void lowerLocalAllocas(ArrayRef<CoroAllocaAllocInst*> LocalAllocas, + SmallVectorImpl<Instruction*> &DeadInsts) { + for (auto AI : LocalAllocas) { + auto M = AI->getModule(); + IRBuilder<> Builder(AI); + + // Save the stack depth. Try to avoid doing this if the stackrestore + // is going to immediately precede a return or something. + Value *StackSave = nullptr; + if (localAllocaNeedsStackSave(AI)) + StackSave = Builder.CreateCall( + Intrinsic::getDeclaration(M, Intrinsic::stacksave)); + + // Allocate memory. + auto Alloca = Builder.CreateAlloca(Builder.getInt8Ty(), AI->getSize()); + Alloca->setAlignment(MaybeAlign(AI->getAlignment())); + + for (auto U : AI->users()) { + // Replace gets with the allocation. + if (isa<CoroAllocaGetInst>(U)) { + U->replaceAllUsesWith(Alloca); + + // Replace frees with stackrestores. This is safe because + // alloca.alloc is required to obey a stack discipline, although we + // don't enforce that structurally. + } else { + auto FI = cast<CoroAllocaFreeInst>(U); + if (StackSave) { + Builder.SetInsertPoint(FI); + Builder.CreateCall( + Intrinsic::getDeclaration(M, Intrinsic::stackrestore), + StackSave); + } + } + DeadInsts.push_back(cast<Instruction>(U)); + } + + DeadInsts.push_back(AI); + } +} + +/// Turn the given coro.alloca.alloc call into a dynamic allocation. +/// This happens during the all-instructions iteration, so it must not +/// delete the call. +static Instruction *lowerNonLocalAlloca(CoroAllocaAllocInst *AI, + coro::Shape &Shape, + SmallVectorImpl<Instruction*> &DeadInsts) { + IRBuilder<> Builder(AI); + auto Alloc = Shape.emitAlloc(Builder, AI->getSize(), nullptr); + + for (User *U : AI->users()) { + if (isa<CoroAllocaGetInst>(U)) { + U->replaceAllUsesWith(Alloc); + } else { + auto FI = cast<CoroAllocaFreeInst>(U); + Builder.SetInsertPoint(FI); + Shape.emitDealloc(Builder, Alloc, nullptr); + } + DeadInsts.push_back(cast<Instruction>(U)); + } + + // Push this on last so that it gets deleted after all the others. + DeadInsts.push_back(AI); + + // Return the new allocation value so that we can check for needed spills. + return cast<Instruction>(Alloc); +} + +/// Get the current swifterror value. +static Value *emitGetSwiftErrorValue(IRBuilder<> &Builder, Type *ValueTy, + coro::Shape &Shape) { + // Make a fake function pointer as a sort of intrinsic. + auto FnTy = FunctionType::get(ValueTy, {}, false); + auto Fn = ConstantPointerNull::get(FnTy->getPointerTo()); + + auto Call = Builder.CreateCall(Fn, {}); + Shape.SwiftErrorOps.push_back(Call); + + return Call; +} + +/// Set the given value as the current swifterror value. +/// +/// Returns a slot that can be used as a swifterror slot. +static Value *emitSetSwiftErrorValue(IRBuilder<> &Builder, Value *V, + coro::Shape &Shape) { + // Make a fake function pointer as a sort of intrinsic. + auto FnTy = FunctionType::get(V->getType()->getPointerTo(), + {V->getType()}, false); + auto Fn = ConstantPointerNull::get(FnTy->getPointerTo()); + + auto Call = Builder.CreateCall(Fn, { V }); + Shape.SwiftErrorOps.push_back(Call); + + return Call; +} + +/// Set the swifterror value from the given alloca before a call, +/// then put in back in the alloca afterwards. +/// +/// Returns an address that will stand in for the swifterror slot +/// until splitting. 
+static Value *emitSetAndGetSwiftErrorValueAround(Instruction *Call, + AllocaInst *Alloca, + coro::Shape &Shape) { + auto ValueTy = Alloca->getAllocatedType(); + IRBuilder<> Builder(Call); + + // Load the current value from the alloca and set it as the + // swifterror value. + auto ValueBeforeCall = Builder.CreateLoad(ValueTy, Alloca); + auto Addr = emitSetSwiftErrorValue(Builder, ValueBeforeCall, Shape); + + // Move to after the call. Since swifterror only has a guaranteed + // value on normal exits, we can ignore implicit and explicit unwind + // edges. + if (isa<CallInst>(Call)) { + Builder.SetInsertPoint(Call->getNextNode()); + } else { + auto Invoke = cast<InvokeInst>(Call); + Builder.SetInsertPoint(Invoke->getNormalDest()->getFirstNonPHIOrDbg()); + } + + // Get the current swifterror value and store it to the alloca. + auto ValueAfterCall = emitGetSwiftErrorValue(Builder, ValueTy, Shape); + Builder.CreateStore(ValueAfterCall, Alloca); + + return Addr; +} + +/// Eliminate a formerly-swifterror alloca by inserting the get/set +/// intrinsics and attempting to MemToReg the alloca away. +static void eliminateSwiftErrorAlloca(Function &F, AllocaInst *Alloca, + coro::Shape &Shape) { + for (auto UI = Alloca->use_begin(), UE = Alloca->use_end(); UI != UE; ) { + // We're likely changing the use list, so use a mutation-safe + // iteration pattern. + auto &Use = *UI; + ++UI; + + // swifterror values can only be used in very specific ways. + // We take advantage of that here. + auto User = Use.getUser(); + if (isa<LoadInst>(User) || isa<StoreInst>(User)) + continue; + + assert(isa<CallInst>(User) || isa<InvokeInst>(User)); + auto Call = cast<Instruction>(User); + + auto Addr = emitSetAndGetSwiftErrorValueAround(Call, Alloca, Shape); + + // Use the returned slot address as the call argument. + Use.set(Addr); + } + + // All the uses should be loads and stores now. + assert(isAllocaPromotable(Alloca)); +} + +/// "Eliminate" a swifterror argument by reducing it to the alloca case +/// and then loading and storing in the prologue and epilog. +/// +/// The argument keeps the swifterror flag. +static void eliminateSwiftErrorArgument(Function &F, Argument &Arg, + coro::Shape &Shape, + SmallVectorImpl<AllocaInst*> &AllocasToPromote) { + IRBuilder<> Builder(F.getEntryBlock().getFirstNonPHIOrDbg()); + + auto ArgTy = cast<PointerType>(Arg.getType()); + auto ValueTy = ArgTy->getElementType(); + + // Reduce to the alloca case: + + // Create an alloca and replace all uses of the arg with it. + auto Alloca = Builder.CreateAlloca(ValueTy, ArgTy->getAddressSpace()); + Arg.replaceAllUsesWith(Alloca); + + // Set an initial value in the alloca. swifterror is always null on entry. + auto InitialValue = Constant::getNullValue(ValueTy); + Builder.CreateStore(InitialValue, Alloca); + + // Find all the suspends in the function and save and restore around them. + for (auto Suspend : Shape.CoroSuspends) { + (void) emitSetAndGetSwiftErrorValueAround(Suspend, Alloca, Shape); + } + + // Find all the coro.ends in the function and restore the error value. + for (auto End : Shape.CoroEnds) { + Builder.SetInsertPoint(End); + auto FinalValue = Builder.CreateLoad(ValueTy, Alloca); + (void) emitSetSwiftErrorValue(Builder, FinalValue, Shape); + } + + // Now we can use the alloca logic. + AllocasToPromote.push_back(Alloca); + eliminateSwiftErrorAlloca(F, Alloca, Shape); +} + +/// Eliminate all problematic uses of swifterror arguments and allocas +/// from the function. We'll fix them up later when splitting the function. 
+static void eliminateSwiftError(Function &F, coro::Shape &Shape) { + SmallVector<AllocaInst*, 4> AllocasToPromote; + + // Look for a swifterror argument. + for (auto &Arg : F.args()) { + if (!Arg.hasSwiftErrorAttr()) continue; + + eliminateSwiftErrorArgument(F, Arg, Shape, AllocasToPromote); + break; + } + + // Look for swifterror allocas. + for (auto &Inst : F.getEntryBlock()) { + auto Alloca = dyn_cast<AllocaInst>(&Inst); + if (!Alloca || !Alloca->isSwiftError()) continue; + + // Clear the swifterror flag. + Alloca->setSwiftError(false); + + AllocasToPromote.push_back(Alloca); + eliminateSwiftErrorAlloca(F, Alloca, Shape); + } + + // If we have any allocas to promote, compute a dominator tree and + // promote them en masse. + if (!AllocasToPromote.empty()) { + DominatorTree DT(F); + PromoteMemToReg(AllocasToPromote, DT); + } +} + void coro::buildCoroutineFrame(Function &F, Shape &Shape) { // Lower coro.dbg.declare to coro.dbg.value, since we are going to rewrite // access to local variables. LowerDbgDeclare(F); - Shape.PromiseAlloca = Shape.CoroBegin->getId()->getPromise(); - if (Shape.PromiseAlloca) { - Shape.CoroBegin->getId()->clearPromise(); + eliminateSwiftError(F, Shape); + + if (Shape.ABI == coro::ABI::Switch && + Shape.SwitchLowering.PromiseAlloca) { + Shape.getSwitchCoroId()->clearPromise(); } // Make sure that all coro.save, coro.suspend and the fallthrough coro.end // intrinsics are in their own blocks to simplify the logic of building up // SuspendCrossing data. - for (CoroSuspendInst *CSI : Shape.CoroSuspends) { - splitAround(CSI->getCoroSave(), "CoroSave"); + for (auto *CSI : Shape.CoroSuspends) { + if (auto *Save = CSI->getCoroSave()) + splitAround(Save, "CoroSave"); splitAround(CSI, "CoroSuspend"); } @@ -926,6 +1356,8 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) { IRBuilder<> Builder(F.getContext()); SpillInfo Spills; + SmallVector<CoroAllocaAllocInst*, 4> LocalAllocas; + SmallVector<Instruction*, 4> DeadInstructions; for (int Repeat = 0; Repeat < 4; ++Repeat) { // See if there are materializable instructions across suspend points. @@ -955,11 +1387,40 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) { // of the Coroutine Frame. if (isCoroutineStructureIntrinsic(I) || &I == Shape.CoroBegin) continue; + // The Coroutine Promise always included into coroutine frame, no need to // check for suspend crossing. - if (Shape.PromiseAlloca == &I) + if (Shape.ABI == coro::ABI::Switch && + Shape.SwitchLowering.PromiseAlloca == &I) continue; + // Handle alloca.alloc specially here. + if (auto AI = dyn_cast<CoroAllocaAllocInst>(&I)) { + // Check whether the alloca's lifetime is bounded by suspend points. + if (isLocalAlloca(AI)) { + LocalAllocas.push_back(AI); + continue; + } + + // If not, do a quick rewrite of the alloca and then add spills of + // the rewritten value. The rewrite doesn't invalidate anything in + // Spills because the other alloca intrinsics have no other operands + // besides AI, and it doesn't invalidate the iteration because we delay + // erasing AI. + auto Alloc = lowerNonLocalAlloca(AI, Shape, DeadInstructions); + + for (User *U : Alloc->users()) { + if (Checker.isDefinitionAcrossSuspend(*Alloc, U)) + Spills.emplace_back(Alloc, U); + } + continue; + } + + // Ignore alloca.get; we process this as part of coro.alloca.alloc. + if (isa<CoroAllocaGetInst>(I)) { + continue; + } + for (User *U : I.users()) if (Checker.isDefinitionAcrossSuspend(I, U)) { // We cannot spill a token. 
@@ -970,7 +1431,10 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) { } } LLVM_DEBUG(dump("Spills", Spills)); - moveSpillUsesAfterCoroBegin(F, Spills, Shape.CoroBegin); Shape.FrameTy = buildFrameType(F, Shape, Spills); Shape.FramePtr = insertSpills(Spills, Shape); + lowerLocalAllocas(LocalAllocas, DeadInstructions); + + for (auto I : DeadInstructions) + I->eraseFromParent(); } diff --git a/lib/Transforms/Coroutines/CoroInstr.h b/lib/Transforms/Coroutines/CoroInstr.h index 5e19d7642e38..de2d2920cb15 100644 --- a/lib/Transforms/Coroutines/CoroInstr.h +++ b/lib/Transforms/Coroutines/CoroInstr.h @@ -27,6 +27,7 @@ #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/Support/raw_ostream.h" namespace llvm { @@ -77,10 +78,8 @@ public: } }; -/// This represents the llvm.coro.alloc instruction. -class LLVM_LIBRARY_VISIBILITY CoroIdInst : public IntrinsicInst { - enum { AlignArg, PromiseArg, CoroutineArg, InfoArg }; - +/// This represents a common base class for llvm.coro.id instructions. +class LLVM_LIBRARY_VISIBILITY AnyCoroIdInst : public IntrinsicInst { public: CoroAllocInst *getCoroAlloc() { for (User *U : users()) @@ -97,6 +96,24 @@ public: llvm_unreachable("no coro.begin associated with coro.id"); } + // Methods to support type inquiry through isa, cast, and dyn_cast: + static bool classof(const IntrinsicInst *I) { + auto ID = I->getIntrinsicID(); + return ID == Intrinsic::coro_id || + ID == Intrinsic::coro_id_retcon || + ID == Intrinsic::coro_id_retcon_once; + } + + static bool classof(const Value *V) { + return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V)); + } +}; + +/// This represents the llvm.coro.id instruction. +class LLVM_LIBRARY_VISIBILITY CoroIdInst : public AnyCoroIdInst { + enum { AlignArg, PromiseArg, CoroutineArg, InfoArg }; + +public: AllocaInst *getPromise() const { Value *Arg = getArgOperand(PromiseArg); return isa<ConstantPointerNull>(Arg) @@ -182,6 +199,80 @@ public: } }; +/// This represents either the llvm.coro.id.retcon or +/// llvm.coro.id.retcon.once instruction. +class LLVM_LIBRARY_VISIBILITY AnyCoroIdRetconInst : public AnyCoroIdInst { + enum { SizeArg, AlignArg, StorageArg, PrototypeArg, AllocArg, DeallocArg }; + +public: + void checkWellFormed() const; + + uint64_t getStorageSize() const { + return cast<ConstantInt>(getArgOperand(SizeArg))->getZExtValue(); + } + + uint64_t getStorageAlignment() const { + return cast<ConstantInt>(getArgOperand(AlignArg))->getZExtValue(); + } + + Value *getStorage() const { + return getArgOperand(StorageArg); + } + + /// Return the prototype for the continuation function. The type, + /// attributes, and calling convention of the continuation function(s) + /// are taken from this declaration. + Function *getPrototype() const { + return cast<Function>(getArgOperand(PrototypeArg)->stripPointerCasts()); + } + + /// Return the function to use for allocating memory. + Function *getAllocFunction() const { + return cast<Function>(getArgOperand(AllocArg)->stripPointerCasts()); + } + + /// Return the function to use for deallocating memory. 
+ Function *getDeallocFunction() const { + return cast<Function>(getArgOperand(DeallocArg)->stripPointerCasts()); + } + + // Methods to support type inquiry through isa, cast, and dyn_cast: + static bool classof(const IntrinsicInst *I) { + auto ID = I->getIntrinsicID(); + return ID == Intrinsic::coro_id_retcon + || ID == Intrinsic::coro_id_retcon_once; + } + static bool classof(const Value *V) { + return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V)); + } +}; + +/// This represents the llvm.coro.id.retcon instruction. +class LLVM_LIBRARY_VISIBILITY CoroIdRetconInst + : public AnyCoroIdRetconInst { +public: + // Methods to support type inquiry through isa, cast, and dyn_cast: + static bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::coro_id_retcon; + } + static bool classof(const Value *V) { + return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V)); + } +}; + +/// This represents the llvm.coro.id.retcon.once instruction. +class LLVM_LIBRARY_VISIBILITY CoroIdRetconOnceInst + : public AnyCoroIdRetconInst { +public: + // Methods to support type inquiry through isa, cast, and dyn_cast: + static bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::coro_id_retcon_once; + } + static bool classof(const Value *V) { + return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V)); + } +}; + /// This represents the llvm.coro.frame instruction. class LLVM_LIBRARY_VISIBILITY CoroFrameInst : public IntrinsicInst { public: @@ -215,7 +306,9 @@ class LLVM_LIBRARY_VISIBILITY CoroBeginInst : public IntrinsicInst { enum { IdArg, MemArg }; public: - CoroIdInst *getId() const { return cast<CoroIdInst>(getArgOperand(IdArg)); } + AnyCoroIdInst *getId() const { + return cast<AnyCoroIdInst>(getArgOperand(IdArg)); + } Value *getMem() const { return getArgOperand(MemArg); } @@ -261,8 +354,22 @@ public: } }; +class LLVM_LIBRARY_VISIBILITY AnyCoroSuspendInst : public IntrinsicInst { +public: + CoroSaveInst *getCoroSave() const; + + // Methods to support type inquiry through isa, cast, and dyn_cast: + static bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::coro_suspend || + I->getIntrinsicID() == Intrinsic::coro_suspend_retcon; + } + static bool classof(const Value *V) { + return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V)); + } +}; + /// This represents the llvm.coro.suspend instruction. -class LLVM_LIBRARY_VISIBILITY CoroSuspendInst : public IntrinsicInst { +class LLVM_LIBRARY_VISIBILITY CoroSuspendInst : public AnyCoroSuspendInst { enum { SaveArg, FinalArg }; public: @@ -273,6 +380,7 @@ public: assert(isa<ConstantTokenNone>(Arg)); return nullptr; } + bool isFinal() const { return cast<Constant>(getArgOperand(FinalArg))->isOneValue(); } @@ -286,6 +394,37 @@ public: } }; +inline CoroSaveInst *AnyCoroSuspendInst::getCoroSave() const { + if (auto Suspend = dyn_cast<CoroSuspendInst>(this)) + return Suspend->getCoroSave(); + return nullptr; +} + +/// This represents the llvm.coro.suspend.retcon instruction. 
+class LLVM_LIBRARY_VISIBILITY CoroSuspendRetconInst : public AnyCoroSuspendInst { +public: + op_iterator value_begin() { return arg_begin(); } + const_op_iterator value_begin() const { return arg_begin(); } + + op_iterator value_end() { return arg_end(); } + const_op_iterator value_end() const { return arg_end(); } + + iterator_range<op_iterator> value_operands() { + return make_range(value_begin(), value_end()); + } + iterator_range<const_op_iterator> value_operands() const { + return make_range(value_begin(), value_end()); + } + + // Methods to support type inquiry through isa, cast, and dyn_cast: + static bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::coro_suspend_retcon; + } + static bool classof(const Value *V) { + return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V)); + } +}; + /// This represents the llvm.coro.size instruction. class LLVM_LIBRARY_VISIBILITY CoroSizeInst : public IntrinsicInst { public: @@ -317,6 +456,60 @@ public: } }; +/// This represents the llvm.coro.alloca.alloc instruction. +class LLVM_LIBRARY_VISIBILITY CoroAllocaAllocInst : public IntrinsicInst { + enum { SizeArg, AlignArg }; +public: + Value *getSize() const { + return getArgOperand(SizeArg); + } + unsigned getAlignment() const { + return cast<ConstantInt>(getArgOperand(AlignArg))->getZExtValue(); + } + + // Methods to support type inquiry through isa, cast, and dyn_cast: + static bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::coro_alloca_alloc; + } + static bool classof(const Value *V) { + return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V)); + } +}; + +/// This represents the llvm.coro.alloca.get instruction. +class LLVM_LIBRARY_VISIBILITY CoroAllocaGetInst : public IntrinsicInst { + enum { AllocArg }; +public: + CoroAllocaAllocInst *getAlloc() const { + return cast<CoroAllocaAllocInst>(getArgOperand(AllocArg)); + } + + // Methods to support type inquiry through isa, cast, and dyn_cast: + static bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::coro_alloca_get; + } + static bool classof(const Value *V) { + return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V)); + } +}; + +/// This represents the llvm.coro.alloca.free instruction. +class LLVM_LIBRARY_VISIBILITY CoroAllocaFreeInst : public IntrinsicInst { + enum { AllocArg }; +public: + CoroAllocaAllocInst *getAlloc() const { + return cast<CoroAllocaAllocInst>(getArgOperand(AllocArg)); + } + + // Methods to support type inquiry through isa, cast, and dyn_cast: + static bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::coro_alloca_free; + } + static bool classof(const Value *V) { + return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V)); + } +}; + } // End namespace llvm. #endif diff --git a/lib/Transforms/Coroutines/CoroInternal.h b/lib/Transforms/Coroutines/CoroInternal.h index 441c8a20f1f3..c151474316f9 100644 --- a/lib/Transforms/Coroutines/CoroInternal.h +++ b/lib/Transforms/Coroutines/CoroInternal.h @@ -12,6 +12,7 @@ #define LLVM_LIB_TRANSFORMS_COROUTINES_COROINTERNAL_H #include "CoroInstr.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/Transforms/Coroutines.h" namespace llvm { @@ -61,37 +62,174 @@ struct LowererBase { Value *makeSubFnCall(Value *Arg, int Index, Instruction *InsertPt); }; +enum class ABI { + /// The "resume-switch" lowering, where there are separate resume and + /// destroy functions that are shared between all suspend points. 
The + /// coroutine frame implicitly stores the resume and destroy functions, + /// the current index, and any promise value. + Switch, + + /// The "returned-continuation" lowering, where each suspend point creates a + /// single continuation function that is used for both resuming and + /// destroying. Does not support promises. + Retcon, + + /// The "unique returned-continuation" lowering, where each suspend point + /// creates a single continuation function that is used for both resuming + /// and destroying. Does not support promises. The function is known to + /// suspend at most once during its execution, and the return value of + /// the continuation is void. + RetconOnce, +}; + // Holds structural Coroutine Intrinsics for a particular function and other // values used during CoroSplit pass. struct LLVM_LIBRARY_VISIBILITY Shape { CoroBeginInst *CoroBegin; SmallVector<CoroEndInst *, 4> CoroEnds; SmallVector<CoroSizeInst *, 2> CoroSizes; - SmallVector<CoroSuspendInst *, 4> CoroSuspends; - - // Field Indexes for known coroutine frame fields. - enum { - ResumeField, - DestroyField, - PromiseField, - IndexField, + SmallVector<AnyCoroSuspendInst *, 4> CoroSuspends; + SmallVector<CallInst*, 2> SwiftErrorOps; + + // Field indexes for special fields in the switch lowering. + struct SwitchFieldIndex { + enum { + Resume, + Destroy, + Promise, + Index, + /// The index of the first spill field. + FirstSpill + }; }; + coro::ABI ABI; + StructType *FrameTy; Instruction *FramePtr; BasicBlock *AllocaSpillBlock; - SwitchInst *ResumeSwitch; - AllocaInst *PromiseAlloca; - bool HasFinalSuspend; + + struct SwitchLoweringStorage { + SwitchInst *ResumeSwitch; + AllocaInst *PromiseAlloca; + BasicBlock *ResumeEntryBlock; + bool HasFinalSuspend; + }; + + struct RetconLoweringStorage { + Function *ResumePrototype; + Function *Alloc; + Function *Dealloc; + BasicBlock *ReturnBlock; + bool IsFrameInlineInStorage; + }; + + union { + SwitchLoweringStorage SwitchLowering; + RetconLoweringStorage RetconLowering; + }; + + CoroIdInst *getSwitchCoroId() const { + assert(ABI == coro::ABI::Switch); + return cast<CoroIdInst>(CoroBegin->getId()); + } + + AnyCoroIdRetconInst *getRetconCoroId() const { + assert(ABI == coro::ABI::Retcon || + ABI == coro::ABI::RetconOnce); + return cast<AnyCoroIdRetconInst>(CoroBegin->getId()); + } IntegerType *getIndexType() const { + assert(ABI == coro::ABI::Switch); assert(FrameTy && "frame type not assigned"); - return cast<IntegerType>(FrameTy->getElementType(IndexField)); + return cast<IntegerType>(FrameTy->getElementType(SwitchFieldIndex::Index)); } ConstantInt *getIndex(uint64_t Value) const { return ConstantInt::get(getIndexType(), Value); } + PointerType *getSwitchResumePointerType() const { + assert(ABI == coro::ABI::Switch); + assert(FrameTy && "frame type not assigned"); + return cast<PointerType>(FrameTy->getElementType(SwitchFieldIndex::Resume)); + } + + FunctionType *getResumeFunctionType() const { + switch (ABI) { + case coro::ABI::Switch: { + auto *FnPtrTy = getSwitchResumePointerType(); + return cast<FunctionType>(FnPtrTy->getPointerElementType()); + } + case coro::ABI::Retcon: + case coro::ABI::RetconOnce: + return RetconLowering.ResumePrototype->getFunctionType(); + } + llvm_unreachable("Unknown coro::ABI enum"); + } + + ArrayRef<Type*> getRetconResultTypes() const { + assert(ABI == coro::ABI::Retcon || + ABI == coro::ABI::RetconOnce); + auto FTy = CoroBegin->getFunction()->getFunctionType(); + + // The safety of all this is checked by checkWFRetconPrototype. 
+ if (auto STy = dyn_cast<StructType>(FTy->getReturnType())) { + return STy->elements().slice(1); + } else { + return ArrayRef<Type*>(); + } + } + + ArrayRef<Type*> getRetconResumeTypes() const { + assert(ABI == coro::ABI::Retcon || + ABI == coro::ABI::RetconOnce); + + // The safety of all this is checked by checkWFRetconPrototype. + auto FTy = RetconLowering.ResumePrototype->getFunctionType(); + return FTy->params().slice(1); + } + + CallingConv::ID getResumeFunctionCC() const { + switch (ABI) { + case coro::ABI::Switch: + return CallingConv::Fast; + + case coro::ABI::Retcon: + case coro::ABI::RetconOnce: + return RetconLowering.ResumePrototype->getCallingConv(); + } + llvm_unreachable("Unknown coro::ABI enum"); + } + + unsigned getFirstSpillFieldIndex() const { + switch (ABI) { + case coro::ABI::Switch: + return SwitchFieldIndex::FirstSpill; + + case coro::ABI::Retcon: + case coro::ABI::RetconOnce: + return 0; + } + llvm_unreachable("Unknown coro::ABI enum"); + } + + AllocaInst *getPromiseAlloca() const { + if (ABI == coro::ABI::Switch) + return SwitchLowering.PromiseAlloca; + return nullptr; + } + + /// Allocate memory according to the rules of the active lowering. + /// + /// \param CG - if non-null, will be updated for the new call + Value *emitAlloc(IRBuilder<> &Builder, Value *Size, CallGraph *CG) const; + + /// Deallocate memory according to the rules of the active lowering. + /// + /// \param CG - if non-null, will be updated for the new call + void emitDealloc(IRBuilder<> &Builder, Value *Ptr, CallGraph *CG) const; + Shape() = default; explicit Shape(Function &F) { buildFrom(F); } void buildFrom(Function &F); diff --git a/lib/Transforms/Coroutines/CoroSplit.cpp b/lib/Transforms/Coroutines/CoroSplit.cpp index 5458e70ff16a..04723cbde417 100644 --- a/lib/Transforms/Coroutines/CoroSplit.cpp +++ b/lib/Transforms/Coroutines/CoroSplit.cpp @@ -55,6 +55,7 @@ #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/PrettyStackTrace.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -70,9 +71,197 @@ using namespace llvm; #define DEBUG_TYPE "coro-split" +namespace { + +/// A little helper class for building +class CoroCloner { +public: + enum class Kind { + /// The shared resume function for a switch lowering. + SwitchResume, + + /// The shared unwind function for a switch lowering. + SwitchUnwind, + + /// The shared cleanup function for a switch lowering. + SwitchCleanup, + + /// An individual continuation function. + Continuation, + }; +private: + Function &OrigF; + Function *NewF; + const Twine &Suffix; + coro::Shape &Shape; + Kind FKind; + ValueToValueMapTy VMap; + IRBuilder<> Builder; + Value *NewFramePtr = nullptr; + Value *SwiftErrorSlot = nullptr; + + /// The active suspend instruction; meaningful only for continuation ABIs. + AnyCoroSuspendInst *ActiveSuspend = nullptr; + +public: + /// Create a cloner for a switch lowering. + CoroCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape, + Kind FKind) + : OrigF(OrigF), NewF(nullptr), Suffix(Suffix), Shape(Shape), + FKind(FKind), Builder(OrigF.getContext()) { + assert(Shape.ABI == coro::ABI::Switch); + } + + /// Create a cloner for a continuation lowering. 
+ CoroCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape, + Function *NewF, AnyCoroSuspendInst *ActiveSuspend) + : OrigF(OrigF), NewF(NewF), Suffix(Suffix), Shape(Shape), + FKind(Kind::Continuation), Builder(OrigF.getContext()), + ActiveSuspend(ActiveSuspend) { + assert(Shape.ABI == coro::ABI::Retcon || + Shape.ABI == coro::ABI::RetconOnce); + assert(NewF && "need existing function for continuation"); + assert(ActiveSuspend && "need active suspend point for continuation"); + } + + Function *getFunction() const { + assert(NewF != nullptr && "declaration not yet set"); + return NewF; + } + + void create(); + +private: + bool isSwitchDestroyFunction() { + switch (FKind) { + case Kind::Continuation: + case Kind::SwitchResume: + return false; + case Kind::SwitchUnwind: + case Kind::SwitchCleanup: + return true; + } + llvm_unreachable("Unknown CoroCloner::Kind enum"); + } + + void createDeclaration(); + void replaceEntryBlock(); + Value *deriveNewFramePointer(); + void replaceRetconSuspendUses(); + void replaceCoroSuspends(); + void replaceCoroEnds(); + void replaceSwiftErrorOps(); + void handleFinalSuspend(); + void maybeFreeContinuationStorage(); +}; + +} // end anonymous namespace + +static void maybeFreeRetconStorage(IRBuilder<> &Builder, coro::Shape &Shape, + Value *FramePtr, CallGraph *CG) { + assert(Shape.ABI == coro::ABI::Retcon || + Shape.ABI == coro::ABI::RetconOnce); + if (Shape.RetconLowering.IsFrameInlineInStorage) + return; + + Shape.emitDealloc(Builder, FramePtr, CG); +} + +/// Replace a non-unwind call to llvm.coro.end. +static void replaceFallthroughCoroEnd(CoroEndInst *End, coro::Shape &Shape, + Value *FramePtr, bool InResume, + CallGraph *CG) { + // Start inserting right before the coro.end. + IRBuilder<> Builder(End); + + // Create the return instruction. + switch (Shape.ABI) { + // The cloned functions in switch-lowering always return void. + case coro::ABI::Switch: + // coro.end doesn't immediately end the coroutine in the main function + // in this lowering, because we need to deallocate the coroutine. + if (!InResume) + return; + Builder.CreateRetVoid(); + break; + + // In unique continuation lowering, the continuations always return void. + // But we may have implicitly allocated storage. + case coro::ABI::RetconOnce: + maybeFreeRetconStorage(Builder, Shape, FramePtr, CG); + Builder.CreateRetVoid(); + break; + + // In non-unique continuation lowering, we signal completion by returning + // a null continuation. + case coro::ABI::Retcon: { + maybeFreeRetconStorage(Builder, Shape, FramePtr, CG); + auto RetTy = Shape.getResumeFunctionType()->getReturnType(); + auto RetStructTy = dyn_cast<StructType>(RetTy); + PointerType *ContinuationTy = + cast<PointerType>(RetStructTy ? RetStructTy->getElementType(0) : RetTy); + + Value *ReturnValue = ConstantPointerNull::get(ContinuationTy); + if (RetStructTy) { + ReturnValue = Builder.CreateInsertValue(UndefValue::get(RetStructTy), + ReturnValue, 0); + } + Builder.CreateRet(ReturnValue); + break; + } + } + + // Remove the rest of the block, by splitting it into an unreachable block. + auto *BB = End->getParent(); + BB->splitBasicBlock(End); + BB->getTerminator()->eraseFromParent(); +} + +/// Replace an unwind call to llvm.coro.end. +static void replaceUnwindCoroEnd(CoroEndInst *End, coro::Shape &Shape, + Value *FramePtr, bool InResume, CallGraph *CG){ + IRBuilder<> Builder(End); + + switch (Shape.ABI) { + // In switch-lowering, this does nothing in the main function. 
+ case coro::ABI::Switch: + if (!InResume) + return; + break; + + // In continuation-lowering, this frees the continuation storage. + case coro::ABI::Retcon: + case coro::ABI::RetconOnce: + maybeFreeRetconStorage(Builder, Shape, FramePtr, CG); + break; + } + + // If coro.end has an associated bundle, add cleanupret instruction. + if (auto Bundle = End->getOperandBundle(LLVMContext::OB_funclet)) { + auto *FromPad = cast<CleanupPadInst>(Bundle->Inputs[0]); + auto *CleanupRet = Builder.CreateCleanupRet(FromPad, nullptr); + End->getParent()->splitBasicBlock(End); + CleanupRet->getParent()->getTerminator()->eraseFromParent(); + } +} + +static void replaceCoroEnd(CoroEndInst *End, coro::Shape &Shape, + Value *FramePtr, bool InResume, CallGraph *CG) { + if (End->isUnwind()) + replaceUnwindCoroEnd(End, Shape, FramePtr, InResume, CG); + else + replaceFallthroughCoroEnd(End, Shape, FramePtr, InResume, CG); + + auto &Context = End->getContext(); + End->replaceAllUsesWith(InResume ? ConstantInt::getTrue(Context) + : ConstantInt::getFalse(Context)); + End->eraseFromParent(); +} + // Create an entry block for a resume function with a switch that will jump to // suspend points. -static BasicBlock *createResumeEntryBlock(Function &F, coro::Shape &Shape) { +static void createResumeEntryBlock(Function &F, coro::Shape &Shape) { + assert(Shape.ABI == coro::ABI::Switch); LLVMContext &C = F.getContext(); // resume.entry: @@ -91,15 +280,16 @@ static BasicBlock *createResumeEntryBlock(Function &F, coro::Shape &Shape) { IRBuilder<> Builder(NewEntry); auto *FramePtr = Shape.FramePtr; auto *FrameTy = Shape.FrameTy; - auto *GepIndex = Builder.CreateConstInBoundsGEP2_32( - FrameTy, FramePtr, 0, coro::Shape::IndexField, "index.addr"); + auto *GepIndex = Builder.CreateStructGEP( + FrameTy, FramePtr, coro::Shape::SwitchFieldIndex::Index, "index.addr"); auto *Index = Builder.CreateLoad(Shape.getIndexType(), GepIndex, "index"); auto *Switch = Builder.CreateSwitch(Index, UnreachBB, Shape.CoroSuspends.size()); - Shape.ResumeSwitch = Switch; + Shape.SwitchLowering.ResumeSwitch = Switch; size_t SuspendIndex = 0; - for (CoroSuspendInst *S : Shape.CoroSuspends) { + for (auto *AnyS : Shape.CoroSuspends) { + auto *S = cast<CoroSuspendInst>(AnyS); ConstantInt *IndexVal = Shape.getIndex(SuspendIndex); // Replace CoroSave with a store to Index: @@ -109,14 +299,15 @@ static BasicBlock *createResumeEntryBlock(Function &F, coro::Shape &Shape) { Builder.SetInsertPoint(Save); if (S->isFinal()) { // Final suspend point is represented by storing zero in ResumeFnAddr. 
- auto *GepIndex = Builder.CreateConstInBoundsGEP2_32(FrameTy, FramePtr, 0, - 0, "ResumeFn.addr"); + auto *GepIndex = Builder.CreateStructGEP(FrameTy, FramePtr, + coro::Shape::SwitchFieldIndex::Resume, + "ResumeFn.addr"); auto *NullPtr = ConstantPointerNull::get(cast<PointerType>( cast<PointerType>(GepIndex->getType())->getElementType())); Builder.CreateStore(NullPtr, GepIndex); } else { - auto *GepIndex = Builder.CreateConstInBoundsGEP2_32( - FrameTy, FramePtr, 0, coro::Shape::IndexField, "index.addr"); + auto *GepIndex = Builder.CreateStructGEP( + FrameTy, FramePtr, coro::Shape::SwitchFieldIndex::Index, "index.addr"); Builder.CreateStore(IndexVal, GepIndex); } Save->replaceAllUsesWith(ConstantTokenNone::get(C)); @@ -164,48 +355,9 @@ static BasicBlock *createResumeEntryBlock(Function &F, coro::Shape &Shape) { Builder.SetInsertPoint(UnreachBB); Builder.CreateUnreachable(); - return NewEntry; + Shape.SwitchLowering.ResumeEntryBlock = NewEntry; } -// In Resumers, we replace fallthrough coro.end with ret void and delete the -// rest of the block. -static void replaceFallthroughCoroEnd(IntrinsicInst *End, - ValueToValueMapTy &VMap) { - auto *NewE = cast<IntrinsicInst>(VMap[End]); - ReturnInst::Create(NewE->getContext(), nullptr, NewE); - - // Remove the rest of the block, by splitting it into an unreachable block. - auto *BB = NewE->getParent(); - BB->splitBasicBlock(NewE); - BB->getTerminator()->eraseFromParent(); -} - -// In Resumers, we replace unwind coro.end with True to force the immediate -// unwind to caller. -static void replaceUnwindCoroEnds(coro::Shape &Shape, ValueToValueMapTy &VMap) { - if (Shape.CoroEnds.empty()) - return; - - LLVMContext &Context = Shape.CoroEnds.front()->getContext(); - auto *True = ConstantInt::getTrue(Context); - for (CoroEndInst *CE : Shape.CoroEnds) { - if (!CE->isUnwind()) - continue; - - auto *NewCE = cast<IntrinsicInst>(VMap[CE]); - - // If coro.end has an associated bundle, add cleanupret instruction. - if (auto Bundle = NewCE->getOperandBundle(LLVMContext::OB_funclet)) { - Value *FromPad = Bundle->Inputs[0]; - auto *CleanupRet = CleanupReturnInst::Create(FromPad, nullptr, NewCE); - NewCE->getParent()->splitBasicBlock(NewCE); - CleanupRet->getParent()->getTerminator()->eraseFromParent(); - } - - NewCE->replaceAllUsesWith(True); - NewCE->eraseFromParent(); - } -} // Rewrite final suspend point handling. We do not use suspend index to // represent the final suspend point. Instead we zero-out ResumeFnAddr in the @@ -216,83 +368,364 @@ static void replaceUnwindCoroEnds(coro::Shape &Shape, ValueToValueMapTy &VMap) { // In the destroy function, we add a code sequence to check if ResumeFnAddress // is Null, and if so, jump to the appropriate label to handle cleanup from the // final suspend point. 
-static void handleFinalSuspend(IRBuilder<> &Builder, Value *FramePtr, - coro::Shape &Shape, SwitchInst *Switch, - bool IsDestroy) { - assert(Shape.HasFinalSuspend); +void CoroCloner::handleFinalSuspend() { + assert(Shape.ABI == coro::ABI::Switch && + Shape.SwitchLowering.HasFinalSuspend); + auto *Switch = cast<SwitchInst>(VMap[Shape.SwitchLowering.ResumeSwitch]); auto FinalCaseIt = std::prev(Switch->case_end()); BasicBlock *ResumeBB = FinalCaseIt->getCaseSuccessor(); Switch->removeCase(FinalCaseIt); - if (IsDestroy) { + if (isSwitchDestroyFunction()) { BasicBlock *OldSwitchBB = Switch->getParent(); auto *NewSwitchBB = OldSwitchBB->splitBasicBlock(Switch, "Switch"); Builder.SetInsertPoint(OldSwitchBB->getTerminator()); - auto *GepIndex = Builder.CreateConstInBoundsGEP2_32(Shape.FrameTy, FramePtr, - 0, 0, "ResumeFn.addr"); - auto *Load = Builder.CreateLoad( - Shape.FrameTy->getElementType(coro::Shape::ResumeField), GepIndex); - auto *NullPtr = - ConstantPointerNull::get(cast<PointerType>(Load->getType())); - auto *Cond = Builder.CreateICmpEQ(Load, NullPtr); + auto *GepIndex = Builder.CreateStructGEP(Shape.FrameTy, NewFramePtr, + coro::Shape::SwitchFieldIndex::Resume, + "ResumeFn.addr"); + auto *Load = Builder.CreateLoad(Shape.getSwitchResumePointerType(), + GepIndex); + auto *Cond = Builder.CreateIsNull(Load); Builder.CreateCondBr(Cond, ResumeBB, NewSwitchBB); OldSwitchBB->getTerminator()->eraseFromParent(); } } -// Create a resume clone by cloning the body of the original function, setting -// new entry block and replacing coro.suspend an appropriate value to force -// resume or cleanup pass for every suspend point. -static Function *createClone(Function &F, Twine Suffix, coro::Shape &Shape, - BasicBlock *ResumeEntry, int8_t FnIndex) { - Module *M = F.getParent(); - auto *FrameTy = Shape.FrameTy; - auto *FnPtrTy = cast<PointerType>(FrameTy->getElementType(0)); - auto *FnTy = cast<FunctionType>(FnPtrTy->getElementType()); +static Function *createCloneDeclaration(Function &OrigF, coro::Shape &Shape, + const Twine &Suffix, + Module::iterator InsertBefore) { + Module *M = OrigF.getParent(); + auto *FnTy = Shape.getResumeFunctionType(); Function *NewF = - Function::Create(FnTy, GlobalValue::LinkageTypes::ExternalLinkage, - F.getName() + Suffix, M); + Function::Create(FnTy, GlobalValue::LinkageTypes::InternalLinkage, + OrigF.getName() + Suffix); NewF->addParamAttr(0, Attribute::NonNull); NewF->addParamAttr(0, Attribute::NoAlias); - ValueToValueMapTy VMap; + M->getFunctionList().insert(InsertBefore, NewF); + + return NewF; +} + +/// Replace uses of the active llvm.coro.suspend.retcon call with the +/// arguments to the continuation function. +/// +/// This assumes that the builder has a meaningful insertion point. +void CoroCloner::replaceRetconSuspendUses() { + assert(Shape.ABI == coro::ABI::Retcon || + Shape.ABI == coro::ABI::RetconOnce); + + auto NewS = VMap[ActiveSuspend]; + if (NewS->use_empty()) return; + + // Copy out all the continuation arguments after the buffer pointer into + // an easily-indexed data structure for convenience. + SmallVector<Value*, 8> Args; + for (auto I = std::next(NewF->arg_begin()), E = NewF->arg_end(); I != E; ++I) + Args.push_back(&*I); + + // If the suspend returns a single scalar value, we can just do a simple + // replacement. + if (!isa<StructType>(NewS->getType())) { + assert(Args.size() == 1); + NewS->replaceAllUsesWith(Args.front()); + return; + } + + // Try to peephole extracts of an aggregate return. 
+ for (auto UI = NewS->use_begin(), UE = NewS->use_end(); UI != UE; ) { + auto EVI = dyn_cast<ExtractValueInst>((UI++)->getUser()); + if (!EVI || EVI->getNumIndices() != 1) + continue; + + EVI->replaceAllUsesWith(Args[EVI->getIndices().front()]); + EVI->eraseFromParent(); + } + + // If we have no remaining uses, we're done. + if (NewS->use_empty()) return; + + // Otherwise, we need to create an aggregate. + Value *Agg = UndefValue::get(NewS->getType()); + for (size_t I = 0, E = Args.size(); I != E; ++I) + Agg = Builder.CreateInsertValue(Agg, Args[I], I); + + NewS->replaceAllUsesWith(Agg); +} + +void CoroCloner::replaceCoroSuspends() { + Value *SuspendResult; + + switch (Shape.ABI) { + // In switch lowering, replace coro.suspend with the appropriate value + // for the type of function we're extracting. + // Replacing coro.suspend with (0) will result in control flow proceeding to + // a resume label associated with a suspend point, replacing it with (1) will + // result in control flow proceeding to a cleanup label associated with this + // suspend point. + case coro::ABI::Switch: + SuspendResult = Builder.getInt8(isSwitchDestroyFunction() ? 1 : 0); + break; + + // In returned-continuation lowering, the arguments from earlier + // continuations are theoretically arbitrary, and they should have been + // spilled. + case coro::ABI::RetconOnce: + case coro::ABI::Retcon: + return; + } + + for (AnyCoroSuspendInst *CS : Shape.CoroSuspends) { + // The active suspend was handled earlier. + if (CS == ActiveSuspend) continue; + + auto *MappedCS = cast<AnyCoroSuspendInst>(VMap[CS]); + MappedCS->replaceAllUsesWith(SuspendResult); + MappedCS->eraseFromParent(); + } +} + +void CoroCloner::replaceCoroEnds() { + for (CoroEndInst *CE : Shape.CoroEnds) { + // We use a null call graph because there's no call graph node for + // the cloned function yet. We'll just be rebuilding that later. + auto NewCE = cast<CoroEndInst>(VMap[CE]); + replaceCoroEnd(NewCE, Shape, NewFramePtr, /*in resume*/ true, nullptr); + } +} + +static void replaceSwiftErrorOps(Function &F, coro::Shape &Shape, + ValueToValueMapTy *VMap) { + Value *CachedSlot = nullptr; + auto getSwiftErrorSlot = [&](Type *ValueTy) -> Value * { + if (CachedSlot) { + assert(CachedSlot->getType()->getPointerElementType() == ValueTy && + "multiple swifterror slots in function with different types"); + return CachedSlot; + } + + // Check if the function has a swifterror argument. + for (auto &Arg : F.args()) { + if (Arg.isSwiftError()) { + CachedSlot = &Arg; + assert(Arg.getType()->getPointerElementType() == ValueTy && + "swifterror argument does not have expected type"); + return &Arg; + } + } + + // Create a swifterror alloca. + IRBuilder<> Builder(F.getEntryBlock().getFirstNonPHIOrDbg()); + auto Alloca = Builder.CreateAlloca(ValueTy); + Alloca->setSwiftError(true); + + CachedSlot = Alloca; + return Alloca; + }; + + for (CallInst *Op : Shape.SwiftErrorOps) { + auto MappedOp = VMap ? cast<CallInst>((*VMap)[Op]) : Op; + IRBuilder<> Builder(MappedOp); + + // If there are no arguments, this is a 'get' operation. 
+ Value *MappedResult; + if (Op->getNumArgOperands() == 0) { + auto ValueTy = Op->getType(); + auto Slot = getSwiftErrorSlot(ValueTy); + MappedResult = Builder.CreateLoad(ValueTy, Slot); + } else { + assert(Op->getNumArgOperands() == 1); + auto Value = MappedOp->getArgOperand(0); + auto ValueTy = Value->getType(); + auto Slot = getSwiftErrorSlot(ValueTy); + Builder.CreateStore(Value, Slot); + MappedResult = Slot; + } + + MappedOp->replaceAllUsesWith(MappedResult); + MappedOp->eraseFromParent(); + } + + // If we're updating the original function, we've invalidated SwiftErrorOps. + if (VMap == nullptr) { + Shape.SwiftErrorOps.clear(); + } +} + +void CoroCloner::replaceSwiftErrorOps() { + ::replaceSwiftErrorOps(*NewF, Shape, &VMap); +} + +void CoroCloner::replaceEntryBlock() { + // In the original function, the AllocaSpillBlock is a block immediately + // following the allocation of the frame object which defines GEPs for + // all the allocas that have been moved into the frame, and it ends by + // branching to the original beginning of the coroutine. Make this + // the entry block of the cloned function. + auto *Entry = cast<BasicBlock>(VMap[Shape.AllocaSpillBlock]); + Entry->setName("entry" + Suffix); + Entry->moveBefore(&NewF->getEntryBlock()); + Entry->getTerminator()->eraseFromParent(); + + // Clear all predecessors of the new entry block. There should be + // exactly one predecessor, which we created when splitting out + // AllocaSpillBlock to begin with. + assert(Entry->hasOneUse()); + auto BranchToEntry = cast<BranchInst>(Entry->user_back()); + assert(BranchToEntry->isUnconditional()); + Builder.SetInsertPoint(BranchToEntry); + Builder.CreateUnreachable(); + BranchToEntry->eraseFromParent(); + + // TODO: move any allocas into Entry that weren't moved into the frame. + // (Currently we move all allocas into the frame.) + + // Branch from the entry to the appropriate place. + Builder.SetInsertPoint(Entry); + switch (Shape.ABI) { + case coro::ABI::Switch: { + // In switch-lowering, we built a resume-entry block in the original + // function. Make the entry block branch to this. + auto *SwitchBB = + cast<BasicBlock>(VMap[Shape.SwitchLowering.ResumeEntryBlock]); + Builder.CreateBr(SwitchBB); + break; + } + + case coro::ABI::Retcon: + case coro::ABI::RetconOnce: { + // In continuation ABIs, we want to branch to immediately after the + // active suspend point. Earlier phases will have put the suspend in its + // own basic block, so just thread our jump directly to its successor. + auto MappedCS = cast<CoroSuspendRetconInst>(VMap[ActiveSuspend]); + auto Branch = cast<BranchInst>(MappedCS->getNextNode()); + assert(Branch->isUnconditional()); + Builder.CreateBr(Branch->getSuccessor(0)); + break; + } + } +} + +/// Derive the value of the new frame pointer. +Value *CoroCloner::deriveNewFramePointer() { + // Builder should be inserting to the front of the new entry block. + + switch (Shape.ABI) { + // In switch-lowering, the argument is the frame pointer. + case coro::ABI::Switch: + return &*NewF->arg_begin(); + + // In continuation-lowering, the argument is the opaque storage. + case coro::ABI::Retcon: + case coro::ABI::RetconOnce: { + Argument *NewStorage = &*NewF->arg_begin(); + auto FramePtrTy = Shape.FrameTy->getPointerTo(); + + // If the storage is inline, just bitcast to the storage to the frame type. + if (Shape.RetconLowering.IsFrameInlineInStorage) + return Builder.CreateBitCast(NewStorage, FramePtrTy); + + // Otherwise, load the real frame from the opaque storage. 
+ auto FramePtrPtr = + Builder.CreateBitCast(NewStorage, FramePtrTy->getPointerTo()); + return Builder.CreateLoad(FramePtrPtr); + } + } + llvm_unreachable("bad ABI"); +} + +/// Clone the body of the original function into a resume function of +/// some sort. +void CoroCloner::create() { + // Create the new function if we don't already have one. + if (!NewF) { + NewF = createCloneDeclaration(OrigF, Shape, Suffix, + OrigF.getParent()->end()); + } + // Replace all args with undefs. The buildCoroutineFrame algorithm already // rewritten access to the args that occurs after suspend points with loads // and stores to/from the coroutine frame. - for (Argument &A : F.args()) + for (Argument &A : OrigF.args()) VMap[&A] = UndefValue::get(A.getType()); SmallVector<ReturnInst *, 4> Returns; - CloneFunctionInto(NewF, &F, VMap, /*ModuleLevelChanges=*/true, Returns); - NewF->setLinkage(GlobalValue::LinkageTypes::InternalLinkage); + // Ignore attempts to change certain attributes of the function. + // TODO: maybe there should be a way to suppress this during cloning? + auto savedVisibility = NewF->getVisibility(); + auto savedUnnamedAddr = NewF->getUnnamedAddr(); + auto savedDLLStorageClass = NewF->getDLLStorageClass(); + + // NewF's linkage (which CloneFunctionInto does *not* change) might not + // be compatible with the visibility of OrigF (which it *does* change), + // so protect against that. + auto savedLinkage = NewF->getLinkage(); + NewF->setLinkage(llvm::GlobalValue::ExternalLinkage); + + CloneFunctionInto(NewF, &OrigF, VMap, /*ModuleLevelChanges=*/true, Returns); + + NewF->setLinkage(savedLinkage); + NewF->setVisibility(savedVisibility); + NewF->setUnnamedAddr(savedUnnamedAddr); + NewF->setDLLStorageClass(savedDLLStorageClass); + + auto &Context = NewF->getContext(); + + // Replace the attributes of the new function: + auto OrigAttrs = NewF->getAttributes(); + auto NewAttrs = AttributeList(); + + switch (Shape.ABI) { + case coro::ABI::Switch: + // Bootstrap attributes by copying function attributes from the + // original function. This should include optimization settings and so on. + NewAttrs = NewAttrs.addAttributes(Context, AttributeList::FunctionIndex, + OrigAttrs.getFnAttributes()); + break; + + case coro::ABI::Retcon: + case coro::ABI::RetconOnce: + // If we have a continuation prototype, just use its attributes, + // full-stop. + NewAttrs = Shape.RetconLowering.ResumePrototype->getAttributes(); + break; + } - // Remove old returns. - for (ReturnInst *Return : Returns) - changeToUnreachable(Return, /*UseLLVMTrap=*/false); + // Make the frame parameter nonnull and noalias. + NewAttrs = NewAttrs.addParamAttribute(Context, 0, Attribute::NonNull); + NewAttrs = NewAttrs.addParamAttribute(Context, 0, Attribute::NoAlias); + + switch (Shape.ABI) { + // In these ABIs, the cloned functions always return 'void', and the + // existing return sites are meaningless. Note that for unique + // continuations, this includes the returns associated with suspends; + // this is fine because we can't suspend twice. + case coro::ABI::Switch: + case coro::ABI::RetconOnce: + // Remove old returns. + for (ReturnInst *Return : Returns) + changeToUnreachable(Return, /*UseLLVMTrap=*/false); + break; + + // With multi-suspend continuations, we'll already have eliminated the + // original returns and inserted returns before all the suspend points, + // so we want to leave any returns in place. + case coro::ABI::Retcon: + break; + } - // Remove old return attributes. 
- NewF->removeAttributes( - AttributeList::ReturnIndex, - AttributeFuncs::typeIncompatible(NewF->getReturnType())); + NewF->setAttributes(NewAttrs); + NewF->setCallingConv(Shape.getResumeFunctionCC()); - // Make AllocaSpillBlock the new entry block. - auto *SwitchBB = cast<BasicBlock>(VMap[ResumeEntry]); - auto *Entry = cast<BasicBlock>(VMap[Shape.AllocaSpillBlock]); - Entry->moveBefore(&NewF->getEntryBlock()); - Entry->getTerminator()->eraseFromParent(); - BranchInst::Create(SwitchBB, Entry); - Entry->setName("entry" + Suffix); + // Set up the new entry block. + replaceEntryBlock(); - // Clear all predecessors of the new entry block. - auto *Switch = cast<SwitchInst>(VMap[Shape.ResumeSwitch]); - Entry->replaceAllUsesWith(Switch->getDefaultDest()); - - IRBuilder<> Builder(&NewF->getEntryBlock().front()); + Builder.SetInsertPoint(&NewF->getEntryBlock().front()); + NewFramePtr = deriveNewFramePointer(); // Remap frame pointer. - Argument *NewFramePtr = &*NewF->arg_begin(); - Value *OldFramePtr = cast<Value>(VMap[Shape.FramePtr]); + Value *OldFramePtr = VMap[Shape.FramePtr]; NewFramePtr->takeName(OldFramePtr); OldFramePtr->replaceAllUsesWith(NewFramePtr); @@ -302,50 +735,55 @@ static Function *createClone(Function &F, Twine Suffix, coro::Shape &Shape, Value *OldVFrame = cast<Value>(VMap[Shape.CoroBegin]); OldVFrame->replaceAllUsesWith(NewVFrame); - // Rewrite final suspend handling as it is not done via switch (allows to - // remove final case from the switch, since it is undefined behavior to resume - // the coroutine suspended at the final suspend point. - if (Shape.HasFinalSuspend) { - auto *Switch = cast<SwitchInst>(VMap[Shape.ResumeSwitch]); - bool IsDestroy = FnIndex != 0; - handleFinalSuspend(Builder, NewFramePtr, Shape, Switch, IsDestroy); + switch (Shape.ABI) { + case coro::ABI::Switch: + // Rewrite final suspend handling as it is not done via switch (allows to + // remove final case from the switch, since it is undefined behavior to + // resume the coroutine suspended at the final suspend point. + if (Shape.SwitchLowering.HasFinalSuspend) + handleFinalSuspend(); + break; + + case coro::ABI::Retcon: + case coro::ABI::RetconOnce: + // Replace uses of the active suspend with the corresponding + // continuation-function arguments. + assert(ActiveSuspend != nullptr && + "no active suspend when lowering a continuation-style coroutine"); + replaceRetconSuspendUses(); + break; } - // Replace coro suspend with the appropriate resume index. - // Replacing coro.suspend with (0) will result in control flow proceeding to - // a resume label associated with a suspend point, replacing it with (1) will - // result in control flow proceeding to a cleanup label associated with this - // suspend point. - auto *NewValue = Builder.getInt8(FnIndex ? 1 : 0); - for (CoroSuspendInst *CS : Shape.CoroSuspends) { - auto *MappedCS = cast<CoroSuspendInst>(VMap[CS]); - MappedCS->replaceAllUsesWith(NewValue); - MappedCS->eraseFromParent(); - } + // Handle suspends. + replaceCoroSuspends(); + + // Handle swifterror. + replaceSwiftErrorOps(); // Remove coro.end intrinsics. - replaceFallthroughCoroEnd(Shape.CoroEnds.front(), VMap); - replaceUnwindCoroEnds(Shape, VMap); + replaceCoroEnds(); + // Eliminate coro.free from the clones, replacing it with 'null' in cleanup, // to suppress deallocation code. 
- coro::replaceCoroFree(cast<CoroIdInst>(VMap[Shape.CoroBegin->getId()]), - /*Elide=*/FnIndex == 2); - - NewF->setCallingConv(CallingConv::Fast); - - return NewF; + if (Shape.ABI == coro::ABI::Switch) + coro::replaceCoroFree(cast<CoroIdInst>(VMap[Shape.CoroBegin->getId()]), + /*Elide=*/ FKind == CoroCloner::Kind::SwitchCleanup); } -static void removeCoroEnds(coro::Shape &Shape) { - if (Shape.CoroEnds.empty()) - return; - - LLVMContext &Context = Shape.CoroEnds.front()->getContext(); - auto *False = ConstantInt::getFalse(Context); +// Create a resume clone by cloning the body of the original function, setting +// new entry block and replacing coro.suspend an appropriate value to force +// resume or cleanup pass for every suspend point. +static Function *createClone(Function &F, const Twine &Suffix, + coro::Shape &Shape, CoroCloner::Kind FKind) { + CoroCloner Cloner(F, Suffix, Shape, FKind); + Cloner.create(); + return Cloner.getFunction(); +} - for (CoroEndInst *CE : Shape.CoroEnds) { - CE->replaceAllUsesWith(False); - CE->eraseFromParent(); +/// Remove calls to llvm.coro.end in the original function. +static void removeCoroEnds(coro::Shape &Shape, CallGraph *CG) { + for (auto End : Shape.CoroEnds) { + replaceCoroEnd(End, Shape, Shape.FramePtr, /*in resume*/ false, CG); } } @@ -377,8 +815,12 @@ static void replaceFrameSize(coro::Shape &Shape) { // i8* bitcast([2 x void(%f.frame*)*] * @f.resumers to i8*)) // // Assumes that all the functions have the same signature. -static void setCoroInfo(Function &F, CoroBeginInst *CoroBegin, - std::initializer_list<Function *> Fns) { +static void setCoroInfo(Function &F, coro::Shape &Shape, + ArrayRef<Function *> Fns) { + // This only works under the switch-lowering ABI because coro elision + // only works on the switch-lowering ABI. + assert(Shape.ABI == coro::ABI::Switch); + SmallVector<Constant *, 4> Args(Fns.begin(), Fns.end()); assert(!Args.empty()); Function *Part = *Fns.begin(); @@ -393,38 +835,45 @@ static void setCoroInfo(Function &F, CoroBeginInst *CoroBegin, // Update coro.begin instruction to refer to this constant. LLVMContext &C = F.getContext(); auto *BC = ConstantExpr::getPointerCast(GV, Type::getInt8PtrTy(C)); - CoroBegin->getId()->setInfo(BC); + Shape.getSwitchCoroId()->setInfo(BC); } // Store addresses of Resume/Destroy/Cleanup functions in the coroutine frame. static void updateCoroFrame(coro::Shape &Shape, Function *ResumeFn, Function *DestroyFn, Function *CleanupFn) { + assert(Shape.ABI == coro::ABI::Switch); + IRBuilder<> Builder(Shape.FramePtr->getNextNode()); - auto *ResumeAddr = Builder.CreateConstInBoundsGEP2_32( - Shape.FrameTy, Shape.FramePtr, 0, coro::Shape::ResumeField, + auto *ResumeAddr = Builder.CreateStructGEP( + Shape.FrameTy, Shape.FramePtr, coro::Shape::SwitchFieldIndex::Resume, "resume.addr"); Builder.CreateStore(ResumeFn, ResumeAddr); Value *DestroyOrCleanupFn = DestroyFn; - CoroIdInst *CoroId = Shape.CoroBegin->getId(); + CoroIdInst *CoroId = Shape.getSwitchCoroId(); if (CoroAllocInst *CA = CoroId->getCoroAlloc()) { // If there is a CoroAlloc and it returns false (meaning we elide the // allocation, use CleanupFn instead of DestroyFn). 
DestroyOrCleanupFn = Builder.CreateSelect(CA, DestroyFn, CleanupFn); } - auto *DestroyAddr = Builder.CreateConstInBoundsGEP2_32( - Shape.FrameTy, Shape.FramePtr, 0, coro::Shape::DestroyField, + auto *DestroyAddr = Builder.CreateStructGEP( + Shape.FrameTy, Shape.FramePtr, coro::Shape::SwitchFieldIndex::Destroy, "destroy.addr"); Builder.CreateStore(DestroyOrCleanupFn, DestroyAddr); } static void postSplitCleanup(Function &F) { removeUnreachableBlocks(F); + + // For now, we do a mandatory verification step because we don't + // entirely trust this pass. Note that we don't want to add a verifier + // pass to FPM below because it will also verify all the global data. + verifyFunction(F); + legacy::FunctionPassManager FPM(F.getParent()); - FPM.add(createVerifierPass()); FPM.add(createSCCPPass()); FPM.add(createCFGSimplificationPass()); FPM.add(createEarlyCSEPass()); @@ -520,21 +969,34 @@ static void addMustTailToCoroResumes(Function &F) { // Coroutine has no suspend points. Remove heap allocation for the coroutine // frame if possible. -static void handleNoSuspendCoroutine(CoroBeginInst *CoroBegin, Type *FrameTy) { +static void handleNoSuspendCoroutine(coro::Shape &Shape) { + auto *CoroBegin = Shape.CoroBegin; auto *CoroId = CoroBegin->getId(); auto *AllocInst = CoroId->getCoroAlloc(); - coro::replaceCoroFree(CoroId, /*Elide=*/AllocInst != nullptr); - if (AllocInst) { - IRBuilder<> Builder(AllocInst); - // FIXME: Need to handle overaligned members. - auto *Frame = Builder.CreateAlloca(FrameTy); - auto *VFrame = Builder.CreateBitCast(Frame, Builder.getInt8PtrTy()); - AllocInst->replaceAllUsesWith(Builder.getFalse()); - AllocInst->eraseFromParent(); - CoroBegin->replaceAllUsesWith(VFrame); - } else { - CoroBegin->replaceAllUsesWith(CoroBegin->getMem()); + switch (Shape.ABI) { + case coro::ABI::Switch: { + auto SwitchId = cast<CoroIdInst>(CoroId); + coro::replaceCoroFree(SwitchId, /*Elide=*/AllocInst != nullptr); + if (AllocInst) { + IRBuilder<> Builder(AllocInst); + // FIXME: Need to handle overaligned members. + auto *Frame = Builder.CreateAlloca(Shape.FrameTy); + auto *VFrame = Builder.CreateBitCast(Frame, Builder.getInt8PtrTy()); + AllocInst->replaceAllUsesWith(Builder.getFalse()); + AllocInst->eraseFromParent(); + CoroBegin->replaceAllUsesWith(VFrame); + } else { + CoroBegin->replaceAllUsesWith(CoroBegin->getMem()); + } + break; + } + + case coro::ABI::Retcon: + case coro::ABI::RetconOnce: + CoroBegin->replaceAllUsesWith(UndefValue::get(CoroBegin->getType())); + break; } + CoroBegin->eraseFromParent(); } @@ -670,12 +1132,16 @@ static bool simplifySuspendPoint(CoroSuspendInst *Suspend, // Remove suspend points that are simplified. static void simplifySuspendPoints(coro::Shape &Shape) { + // Currently, the only simplification we do is switch-lowering-specific. + if (Shape.ABI != coro::ABI::Switch) + return; + auto &S = Shape.CoroSuspends; size_t I = 0, N = S.size(); if (N == 0) return; while (true) { - if (simplifySuspendPoint(S[I], Shape.CoroBegin)) { + if (simplifySuspendPoint(cast<CoroSuspendInst>(S[I]), Shape.CoroBegin)) { if (--N == I) break; std::swap(S[I], S[N]); @@ -687,142 +1153,227 @@ static void simplifySuspendPoints(coro::Shape &Shape) { S.resize(N); } -static SmallPtrSet<BasicBlock *, 4> getCoroBeginPredBlocks(CoroBeginInst *CB) { - // Collect all blocks that we need to look for instructions to relocate. 
- SmallPtrSet<BasicBlock *, 4> RelocBlocks; - SmallVector<BasicBlock *, 4> Work; - Work.push_back(CB->getParent()); +static void splitSwitchCoroutine(Function &F, coro::Shape &Shape, + SmallVectorImpl<Function *> &Clones) { + assert(Shape.ABI == coro::ABI::Switch); - do { - BasicBlock *Current = Work.pop_back_val(); - for (BasicBlock *BB : predecessors(Current)) - if (RelocBlocks.count(BB) == 0) { - RelocBlocks.insert(BB); - Work.push_back(BB); - } - } while (!Work.empty()); - return RelocBlocks; -} - -static SmallPtrSet<Instruction *, 8> -getNotRelocatableInstructions(CoroBeginInst *CoroBegin, - SmallPtrSetImpl<BasicBlock *> &RelocBlocks) { - SmallPtrSet<Instruction *, 8> DoNotRelocate; - // Collect all instructions that we should not relocate - SmallVector<Instruction *, 8> Work; - - // Start with CoroBegin and terminators of all preceding blocks. - Work.push_back(CoroBegin); - BasicBlock *CoroBeginBB = CoroBegin->getParent(); - for (BasicBlock *BB : RelocBlocks) - if (BB != CoroBeginBB) - Work.push_back(BB->getTerminator()); - - // For every instruction in the Work list, place its operands in DoNotRelocate - // set. - do { - Instruction *Current = Work.pop_back_val(); - LLVM_DEBUG(dbgs() << "CoroSplit: Will not relocate: " << *Current << "\n"); - DoNotRelocate.insert(Current); - for (Value *U : Current->operands()) { - auto *I = dyn_cast<Instruction>(U); - if (!I) - continue; + createResumeEntryBlock(F, Shape); + auto ResumeClone = createClone(F, ".resume", Shape, + CoroCloner::Kind::SwitchResume); + auto DestroyClone = createClone(F, ".destroy", Shape, + CoroCloner::Kind::SwitchUnwind); + auto CleanupClone = createClone(F, ".cleanup", Shape, + CoroCloner::Kind::SwitchCleanup); - if (auto *A = dyn_cast<AllocaInst>(I)) { - // Stores to alloca instructions that occur before the coroutine frame - // is allocated should not be moved; the stored values may be used by - // the coroutine frame allocator. The operands to those stores must also - // remain in place. - for (const auto &User : A->users()) - if (auto *SI = dyn_cast<llvm::StoreInst>(User)) - if (RelocBlocks.count(SI->getParent()) != 0 && - DoNotRelocate.count(SI) == 0) { - Work.push_back(SI); - DoNotRelocate.insert(SI); - } - continue; - } + postSplitCleanup(*ResumeClone); + postSplitCleanup(*DestroyClone); + postSplitCleanup(*CleanupClone); + + addMustTailToCoroResumes(*ResumeClone); + + // Store addresses resume/destroy/cleanup functions in the coroutine frame. + updateCoroFrame(Shape, ResumeClone, DestroyClone, CleanupClone); + + assert(Clones.empty()); + Clones.push_back(ResumeClone); + Clones.push_back(DestroyClone); + Clones.push_back(CleanupClone); + + // Create a constant array referring to resume/destroy/clone functions pointed + // by the last argument of @llvm.coro.info, so that CoroElide pass can + // determined correct function to call. + setCoroInfo(F, Shape, Clones); +} - if (DoNotRelocate.count(I) == 0) { - Work.push_back(I); - DoNotRelocate.insert(I); +static void splitRetconCoroutine(Function &F, coro::Shape &Shape, + SmallVectorImpl<Function *> &Clones) { + assert(Shape.ABI == coro::ABI::Retcon || + Shape.ABI == coro::ABI::RetconOnce); + assert(Clones.empty()); + + // Reset various things that the optimizer might have decided it + // "knows" about the coroutine function due to not seeing a return. + F.removeFnAttr(Attribute::NoReturn); + F.removeAttribute(AttributeList::ReturnIndex, Attribute::NoAlias); + F.removeAttribute(AttributeList::ReturnIndex, Attribute::NonNull); + + // Allocate the frame. 
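+  // If the frame fits into the storage passed to llvm.coro.id.retcon*, it
+  // lives there directly; otherwise call the supplied allocator and stash
+  // the returned frame pointer in that storage so continuations can load it.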
+ auto *Id = cast<AnyCoroIdRetconInst>(Shape.CoroBegin->getId()); + Value *RawFramePtr; + if (Shape.RetconLowering.IsFrameInlineInStorage) { + RawFramePtr = Id->getStorage(); + } else { + IRBuilder<> Builder(Id); + + // Determine the size of the frame. + const DataLayout &DL = F.getParent()->getDataLayout(); + auto Size = DL.getTypeAllocSize(Shape.FrameTy); + + // Allocate. We don't need to update the call graph node because we're + // going to recompute it from scratch after splitting. + RawFramePtr = Shape.emitAlloc(Builder, Builder.getInt64(Size), nullptr); + RawFramePtr = + Builder.CreateBitCast(RawFramePtr, Shape.CoroBegin->getType()); + + // Stash the allocated frame pointer in the continuation storage. + auto Dest = Builder.CreateBitCast(Id->getStorage(), + RawFramePtr->getType()->getPointerTo()); + Builder.CreateStore(RawFramePtr, Dest); + } + + // Map all uses of llvm.coro.begin to the allocated frame pointer. + { + // Make sure we don't invalidate Shape.FramePtr. + TrackingVH<Instruction> Handle(Shape.FramePtr); + Shape.CoroBegin->replaceAllUsesWith(RawFramePtr); + Shape.FramePtr = Handle.getValPtr(); + } + + // Create a unique return block. + BasicBlock *ReturnBB = nullptr; + SmallVector<PHINode *, 4> ReturnPHIs; + + // Create all the functions in order after the main function. + auto NextF = std::next(F.getIterator()); + + // Create a continuation function for each of the suspend points. + Clones.reserve(Shape.CoroSuspends.size()); + for (size_t i = 0, e = Shape.CoroSuspends.size(); i != e; ++i) { + auto Suspend = cast<CoroSuspendRetconInst>(Shape.CoroSuspends[i]); + + // Create the clone declaration. + auto Continuation = + createCloneDeclaration(F, Shape, ".resume." + Twine(i), NextF); + Clones.push_back(Continuation); + + // Insert a branch to the unified return block immediately before + // the suspend point. + auto SuspendBB = Suspend->getParent(); + auto NewSuspendBB = SuspendBB->splitBasicBlock(Suspend); + auto Branch = cast<BranchInst>(SuspendBB->getTerminator()); + + // Create the unified return block. + if (!ReturnBB) { + // Place it before the first suspend. + ReturnBB = BasicBlock::Create(F.getContext(), "coro.return", &F, + NewSuspendBB); + Shape.RetconLowering.ReturnBlock = ReturnBB; + + IRBuilder<> Builder(ReturnBB); + + // Create PHIs for all the return values. + assert(ReturnPHIs.empty()); + + // First, the continuation. + ReturnPHIs.push_back(Builder.CreatePHI(Continuation->getType(), + Shape.CoroSuspends.size())); + + // Next, all the directly-yielded values. + for (auto ResultTy : Shape.getRetconResultTypes()) + ReturnPHIs.push_back(Builder.CreatePHI(ResultTy, + Shape.CoroSuspends.size())); + + // Build the return value. + auto RetTy = F.getReturnType(); + + // Cast the continuation value if necessary. + // We can't rely on the types matching up because that type would + // have to be infinite. + auto CastedContinuationTy = + (ReturnPHIs.size() == 1 ? 
RetTy : RetTy->getStructElementType(0)); + auto *CastedContinuation = + Builder.CreateBitCast(ReturnPHIs[0], CastedContinuationTy); + + Value *RetV; + if (ReturnPHIs.size() == 1) { + RetV = CastedContinuation; + } else { + RetV = UndefValue::get(RetTy); + RetV = Builder.CreateInsertValue(RetV, CastedContinuation, 0); + for (size_t I = 1, E = ReturnPHIs.size(); I != E; ++I) + RetV = Builder.CreateInsertValue(RetV, ReturnPHIs[I], I); } + + Builder.CreateRet(RetV); } - } while (!Work.empty()); - return DoNotRelocate; -} -static void relocateInstructionBefore(CoroBeginInst *CoroBegin, Function &F) { - // Analyze which non-alloca instructions are needed for allocation and - // relocate the rest to after coro.begin. We need to do it, since some of the - // targets of those instructions may be placed into coroutine frame memory - // for which becomes available after coro.begin intrinsic. + // Branch to the return block. + Branch->setSuccessor(0, ReturnBB); + ReturnPHIs[0]->addIncoming(Continuation, SuspendBB); + size_t NextPHIIndex = 1; + for (auto &VUse : Suspend->value_operands()) + ReturnPHIs[NextPHIIndex++]->addIncoming(&*VUse, SuspendBB); + assert(NextPHIIndex == ReturnPHIs.size()); + } - auto BlockSet = getCoroBeginPredBlocks(CoroBegin); - auto DoNotRelocateSet = getNotRelocatableInstructions(CoroBegin, BlockSet); + assert(Clones.size() == Shape.CoroSuspends.size()); + for (size_t i = 0, e = Shape.CoroSuspends.size(); i != e; ++i) { + auto Suspend = Shape.CoroSuspends[i]; + auto Clone = Clones[i]; - Instruction *InsertPt = CoroBegin->getNextNode(); - BasicBlock &BB = F.getEntryBlock(); // TODO: Look at other blocks as well. - for (auto B = BB.begin(), E = BB.end(); B != E;) { - Instruction &I = *B++; - if (isa<AllocaInst>(&I)) - continue; - if (&I == CoroBegin) - break; - if (DoNotRelocateSet.count(&I)) - continue; - I.moveBefore(InsertPt); + CoroCloner(F, "resume." + Twine(i), Shape, Clone, Suspend).create(); + } +} + +namespace { + class PrettyStackTraceFunction : public PrettyStackTraceEntry { + Function &F; + public: + PrettyStackTraceFunction(Function &F) : F(F) {} + void print(raw_ostream &OS) const override { + OS << "While splitting coroutine "; + F.printAsOperand(OS, /*print type*/ false, F.getParent()); + OS << "\n"; + } + }; +} + +static void splitCoroutine(Function &F, coro::Shape &Shape, + SmallVectorImpl<Function *> &Clones) { + switch (Shape.ABI) { + case coro::ABI::Switch: + return splitSwitchCoroutine(F, Shape, Clones); + case coro::ABI::Retcon: + case coro::ABI::RetconOnce: + return splitRetconCoroutine(F, Shape, Clones); } + llvm_unreachable("bad ABI kind"); } static void splitCoroutine(Function &F, CallGraph &CG, CallGraphSCC &SCC) { - EliminateUnreachableBlocks(F); + PrettyStackTraceFunction prettyStackTrace(F); + + // The suspend-crossing algorithm in buildCoroutineFrame get tripped + // up by uses in unreachable blocks, so remove them as a first pass. + removeUnreachableBlocks(F); coro::Shape Shape(F); if (!Shape.CoroBegin) return; simplifySuspendPoints(Shape); - relocateInstructionBefore(Shape.CoroBegin, F); buildCoroutineFrame(F, Shape); replaceFrameSize(Shape); + SmallVector<Function*, 4> Clones; + // If there are no suspend points, no split required, just remove // the allocation and deallocation blocks, they are not needed. 
if (Shape.CoroSuspends.empty()) { - handleNoSuspendCoroutine(Shape.CoroBegin, Shape.FrameTy); - removeCoroEnds(Shape); - postSplitCleanup(F); - coro::updateCallGraph(F, {}, CG, SCC); - return; + handleNoSuspendCoroutine(Shape); + } else { + splitCoroutine(F, Shape, Clones); } - auto *ResumeEntry = createResumeEntryBlock(F, Shape); - auto ResumeClone = createClone(F, ".resume", Shape, ResumeEntry, 0); - auto DestroyClone = createClone(F, ".destroy", Shape, ResumeEntry, 1); - auto CleanupClone = createClone(F, ".cleanup", Shape, ResumeEntry, 2); - - // We no longer need coro.end in F. - removeCoroEnds(Shape); + // Replace all the swifterror operations in the original function. + // This invalidates SwiftErrorOps in the Shape. + replaceSwiftErrorOps(F, Shape, nullptr); + removeCoroEnds(Shape, &CG); postSplitCleanup(F); - postSplitCleanup(*ResumeClone); - postSplitCleanup(*DestroyClone); - postSplitCleanup(*CleanupClone); - - addMustTailToCoroResumes(*ResumeClone); - - // Store addresses resume/destroy/cleanup functions in the coroutine frame. - updateCoroFrame(Shape, ResumeClone, DestroyClone, CleanupClone); - - // Create a constant array referring to resume/destroy/clone functions pointed - // by the last argument of @llvm.coro.info, so that CoroElide pass can - // determined correct function to call. - setCoroInfo(F, Shape.CoroBegin, {ResumeClone, DestroyClone, CleanupClone}); // Update call graph and add the functions we created to the SCC. - coro::updateCallGraph(F, {ResumeClone, DestroyClone, CleanupClone}, CG, SCC); + coro::updateCallGraph(F, Clones, CG, SCC); } // When we see the coroutine the first time, we insert an indirect call to a @@ -881,6 +1432,80 @@ static void createDevirtTriggerFunc(CallGraph &CG, CallGraphSCC &SCC) { SCC.initialize(Nodes); } +/// Replace a call to llvm.coro.prepare.retcon. +static void replacePrepare(CallInst *Prepare, CallGraph &CG) { + auto CastFn = Prepare->getArgOperand(0); // as an i8* + auto Fn = CastFn->stripPointerCasts(); // as its original type + + // Find call graph nodes for the preparation. + CallGraphNode *PrepareUserNode = nullptr, *FnNode = nullptr; + if (auto ConcreteFn = dyn_cast<Function>(Fn)) { + PrepareUserNode = CG[Prepare->getFunction()]; + FnNode = CG[ConcreteFn]; + } + + // Attempt to peephole this pattern: + // %0 = bitcast [[TYPE]] @some_function to i8* + // %1 = call @llvm.coro.prepare.retcon(i8* %0) + // %2 = bitcast %1 to [[TYPE]] + // ==> + // %2 = @some_function + for (auto UI = Prepare->use_begin(), UE = Prepare->use_end(); + UI != UE; ) { + // Look for bitcasts back to the original function type. + auto *Cast = dyn_cast<BitCastInst>((UI++)->getUser()); + if (!Cast || Cast->getType() != Fn->getType()) continue; + + // Check whether the replacement will introduce new direct calls. + // If so, we'll need to update the call graph. + if (PrepareUserNode) { + for (auto &Use : Cast->uses()) { + if (auto *CB = dyn_cast<CallBase>(Use.getUser())) { + if (!CB->isCallee(&Use)) + continue; + PrepareUserNode->removeCallEdgeFor(*CB); + PrepareUserNode->addCalledFunction(CB, FnNode); + } + } + } + + // Replace and remove the cast. + Cast->replaceAllUsesWith(Fn); + Cast->eraseFromParent(); + } + + // Replace any remaining uses with the function as an i8*. + // This can never directly be a callee, so we don't need to update CG. + Prepare->replaceAllUsesWith(CastFn); + Prepare->eraseFromParent(); + + // Kill dead bitcasts. 
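+  // (Each cast in the chain that is now unused can be erased, walking up
+  // toward the original function reference.)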
+ while (auto *Cast = dyn_cast<BitCastInst>(CastFn)) { + if (!Cast->use_empty()) break; + CastFn = Cast->getOperand(0); + Cast->eraseFromParent(); + } +} + +/// Remove calls to llvm.coro.prepare.retcon, a barrier meant to prevent +/// IPO from operating on calls to a retcon coroutine before it's been +/// split. This is only safe to do after we've split all retcon +/// coroutines in the module. We can do that this in this pass because +/// this pass does promise to split all retcon coroutines (as opposed to +/// switch coroutines, which are lowered in multiple stages). +static bool replaceAllPrepares(Function *PrepareFn, CallGraph &CG) { + bool Changed = false; + for (auto PI = PrepareFn->use_begin(), PE = PrepareFn->use_end(); + PI != PE; ) { + // Intrinsics can only be used in calls. + auto *Prepare = cast<CallInst>((PI++)->getUser()); + replacePrepare(Prepare, CG); + Changed = true; + } + + return Changed; +} + //===----------------------------------------------------------------------===// // Top Level Driver //===----------------------------------------------------------------------===// @@ -899,7 +1524,9 @@ struct CoroSplit : public CallGraphSCCPass { // A coroutine is identified by the presence of coro.begin intrinsic, if // we don't have any, this pass has nothing to do. bool doInitialization(CallGraph &CG) override { - Run = coro::declaresIntrinsics(CG.getModule(), {"llvm.coro.begin"}); + Run = coro::declaresIntrinsics(CG.getModule(), + {"llvm.coro.begin", + "llvm.coro.prepare.retcon"}); return CallGraphSCCPass::doInitialization(CG); } @@ -907,6 +1534,12 @@ struct CoroSplit : public CallGraphSCCPass { if (!Run) return false; + // Check for uses of llvm.coro.prepare.retcon. + auto PrepareFn = + SCC.getCallGraph().getModule().getFunction("llvm.coro.prepare.retcon"); + if (PrepareFn && PrepareFn->use_empty()) + PrepareFn = nullptr; + // Find coroutines for processing. SmallVector<Function *, 4> Coroutines; for (CallGraphNode *CGN : SCC) @@ -914,12 +1547,17 @@ struct CoroSplit : public CallGraphSCCPass { if (F->hasFnAttribute(CORO_PRESPLIT_ATTR)) Coroutines.push_back(F); - if (Coroutines.empty()) + if (Coroutines.empty() && !PrepareFn) return false; CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph(); + + if (Coroutines.empty()) + return replaceAllPrepares(PrepareFn, CG); + createDevirtTriggerFunc(CG, SCC); + // Split all the coroutines. for (Function *F : Coroutines) { Attribute Attr = F->getFnAttribute(CORO_PRESPLIT_ATTR); StringRef Value = Attr.getValueAsString(); @@ -932,6 +1570,10 @@ struct CoroSplit : public CallGraphSCCPass { F->removeFnAttr(CORO_PRESPLIT_ATTR); splitCoroutine(*F, CG, SCC); } + + if (PrepareFn) + replaceAllPrepares(PrepareFn, CG); + return true; } diff --git a/lib/Transforms/Coroutines/Coroutines.cpp b/lib/Transforms/Coroutines/Coroutines.cpp index a581d1d21169..f39483b27518 100644 --- a/lib/Transforms/Coroutines/Coroutines.cpp +++ b/lib/Transforms/Coroutines/Coroutines.cpp @@ -123,12 +123,26 @@ Value *coro::LowererBase::makeSubFnCall(Value *Arg, int Index, static bool isCoroutineIntrinsicName(StringRef Name) { // NOTE: Must be sorted! 
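+  // (lookupLLVMIntrinsicByName below relies on this ordering to do a
+  // binary-search lookup.)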
static const char *const CoroIntrinsics[] = { - "llvm.coro.alloc", "llvm.coro.begin", "llvm.coro.destroy", - "llvm.coro.done", "llvm.coro.end", "llvm.coro.frame", - "llvm.coro.free", "llvm.coro.id", "llvm.coro.noop", - "llvm.coro.param", "llvm.coro.promise", "llvm.coro.resume", - "llvm.coro.save", "llvm.coro.size", "llvm.coro.subfn.addr", + "llvm.coro.alloc", + "llvm.coro.begin", + "llvm.coro.destroy", + "llvm.coro.done", + "llvm.coro.end", + "llvm.coro.frame", + "llvm.coro.free", + "llvm.coro.id", + "llvm.coro.id.retcon", + "llvm.coro.id.retcon.once", + "llvm.coro.noop", + "llvm.coro.param", + "llvm.coro.prepare.retcon", + "llvm.coro.promise", + "llvm.coro.resume", + "llvm.coro.save", + "llvm.coro.size", + "llvm.coro.subfn.addr", "llvm.coro.suspend", + "llvm.coro.suspend.retcon", }; return Intrinsic::lookupLLVMIntrinsicByName(CoroIntrinsics, Name) != -1; } @@ -217,9 +231,6 @@ static void clear(coro::Shape &Shape) { Shape.FrameTy = nullptr; Shape.FramePtr = nullptr; Shape.AllocaSpillBlock = nullptr; - Shape.ResumeSwitch = nullptr; - Shape.PromiseAlloca = nullptr; - Shape.HasFinalSuspend = false; } static CoroSaveInst *createCoroSave(CoroBeginInst *CoroBegin, @@ -235,6 +246,7 @@ static CoroSaveInst *createCoroSave(CoroBeginInst *CoroBegin, // Collect "interesting" coroutine intrinsics. void coro::Shape::buildFrom(Function &F) { + bool HasFinalSuspend = false; size_t FinalSuspendIndex = 0; clear(*this); SmallVector<CoroFrameInst *, 8> CoroFrames; @@ -257,9 +269,15 @@ void coro::Shape::buildFrom(Function &F) { if (II->use_empty()) UnusedCoroSaves.push_back(cast<CoroSaveInst>(II)); break; - case Intrinsic::coro_suspend: - CoroSuspends.push_back(cast<CoroSuspendInst>(II)); - if (CoroSuspends.back()->isFinal()) { + case Intrinsic::coro_suspend_retcon: { + auto Suspend = cast<CoroSuspendRetconInst>(II); + CoroSuspends.push_back(Suspend); + break; + } + case Intrinsic::coro_suspend: { + auto Suspend = cast<CoroSuspendInst>(II); + CoroSuspends.push_back(Suspend); + if (Suspend->isFinal()) { if (HasFinalSuspend) report_fatal_error( "Only one suspend point can be marked as final"); @@ -267,18 +285,23 @@ void coro::Shape::buildFrom(Function &F) { FinalSuspendIndex = CoroSuspends.size() - 1; } break; + } case Intrinsic::coro_begin: { auto CB = cast<CoroBeginInst>(II); - if (CB->getId()->getInfo().isPreSplit()) { - if (CoroBegin) - report_fatal_error( + + // Ignore coro id's that aren't pre-split. + auto Id = dyn_cast<CoroIdInst>(CB->getId()); + if (Id && !Id->getInfo().isPreSplit()) + break; + + if (CoroBegin) + report_fatal_error( "coroutine should have exactly one defining @llvm.coro.begin"); - CB->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull); - CB->addAttribute(AttributeList::ReturnIndex, Attribute::NoAlias); - CB->removeAttribute(AttributeList::FunctionIndex, - Attribute::NoDuplicate); - CoroBegin = CB; - } + CB->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull); + CB->addAttribute(AttributeList::ReturnIndex, Attribute::NoAlias); + CB->removeAttribute(AttributeList::FunctionIndex, + Attribute::NoDuplicate); + CoroBegin = CB; break; } case Intrinsic::coro_end: @@ -310,7 +333,7 @@ void coro::Shape::buildFrom(Function &F) { // Replace all coro.suspend with undef and remove related coro.saves if // present. 
- for (CoroSuspendInst *CS : CoroSuspends) { + for (AnyCoroSuspendInst *CS : CoroSuspends) { CS->replaceAllUsesWith(UndefValue::get(CS->getType())); CS->eraseFromParent(); if (auto *CoroSave = CS->getCoroSave()) @@ -324,19 +347,136 @@ void coro::Shape::buildFrom(Function &F) { return; } + auto Id = CoroBegin->getId(); + switch (auto IdIntrinsic = Id->getIntrinsicID()) { + case Intrinsic::coro_id: { + auto SwitchId = cast<CoroIdInst>(Id); + this->ABI = coro::ABI::Switch; + this->SwitchLowering.HasFinalSuspend = HasFinalSuspend; + this->SwitchLowering.ResumeSwitch = nullptr; + this->SwitchLowering.PromiseAlloca = SwitchId->getPromise(); + this->SwitchLowering.ResumeEntryBlock = nullptr; + + for (auto AnySuspend : CoroSuspends) { + auto Suspend = dyn_cast<CoroSuspendInst>(AnySuspend); + if (!Suspend) { +#ifndef NDEBUG + AnySuspend->dump(); +#endif + report_fatal_error("coro.id must be paired with coro.suspend"); + } + + if (!Suspend->getCoroSave()) + createCoroSave(CoroBegin, Suspend); + } + break; + } + + case Intrinsic::coro_id_retcon: + case Intrinsic::coro_id_retcon_once: { + auto ContinuationId = cast<AnyCoroIdRetconInst>(Id); + ContinuationId->checkWellFormed(); + this->ABI = (IdIntrinsic == Intrinsic::coro_id_retcon + ? coro::ABI::Retcon + : coro::ABI::RetconOnce); + auto Prototype = ContinuationId->getPrototype(); + this->RetconLowering.ResumePrototype = Prototype; + this->RetconLowering.Alloc = ContinuationId->getAllocFunction(); + this->RetconLowering.Dealloc = ContinuationId->getDeallocFunction(); + this->RetconLowering.ReturnBlock = nullptr; + this->RetconLowering.IsFrameInlineInStorage = false; + + // Determine the result value types, and make sure they match up with + // the values passed to the suspends. + auto ResultTys = getRetconResultTypes(); + auto ResumeTys = getRetconResumeTypes(); + + for (auto AnySuspend : CoroSuspends) { + auto Suspend = dyn_cast<CoroSuspendRetconInst>(AnySuspend); + if (!Suspend) { +#ifndef NDEBUG + AnySuspend->dump(); +#endif + report_fatal_error("coro.id.retcon.* must be paired with " + "coro.suspend.retcon"); + } + + // Check that the argument types of the suspend match the results. + auto SI = Suspend->value_begin(), SE = Suspend->value_end(); + auto RI = ResultTys.begin(), RE = ResultTys.end(); + for (; SI != SE && RI != RE; ++SI, ++RI) { + auto SrcTy = (*SI)->getType(); + if (SrcTy != *RI) { + // The optimizer likes to eliminate bitcasts leading into variadic + // calls, but that messes with our invariants. Re-insert the + // bitcast and ignore this type mismatch. + if (CastInst::isBitCastable(SrcTy, *RI)) { + auto BCI = new BitCastInst(*SI, *RI, "", Suspend); + SI->set(BCI); + continue; + } + +#ifndef NDEBUG + Suspend->dump(); + Prototype->getFunctionType()->dump(); +#endif + report_fatal_error("argument to coro.suspend.retcon does not " + "match corresponding prototype function result"); + } + } + if (SI != SE || RI != RE) { +#ifndef NDEBUG + Suspend->dump(); + Prototype->getFunctionType()->dump(); +#endif + report_fatal_error("wrong number of arguments to coro.suspend.retcon"); + } + + // Check that the result type of the suspend matches the resume types. 
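+      // The suspend produces either void (no values are passed back in when
+      // the coroutine is resumed), a single value, or a struct whose
+      // elements must match the resume parameter types one-to-one.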
+ Type *SResultTy = Suspend->getType(); + ArrayRef<Type*> SuspendResultTys; + if (SResultTy->isVoidTy()) { + // leave as empty array + } else if (auto SResultStructTy = dyn_cast<StructType>(SResultTy)) { + SuspendResultTys = SResultStructTy->elements(); + } else { + // forms an ArrayRef using SResultTy, be careful + SuspendResultTys = SResultTy; + } + if (SuspendResultTys.size() != ResumeTys.size()) { +#ifndef NDEBUG + Suspend->dump(); + Prototype->getFunctionType()->dump(); +#endif + report_fatal_error("wrong number of results from coro.suspend.retcon"); + } + for (size_t I = 0, E = ResumeTys.size(); I != E; ++I) { + if (SuspendResultTys[I] != ResumeTys[I]) { +#ifndef NDEBUG + Suspend->dump(); + Prototype->getFunctionType()->dump(); +#endif + report_fatal_error("result from coro.suspend.retcon does not " + "match corresponding prototype function param"); + } + } + } + break; + } + + default: + llvm_unreachable("coro.begin is not dependent on a coro.id call"); + } + // The coro.free intrinsic is always lowered to the result of coro.begin. for (CoroFrameInst *CF : CoroFrames) { CF->replaceAllUsesWith(CoroBegin); CF->eraseFromParent(); } - // Canonicalize coro.suspend by inserting a coro.save if needed. - for (CoroSuspendInst *CS : CoroSuspends) - if (!CS->getCoroSave()) - createCoroSave(CoroBegin, CS); - // Move final suspend to be the last element in the CoroSuspends vector. - if (HasFinalSuspend && + if (ABI == coro::ABI::Switch && + SwitchLowering.HasFinalSuspend && FinalSuspendIndex != CoroSuspends.size() - 1) std::swap(CoroSuspends[FinalSuspendIndex], CoroSuspends.back()); @@ -345,6 +485,154 @@ void coro::Shape::buildFrom(Function &F) { CoroSave->eraseFromParent(); } +static void propagateCallAttrsFromCallee(CallInst *Call, Function *Callee) { + Call->setCallingConv(Callee->getCallingConv()); + // TODO: attributes? 
+} + +static void addCallToCallGraph(CallGraph *CG, CallInst *Call, Function *Callee){ + if (CG) + (*CG)[Call->getFunction()]->addCalledFunction(Call, (*CG)[Callee]); +} + +Value *coro::Shape::emitAlloc(IRBuilder<> &Builder, Value *Size, + CallGraph *CG) const { + switch (ABI) { + case coro::ABI::Switch: + llvm_unreachable("can't allocate memory in coro switch-lowering"); + + case coro::ABI::Retcon: + case coro::ABI::RetconOnce: { + auto Alloc = RetconLowering.Alloc; + Size = Builder.CreateIntCast(Size, + Alloc->getFunctionType()->getParamType(0), + /*is signed*/ false); + auto *Call = Builder.CreateCall(Alloc, Size); + propagateCallAttrsFromCallee(Call, Alloc); + addCallToCallGraph(CG, Call, Alloc); + return Call; + } + } + llvm_unreachable("Unknown coro::ABI enum"); +} + +void coro::Shape::emitDealloc(IRBuilder<> &Builder, Value *Ptr, + CallGraph *CG) const { + switch (ABI) { + case coro::ABI::Switch: + llvm_unreachable("can't allocate memory in coro switch-lowering"); + + case coro::ABI::Retcon: + case coro::ABI::RetconOnce: { + auto Dealloc = RetconLowering.Dealloc; + Ptr = Builder.CreateBitCast(Ptr, + Dealloc->getFunctionType()->getParamType(0)); + auto *Call = Builder.CreateCall(Dealloc, Ptr); + propagateCallAttrsFromCallee(Call, Dealloc); + addCallToCallGraph(CG, Call, Dealloc); + return; + } + } + llvm_unreachable("Unknown coro::ABI enum"); +} + +LLVM_ATTRIBUTE_NORETURN +static void fail(const Instruction *I, const char *Reason, Value *V) { +#ifndef NDEBUG + I->dump(); + if (V) { + errs() << " Value: "; + V->printAsOperand(llvm::errs()); + errs() << '\n'; + } +#endif + report_fatal_error(Reason); +} + +/// Check that the given value is a well-formed prototype for the +/// llvm.coro.id.retcon.* intrinsics. +static void checkWFRetconPrototype(const AnyCoroIdRetconInst *I, Value *V) { + auto F = dyn_cast<Function>(V->stripPointerCasts()); + if (!F) + fail(I, "llvm.coro.id.retcon.* prototype not a Function", V); + + auto FT = F->getFunctionType(); + + if (isa<CoroIdRetconInst>(I)) { + bool ResultOkay; + if (FT->getReturnType()->isPointerTy()) { + ResultOkay = true; + } else if (auto SRetTy = dyn_cast<StructType>(FT->getReturnType())) { + ResultOkay = (!SRetTy->isOpaque() && + SRetTy->getNumElements() > 0 && + SRetTy->getElementType(0)->isPointerTy()); + } else { + ResultOkay = false; + } + if (!ResultOkay) + fail(I, "llvm.coro.id.retcon prototype must return pointer as first " + "result", F); + + if (FT->getReturnType() != + I->getFunction()->getFunctionType()->getReturnType()) + fail(I, "llvm.coro.id.retcon prototype return type must be same as" + "current function return type", F); + } else { + // No meaningful validation to do here for llvm.coro.id.unique.once. + } + + if (FT->getNumParams() == 0 || !FT->getParamType(0)->isPointerTy()) + fail(I, "llvm.coro.id.retcon.* prototype must take pointer as " + "its first parameter", F); +} + +/// Check that the given value is a well-formed allocator. +static void checkWFAlloc(const Instruction *I, Value *V) { + auto F = dyn_cast<Function>(V->stripPointerCasts()); + if (!F) + fail(I, "llvm.coro.* allocator not a Function", V); + + auto FT = F->getFunctionType(); + if (!FT->getReturnType()->isPointerTy()) + fail(I, "llvm.coro.* allocator must return a pointer", F); + + if (FT->getNumParams() != 1 || + !FT->getParamType(0)->isIntegerTy()) + fail(I, "llvm.coro.* allocator must take integer as only param", F); +} + +/// Check that the given value is a well-formed deallocator. 
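+/// That is, it must take a single pointer and return void, e.g.
+/// 'void @dealloc(i8*)' (name illustrative).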
+static void checkWFDealloc(const Instruction *I, Value *V) { + auto F = dyn_cast<Function>(V->stripPointerCasts()); + if (!F) + fail(I, "llvm.coro.* deallocator not a Function", V); + + auto FT = F->getFunctionType(); + if (!FT->getReturnType()->isVoidTy()) + fail(I, "llvm.coro.* deallocator must return void", F); + + if (FT->getNumParams() != 1 || + !FT->getParamType(0)->isPointerTy()) + fail(I, "llvm.coro.* deallocator must take pointer as only param", F); +} + +static void checkConstantInt(const Instruction *I, Value *V, + const char *Reason) { + if (!isa<ConstantInt>(V)) { + fail(I, Reason, V); + } +} + +void AnyCoroIdRetconInst::checkWellFormed() const { + checkConstantInt(this, getArgOperand(SizeArg), + "size argument to coro.id.retcon.* must be constant"); + checkConstantInt(this, getArgOperand(AlignArg), + "alignment argument to coro.id.retcon.* must be constant"); + checkWFRetconPrototype(this, getArgOperand(PrototypeArg)); + checkWFAlloc(this, getArgOperand(AllocArg)); + checkWFDealloc(this, getArgOperand(DeallocArg)); +} + void LLVMAddCoroEarlyPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createCoroEarlyPass()); } |