Diffstat (limited to 'llvm/lib/Transforms/Coroutines/CoroFrame.cpp')
| -rw-r--r-- | llvm/lib/Transforms/Coroutines/CoroFrame.cpp | 1440 |
1 file changed, 1440 insertions, 0 deletions
diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
new file mode 100644
index 000000000000..2c42cf8a6d25
--- /dev/null
+++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
@@ -0,0 +1,1440 @@
+//===- CoroFrame.cpp - Builds and manipulates coroutine frame ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// This file contains classes used to discover if, for a particular value,
+// there is a path from its definition to a use that crosses a suspend point.
+//
+// Using the information discovered we form a coroutine frame structure to
+// contain those values. All uses of those values are replaced with an
+// appropriate GEP + load from the coroutine frame. At the point of the
+// definition we spill the value into the coroutine frame.
+//
+// TODO: pack values tightly using liveness info.
+//===----------------------------------------------------------------------===//
+
+#include "CoroInternal.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/Analysis/PtrUseVisitor.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/circular_raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+
+using namespace llvm;
+
+// The "coro-suspend-crossing" flag is very noisy. There is another debug type,
+// "coro-frame", which results in leaner debug spew.
+#define DEBUG_TYPE "coro-suspend-crossing"
+
+enum { SmallVectorThreshold = 32 };
+
+// Provides a two-way mapping between blocks and their indices.
+namespace {
+class BlockToIndexMapping {
+  SmallVector<BasicBlock *, SmallVectorThreshold> V;
+
+public:
+  size_t size() const { return V.size(); }
+
+  BlockToIndexMapping(Function &F) {
+    for (BasicBlock &BB : F)
+      V.push_back(&BB);
+    llvm::sort(V);
+  }
+
+  size_t blockToIndex(BasicBlock *BB) const {
+    auto *I = llvm::lower_bound(V, BB);
+    assert(I != V.end() && *I == BB && "BlockToIndexMapping: Unknown block");
+    return I - V.begin();
+  }
+
+  BasicBlock *indexToBlock(unsigned Index) const { return V[Index]; }
+};
+} // end anonymous namespace
+
+// The SuspendCrossingInfo maintains data that allows us to answer the
+// question whether, given two BasicBlocks A and B, there is a path from A
+// to B that passes through a suspend point.
+//
+// For every basic block 'i' it maintains a BlockData that consists of:
+//   Consumes: a bit vector which contains a set of indices of blocks that can
+//             reach block 'i'
+//   Kills: a bit vector which contains a set of indices of blocks that can
+//          reach block 'i', but at least one of those paths crosses a
+//          suspend point
+//   Suspend: a boolean indicating whether block 'i' contains a suspend point.
+//   End: a boolean indicating whether block 'i' contains a coro.end intrinsic.
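+//
+// Editorial illustration (not part of the original comment): for the
+// hypothetical CFG  entry -> susp -> use,  where block 'susp' contains a
+// suspend point, the fixed point computed by the constructor below is
+//   Consumes[use] = { entry, susp, use }
+//   Kills[use]    = { entry, susp }
+// so any value defined in 'entry' (or in 'susp' itself) and used in 'use'
+// must be spilled into the coroutine frame.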
+//
+namespace {
+struct SuspendCrossingInfo {
+  BlockToIndexMapping Mapping;
+
+  struct BlockData {
+    BitVector Consumes;
+    BitVector Kills;
+    bool Suspend = false;
+    bool End = false;
+  };
+  SmallVector<BlockData, SmallVectorThreshold> Block;
+
+  iterator_range<succ_iterator> successors(BlockData const &BD) const {
+    BasicBlock *BB = Mapping.indexToBlock(&BD - &Block[0]);
+    return llvm::successors(BB);
+  }
+
+  BlockData &getBlockData(BasicBlock *BB) {
+    return Block[Mapping.blockToIndex(BB)];
+  }
+
+  void dump() const;
+  void dump(StringRef Label, BitVector const &BV) const;
+
+  SuspendCrossingInfo(Function &F, coro::Shape &Shape);
+
+  bool hasPathCrossingSuspendPoint(BasicBlock *DefBB, BasicBlock *UseBB) const {
+    size_t const DefIndex = Mapping.blockToIndex(DefBB);
+    size_t const UseIndex = Mapping.blockToIndex(UseBB);
+
+    assert(Block[UseIndex].Consumes[DefIndex] && "use must consume def");
+    bool const Result = Block[UseIndex].Kills[DefIndex];
+    LLVM_DEBUG(dbgs() << UseBB->getName() << " => " << DefBB->getName()
+                      << " answer is " << Result << "\n");
+    return Result;
+  }
+
+  bool isDefinitionAcrossSuspend(BasicBlock *DefBB, User *U) const {
+    auto *I = cast<Instruction>(U);
+
+    // We rewrote PHINodes, so that only the ones with exactly one incoming
+    // value need to be analyzed.
+    if (auto *PN = dyn_cast<PHINode>(I))
+      if (PN->getNumIncomingValues() > 1)
+        return false;
+
+    BasicBlock *UseBB = I->getParent();
+
+    // As a special case, treat uses by an llvm.coro.suspend.retcon
+    // as if they were uses in the suspend's single predecessor: the
+    // uses conceptually occur before the suspend.
+    if (isa<CoroSuspendRetconInst>(I)) {
+      UseBB = UseBB->getSinglePredecessor();
+      assert(UseBB && "should have split coro.suspend into its own block");
+    }
+
+    return hasPathCrossingSuspendPoint(DefBB, UseBB);
+  }
+
+  bool isDefinitionAcrossSuspend(Argument &A, User *U) const {
+    return isDefinitionAcrossSuspend(&A.getParent()->getEntryBlock(), U);
+  }
+
+  bool isDefinitionAcrossSuspend(Instruction &I, User *U) const {
+    auto *DefBB = I.getParent();
+
+    // As a special case, treat values produced by an llvm.coro.suspend.*
+    // as if they were defined in the single successor: the uses
+    // conceptually occur after the suspend.
+    if (isa<AnyCoroSuspendInst>(I)) {
+      DefBB = DefBB->getSingleSuccessor();
+      assert(DefBB && "should have split coro.suspend into its own block");
+    }
+
+    return isDefinitionAcrossSuspend(DefBB, U);
+  }
+};
+} // end anonymous namespace
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void SuspendCrossingInfo::dump(StringRef Label,
+                                                BitVector const &BV) const {
+  dbgs() << Label << ":";
+  for (size_t I = 0, N = BV.size(); I < N; ++I)
+    if (BV[I])
+      dbgs() << " " << Mapping.indexToBlock(I)->getName();
+  dbgs() << "\n";
+}
+
+LLVM_DUMP_METHOD void SuspendCrossingInfo::dump() const {
+  for (size_t I = 0, N = Block.size(); I < N; ++I) {
+    BasicBlock *const B = Mapping.indexToBlock(I);
+    dbgs() << B->getName() << ":\n";
+    dump("   Consumes", Block[I].Consumes);
+    dump("      Kills", Block[I].Kills);
+  }
+  dbgs() << "\n";
+}
+#endif
+
+SuspendCrossingInfo::SuspendCrossingInfo(Function &F, coro::Shape &Shape)
+    : Mapping(F) {
+  const size_t N = Mapping.size();
+  Block.resize(N);
+
+  // Initialize every block so that it consumes itself.
+  for (size_t I = 0; I < N; ++I) {
+    auto &B = Block[I];
+    B.Consumes.resize(N);
+    B.Kills.resize(N);
+    B.Consumes.set(I);
+  }
+
+  // Mark all CoroEnd blocks. We do not propagate Kills beyond coro.ends, as
+  // the code beyond coro.end is reachable during the initial invocation of
+  // the coroutine.
+  for (auto *CE : Shape.CoroEnds)
+    getBlockData(CE->getParent()).End = true;
+
+  // Mark all suspend blocks and indicate that they kill everything they
+  // consume. Note that crossing coro.save also requires a spill, as any code
+  // between coro.save and coro.suspend may resume the coroutine and all of
+  // the state needs to be saved by that time.
+  auto markSuspendBlock = [&](IntrinsicInst *BarrierInst) {
+    BasicBlock *SuspendBlock = BarrierInst->getParent();
+    auto &B = getBlockData(SuspendBlock);
+    B.Suspend = true;
+    B.Kills |= B.Consumes;
+  };
+  for (auto *CSI : Shape.CoroSuspends) {
+    markSuspendBlock(CSI);
+    if (auto *Save = CSI->getCoroSave())
+      markSuspendBlock(Save);
+  }
+
+  // Iterate propagating consumes and kills until they stop changing.
+  int Iteration = 0;
+  (void)Iteration;
+
+  bool Changed;
+  do {
+    LLVM_DEBUG(dbgs() << "iteration " << ++Iteration);
+    LLVM_DEBUG(dbgs() << "==============\n");
+
+    Changed = false;
+    for (size_t I = 0; I < N; ++I) {
+      auto &B = Block[I];
+      for (BasicBlock *SI : successors(B)) {
+
+        auto SuccNo = Mapping.blockToIndex(SI);
+
+        // Save the Consumes and Kills bitsets so that it is easy to see
+        // if anything changed after propagation.
+        auto &S = Block[SuccNo];
+        auto SavedConsumes = S.Consumes;
+        auto SavedKills = S.Kills;
+
+        // Propagate Kills and Consumes from block B into its successor S.
+        S.Consumes |= B.Consumes;
+        S.Kills |= B.Kills;
+
+        // If block B is a suspend block, it should propagate kills into its
+        // successor for every block B consumes.
+        if (B.Suspend) {
+          S.Kills |= B.Consumes;
+        }
+        if (S.Suspend) {
+          // If block S is a suspend block, it should kill all of the blocks
+          // it consumes.
+          S.Kills |= S.Consumes;
+        } else if (S.End) {
+          // If block S is an end block, it should not propagate kills as the
+          // blocks following coro.end() are reached during the initial
+          // invocation of the coroutine while all the data are still
+          // available on the stack or in the registers.
+          S.Kills.reset();
+        } else {
+          // This is reached when block S is neither a suspend block nor a
+          // coro.end block; we need to make sure that S is not in its own
+          // kill set.
+          S.Kills.reset(SuccNo);
+        }
+
+        // See if anything changed.
+        Changed |= (S.Kills != SavedKills) || (S.Consumes != SavedConsumes);
+
+        if (S.Kills != SavedKills) {
+          LLVM_DEBUG(dbgs() << "\nblock " << I << " follower " << SI->getName()
+                            << "\n");
+          LLVM_DEBUG(dump("S.Kills", S.Kills));
+          LLVM_DEBUG(dump("SavedKills", SavedKills));
+        }
+        if (S.Consumes != SavedConsumes) {
+          LLVM_DEBUG(dbgs() << "\nblock " << I << " follower " << SI << "\n");
+          LLVM_DEBUG(dump("S.Consume", S.Consumes));
+          LLVM_DEBUG(dump("SavedCons", SavedConsumes));
+        }
+      }
+    }
+  } while (Changed);
+  LLVM_DEBUG(dump());
+}
+
+#undef DEBUG_TYPE // "coro-suspend-crossing"
+#define DEBUG_TYPE "coro-frame"
+
+// We build up the list of spills for every case where a use is separated
+// from the definition by a suspend point.
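+//
+// A minimal sketch of the situation being recorded (hypothetical C++
+// coroutine source, for illustration only):
+//
+//   task f() {
+//     int x = compute();     // definition of x
+//     co_await something();  // suspend point
+//     use(x);                // use: def and use are separated by a suspend,
+//   }                        // so a Spill{def = x, user = call} is recorded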
+
+static const unsigned InvalidFieldIndex = ~0U;
+
+namespace {
+class Spill {
+  Value *Def = nullptr;
+  Instruction *User = nullptr;
+  unsigned FieldNo = InvalidFieldIndex;
+
+public:
+  Spill(Value *Def, llvm::User *U) : Def(Def), User(cast<Instruction>(U)) {}
+
+  Value *def() const { return Def; }
+  Instruction *user() const { return User; }
+  BasicBlock *userBlock() const { return User->getParent(); }
+
+  // Note that the field index is stored in the first Spill entry for a
+  // particular definition. Subsequent mentions of a definition do not have a
+  // fieldNo assigned. This works out fine, as the users of Spills capture the
+  // info about the definition the first time they encounter it. Consider
+  // refactoring SpillInfo into two arrays to normalize the spill
+  // representation.
+  unsigned fieldIndex() const {
+    assert(FieldNo != InvalidFieldIndex && "Accessing unassigned field");
+    return FieldNo;
+  }
+  void setFieldIndex(unsigned FieldNumber) {
+    assert(FieldNo == InvalidFieldIndex && "Reassigning field number");
+    FieldNo = FieldNumber;
+  }
+};
+} // namespace
+
+// Note that there may be more than one record with the same value of Def in
+// the SpillInfo vector.
+using SpillInfo = SmallVector<Spill, 8>;
+
+#ifndef NDEBUG
+static void dump(StringRef Title, SpillInfo const &Spills) {
+  dbgs() << "------------- " << Title << "--------------\n";
+  Value *CurrentValue = nullptr;
+  for (auto const &E : Spills) {
+    if (CurrentValue != E.def()) {
+      CurrentValue = E.def();
+      CurrentValue->dump();
+    }
+    dbgs() << "   user: ";
+    E.user()->dump();
+  }
+}
+#endif
+
+namespace {
+// We cannot rely solely on natural alignment of a type when building a
+// coroutine frame: if the alignment specified on the alloca instruction
+// differs from the natural alignment of the allocated type, we will need to
+// insert padding.
+struct PaddingCalculator {
+  const DataLayout &DL;
+  LLVMContext &Context;
+  unsigned StructSize = 0;
+
+  PaddingCalculator(LLVMContext &Context, DataLayout const &DL)
+      : DL(DL), Context(Context) {}
+
+  // Replicate the logic from IR/DataLayout.cpp to match field offset
+  // computation for LLVM structs.
+  void addType(Type *Ty) {
+    unsigned TyAlign = DL.getABITypeAlignment(Ty);
+    if ((StructSize & (TyAlign - 1)) != 0)
+      StructSize = alignTo(StructSize, TyAlign);
+
+    StructSize += DL.getTypeAllocSize(Ty); // Consume space for this data item.
+  }
+
+  void addTypes(SmallVectorImpl<Type *> const &Types) {
+    for (auto *Ty : Types)
+      addType(Ty);
+  }
+
+  unsigned computePadding(Type *Ty, unsigned ForcedAlignment) {
+    unsigned TyAlign = DL.getABITypeAlignment(Ty);
+    auto Natural = alignTo(StructSize, TyAlign);
+    auto Forced = alignTo(StructSize, ForcedAlignment);
+
+    // Return how many bytes of padding we need to insert.
+    if (Natural != Forced)
+      return std::max(Natural, Forced) - StructSize;
+
+    // Rely on natural alignment.
+    return 0;
+  }
+
+  // If padding is required, return the padding field type to insert.
+  ArrayType *getPaddingType(Type *Ty, unsigned ForcedAlignment) {
+    if (auto Padding = computePadding(Ty, ForcedAlignment))
+      return ArrayType::get(Type::getInt8Ty(Context), Padding);
+
+    return nullptr;
+  }
+};
+} // namespace
+
+// Build a struct that will keep state for an active coroutine.
+//   struct f.frame {
+//     ResumeFnTy ResumeFnAddr;
+//     ResumeFnTy DestroyFnAddr;
+//     int ResumeIndex;
+//     ... promise (if present) ...
+//     ... spills ...
+//   };
+static StructType *buildFrameType(Function &F, coro::Shape &Shape,
+                                  SpillInfo &Spills) {
+  LLVMContext &C = F.getContext();
+  const DataLayout &DL = F.getParent()->getDataLayout();
+  PaddingCalculator Padder(C, DL);
+  SmallString<32> Name(F.getName());
+  Name.append(".Frame");
+  StructType *FrameTy = StructType::create(C, Name);
+  SmallVector<Type *, 8> Types;
+
+  AllocaInst *PromiseAlloca = Shape.getPromiseAlloca();
+
+  if (Shape.ABI == coro::ABI::Switch) {
+    auto *FramePtrTy = FrameTy->getPointerTo();
+    auto *FnTy = FunctionType::get(Type::getVoidTy(C), FramePtrTy,
+                                   /*IsVarArg=*/false);
+    auto *FnPtrTy = FnTy->getPointerTo();
+
+    // Figure out how wide the integer type storing the suspend index should
+    // be.
+    unsigned IndexBits = std::max(1U, Log2_64_Ceil(Shape.CoroSuspends.size()));
+    Type *PromiseType = PromiseAlloca
+                            ? PromiseAlloca->getType()->getElementType()
+                            : Type::getInt1Ty(C);
+    Type *IndexType = Type::getIntNTy(C, IndexBits);
+    Types.push_back(FnPtrTy);
+    Types.push_back(FnPtrTy);
+    Types.push_back(PromiseType);
+    Types.push_back(IndexType);
+  } else {
+    assert(PromiseAlloca == nullptr && "lowering doesn't support promises");
+  }
+
+  Value *CurrentDef = nullptr;
+
+  Padder.addTypes(Types);
+
+  // Create an entry for every spilled value.
+  for (auto &S : Spills) {
+    if (CurrentDef == S.def())
+      continue;
+
+    CurrentDef = S.def();
+    // PromiseAlloca was already added to the Types array earlier.
+    if (CurrentDef == PromiseAlloca)
+      continue;
+
+    uint64_t Count = 1;
+    Type *Ty = nullptr;
+    if (auto *AI = dyn_cast<AllocaInst>(CurrentDef)) {
+      Ty = AI->getAllocatedType();
+      if (unsigned AllocaAlignment = AI->getAlignment()) {
+        // If an alignment is specified in the alloca, see if we need to
+        // insert extra padding.
+        if (auto PaddingTy = Padder.getPaddingType(Ty, AllocaAlignment)) {
+          Types.push_back(PaddingTy);
+          Padder.addType(PaddingTy);
+        }
+      }
+      if (auto *CI = dyn_cast<ConstantInt>(AI->getArraySize()))
+        Count = CI->getValue().getZExtValue();
+      else
+        report_fatal_error("Coroutines cannot handle non static allocas yet");
+    } else {
+      Ty = CurrentDef->getType();
+    }
+    S.setFieldIndex(Types.size());
+    if (Count == 1)
+      Types.push_back(Ty);
+    else
+      Types.push_back(ArrayType::get(Ty, Count));
+    Padder.addType(Ty);
+  }
+  FrameTy->setBody(Types);
+
+  switch (Shape.ABI) {
+  case coro::ABI::Switch:
+    break;
+
+  // Remember whether the frame is inline in the storage.
+  case coro::ABI::Retcon:
+  case coro::ABI::RetconOnce: {
+    auto &Layout = F.getParent()->getDataLayout();
+    auto Id = Shape.getRetconCoroId();
+    Shape.RetconLowering.IsFrameInlineInStorage
+        = (Layout.getTypeAllocSize(FrameTy) <= Id->getStorageSize() &&
+           Layout.getABITypeAlignment(FrameTy) <= Id->getStorageAlignment());
+    break;
+  }
+  }
+
+  return FrameTy;
+}
+
+// We use a pointer use visitor to discover if there are any writes into an
+// alloca that dominates CoroBegin. If that is the case, insertSpills will
+// copy the value from the alloca into the coroutine frame spill slot
+// corresponding to that alloca.
+namespace {
+struct AllocaUseVisitor : PtrUseVisitor<AllocaUseVisitor> {
+  using Base = PtrUseVisitor<AllocaUseVisitor>;
+  AllocaUseVisitor(const DataLayout &DL, const DominatorTree &DT,
+                   const CoroBeginInst &CB)
+      : PtrUseVisitor(DL), DT(DT), CoroBegin(CB) {}
+
+  // We are only interested in uses that dominate coro.begin.
+  void visit(Instruction &I) {
+    if (DT.dominates(&I, &CoroBegin))
+      Base::visit(I);
+  }
+  // We need to provide this overload as PtrUseVisitor uses a pointer-based
+  // visiting function.
+  void visit(Instruction *I) { return visit(*I); }
+
+  void visitLoadInst(LoadInst &) {} // Good. Nothing to do.
+
+  // If the use is an operand, the pointer escaped and anything can write into
+  // that memory. If the use is the pointer, we are definitely writing into
+  // the alloca and therefore we need to copy.
+  void visitStoreInst(StoreInst &SI) { PI.setAborted(&SI); }
+
+  // Any other instruction that is not filtered out by PtrUseVisitor will
+  // result in the copy.
+  void visitInstruction(Instruction &I) { PI.setAborted(&I); }
+
+private:
+  const DominatorTree &DT;
+  const CoroBeginInst &CoroBegin;
+};
+} // namespace
+static bool mightWriteIntoAllocaPtr(AllocaInst &A, const DominatorTree &DT,
+                                    const CoroBeginInst &CB) {
+  const DataLayout &DL = A.getModule()->getDataLayout();
+  AllocaUseVisitor Visitor(DL, DT, CB);
+  auto PtrI = Visitor.visitPtr(A);
+  if (PtrI.isEscaped() || PtrI.isAborted()) {
+    auto *PointerEscapingInstr = PtrI.getEscapingInst()
+                                     ? PtrI.getEscapingInst()
+                                     : PtrI.getAbortingInst();
+    if (PointerEscapingInstr) {
+      LLVM_DEBUG(
+          dbgs() << "AllocaInst copy was triggered by instruction: "
+                 << *PointerEscapingInstr << "\n");
+    }
+    return true;
+  }
+  return false;
+}
+
+// We need to make room to insert a spill after the initial PHIs, but before
+// the catchswitch instruction. Placing the spill before the catchswitch
+// would violate the requirement that a catchswitch, like all other EH pads,
+// must be the first non-PHI instruction in a block.
+//
+// Split away the catchswitch into a separate block and insert in its place:
+//
+//   cleanuppad <InsertPt> cleanupret.
+//
+// The cleanupret instruction will act as an insert point for the spill.
+static Instruction *splitBeforeCatchSwitch(CatchSwitchInst *CatchSwitch) {
+  BasicBlock *CurrentBlock = CatchSwitch->getParent();
+  BasicBlock *NewBlock = CurrentBlock->splitBasicBlock(CatchSwitch);
+  CurrentBlock->getTerminator()->eraseFromParent();
+
+  auto *CleanupPad =
+      CleanupPadInst::Create(CatchSwitch->getParentPad(), {}, "", CurrentBlock);
+  auto *CleanupRet =
+      CleanupReturnInst::Create(CleanupPad, NewBlock, CurrentBlock);
+  return CleanupRet;
+}
+
+// Replace all alloca and SSA values that are accessed across suspend points
+// with GetElementPointer from the coroutine frame + loads and stores. Create
+// an AllocaSpillBB that will become the new entry block for the resume parts
+// of the coroutine:
+//
+//   %hdl = coro.begin(...)
+//   whatever
+//
+// becomes:
+//
+//   %hdl = coro.begin(...)
+//   %FramePtr = bitcast i8* hdl to %f.frame*
+//   br label %AllocaSpillBB
+//
+// AllocaSpillBB:
+//   ; geps corresponding to allocas that were moved to the coroutine frame
+//   br label %PostSpill
+//
+// PostSpill:
+//   whatever
+//
+static Instruction *insertSpills(const SpillInfo &Spills, coro::Shape &Shape) {
+  auto *CB = Shape.CoroBegin;
+  LLVMContext &C = CB->getContext();
+  IRBuilder<> Builder(CB->getNextNode());
+  StructType *FrameTy = Shape.FrameTy;
+  PointerType *FramePtrTy = FrameTy->getPointerTo();
+  auto *FramePtr =
+      cast<Instruction>(Builder.CreateBitCast(CB, FramePtrTy, "FramePtr"));
+  DominatorTree DT(*CB->getFunction());
+
+  Value *CurrentValue = nullptr;
+  BasicBlock *CurrentBlock = nullptr;
+  Value *CurrentReload = nullptr;
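+
+  // Editorial sketch of the rewrite performed below, for a hypothetical
+  // spilled i32 value %x with frame field index 4 (names and index chosen
+  // for illustration only):
+  //
+  //   ; at the definition of %x:
+  //   %x.spill.addr = getelementptr %f.Frame, %f.Frame* %FramePtr, i32 0, i32 4
+  //   store i32 %x, i32* %x.spill.addr
+  //
+  //   ; in every block that uses %x across a suspend:
+  //   %x.reload.addr = getelementptr %f.Frame, %f.Frame* %FramePtr, i32 0, i32 4
+  //   %x.reload = load i32, i32* %x.reload.addr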
+
+  // The proper field number will be read from the field definition.
+  unsigned Index = InvalidFieldIndex;
+
+  // We need to keep track of any allocas that need "spilling", since they
+  // will live in the coroutine frame now: all accesses to them need to be
+  // changed, not just the accesses across suspend points. We remember the
+  // allocas and their indices to be handled once we have processed all the
+  // spills.
+  SmallVector<std::pair<AllocaInst *, unsigned>, 4> Allocas;
+  // The promise alloca (if present) has a fixed field number.
+  if (auto *PromiseAlloca = Shape.getPromiseAlloca()) {
+    assert(Shape.ABI == coro::ABI::Switch);
+    Allocas.emplace_back(PromiseAlloca, coro::Shape::SwitchFieldIndex::Promise);
+  }
+
+  // Create a GEP with the given index into the coroutine frame for the
+  // original value Orig. Appends an extra 0 index for array-allocas,
+  // preserving the original type.
+  auto GetFramePointer = [&](uint32_t Index, Value *Orig) -> Value * {
+    SmallVector<Value *, 3> Indices = {
+        ConstantInt::get(Type::getInt32Ty(C), 0),
+        ConstantInt::get(Type::getInt32Ty(C), Index),
+    };
+
+    if (auto *AI = dyn_cast<AllocaInst>(Orig)) {
+      if (auto *CI = dyn_cast<ConstantInt>(AI->getArraySize())) {
+        auto Count = CI->getValue().getZExtValue();
+        if (Count > 1) {
+          Indices.push_back(ConstantInt::get(Type::getInt32Ty(C), 0));
+        }
+      } else {
+        report_fatal_error("Coroutines cannot handle non static allocas yet");
+      }
+    }
+
+    return Builder.CreateInBoundsGEP(FrameTy, FramePtr, Indices);
+  };
+
+  // Create a load instruction to reload the spilled value from the coroutine
+  // frame.
+  auto CreateReload = [&](Instruction *InsertBefore) {
+    assert(Index != InvalidFieldIndex && "accessing unassigned field number");
+    Builder.SetInsertPoint(InsertBefore);
+
+    auto *G = GetFramePointer(Index, CurrentValue);
+    G->setName(CurrentValue->getName() + Twine(".reload.addr"));
+
+    return isa<AllocaInst>(CurrentValue)
+               ? G
+               : Builder.CreateLoad(FrameTy->getElementType(Index), G,
+                                    CurrentValue->getName() + Twine(".reload"));
+  };
+
+  for (auto const &E : Spills) {
+    // If we have not seen the value, generate a spill.
+    if (CurrentValue != E.def()) {
+      CurrentValue = E.def();
+      CurrentBlock = nullptr;
+      CurrentReload = nullptr;
+
+      Index = E.fieldIndex();
+
+      if (auto *AI = dyn_cast<AllocaInst>(CurrentValue)) {
+        // A spilled AllocaInst will be replaced with a GEP into the coroutine
+        // frame; there is no spill required.
+        Allocas.emplace_back(AI, Index);
+        if (!AI->isStaticAlloca())
+          report_fatal_error("Coroutines cannot handle non static allocas yet");
+      } else {
+        // Otherwise, create a store instruction storing the value into the
+        // coroutine frame.
+
+        Instruction *InsertPt = nullptr;
+        if (auto Arg = dyn_cast<Argument>(CurrentValue)) {
+          // For arguments, we will place the store instruction right after
+          // the coroutine frame pointer instruction, i.e. the bitcast of
+          // coro.begin from i8* to %f.frame*.
+          InsertPt = FramePtr->getNextNode();
+
+          // If we're spilling an Argument, make sure we clear 'nocapture'
+          // from the coroutine function.
+          Arg->getParent()->removeParamAttr(Arg->getArgNo(),
+                                            Attribute::NoCapture);
+
+        } else if (auto *II = dyn_cast<InvokeInst>(CurrentValue)) {
+          // If we are spilling the result of the invoke instruction, split
+          // the normal edge and insert the spill in the new block.
+          auto NewBB = SplitEdge(II->getParent(), II->getNormalDest());
+          InsertPt = NewBB->getTerminator();
+        } else if (isa<PHINode>(CurrentValue)) {
+          // Skip the PHINodes and EH pad instructions.
+          BasicBlock *DefBlock = cast<Instruction>(E.def())->getParent();
+          if (auto *CSI = dyn_cast<CatchSwitchInst>(DefBlock->getTerminator()))
+            InsertPt = splitBeforeCatchSwitch(CSI);
+          else
+            InsertPt = &*DefBlock->getFirstInsertionPt();
+        } else if (auto CSI = dyn_cast<AnyCoroSuspendInst>(CurrentValue)) {
+          // Don't spill immediately after a suspend; splitting assumes
+          // that the suspend will be followed by a branch.
+          InsertPt = CSI->getParent()->getSingleSuccessor()->getFirstNonPHI();
+        } else {
+          auto *I = cast<Instruction>(E.def());
+          assert(!I->isTerminator() && "unexpected terminator");
+          // For all other values, the spill is placed immediately after
+          // the definition.
+          if (DT.dominates(CB, I)) {
+            InsertPt = I->getNextNode();
+          } else {
+            // If the definition is not dominated by CoroBegin, the spill is
+            // instead inserted immediately after the coroutine frame pointer
+            // is computed.
+            InsertPt = FramePtr->getNextNode();
+          }
+        }
+
+        Builder.SetInsertPoint(InsertPt);
+        auto *G = Builder.CreateConstInBoundsGEP2_32(
+            FrameTy, FramePtr, 0, Index,
+            CurrentValue->getName() + Twine(".spill.addr"));
+        Builder.CreateStore(CurrentValue, G);
+      }
+    }
+
+    // If we have not seen the use block, generate a reload in it.
+    if (CurrentBlock != E.userBlock()) {
+      CurrentBlock = E.userBlock();
+      CurrentReload = CreateReload(&*CurrentBlock->getFirstInsertionPt());
+    }
+
+    // If we have a single-edge PHINode, remove it and replace it with a
+    // reload from the coroutine frame. (We already took care of multi-edge
+    // PHINodes by rewriting them in the rewritePHIs function.)
+    if (auto *PN = dyn_cast<PHINode>(E.user())) {
+      assert(PN->getNumIncomingValues() == 1 && "unexpected number of incoming "
+                                                "values in the PHINode");
+      PN->replaceAllUsesWith(CurrentReload);
+      PN->eraseFromParent();
+      continue;
+    }
+
+    // Replace all uses of CurrentValue in the current instruction with the
+    // reload.
+    E.user()->replaceUsesOfWith(CurrentValue, CurrentReload);
+  }
+
+  BasicBlock *FramePtrBB = FramePtr->getParent();
+
+  auto SpillBlock =
+      FramePtrBB->splitBasicBlock(FramePtr->getNextNode(), "AllocaSpillBB");
+  SpillBlock->splitBasicBlock(&SpillBlock->front(), "PostSpill");
+  Shape.AllocaSpillBlock = SpillBlock;
+  // If we found any allocas, replace all of their remaining uses with GEPs.
+  // Note: we cannot do it indiscriminately as some of the uses may not be
+  // dominated by CoroBegin.
+  bool MightNeedToCopy = false;
+  Builder.SetInsertPoint(&Shape.AllocaSpillBlock->front());
+  SmallVector<Instruction *, 4> UsersToUpdate;
+  for (auto &P : Allocas) {
+    AllocaInst *const A = P.first;
+    UsersToUpdate.clear();
+    for (User *U : A->users()) {
+      auto *I = cast<Instruction>(U);
+      if (DT.dominates(CB, I))
+        UsersToUpdate.push_back(I);
+      else
+        MightNeedToCopy = true;
+    }
+    if (!UsersToUpdate.empty()) {
+      auto *G = GetFramePointer(P.second, A);
+      G->takeName(A);
+      for (Instruction *I : UsersToUpdate)
+        I->replaceUsesOfWith(A, G);
+    }
+  }
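+
+  // Editorial illustration of a case that requires the copy below
+  // (hypothetical IR): a store that dominates coro.begin writes into the
+  // alloca before its uses are redirected into the frame, e.g.
+  //
+  //   %a = alloca i32
+  //   store i32 7, i32* %a                  ; precedes coro.begin
+  //   %hdl = call i8* @llvm.coro.begin(...)
+  //   ... %a is used across a suspend point ...
+  //
+  // The stored value must be copied into the frame slot, otherwise it would
+  // be lost once all remaining uses of %a are rewritten to frame GEPs.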
+  // If we discovered uses not dominated by CoroBegin, see if any of them
+  // precede coro.begin and contain instructions that can modify the value of
+  // the alloca, which would therefore require copying the value into the
+  // spill slot in the coroutine frame.
+  if (MightNeedToCopy) {
+    Builder.SetInsertPoint(FramePtr->getNextNode());
+
+    for (auto &P : Allocas) {
+      AllocaInst *const A = P.first;
+      if (mightWriteIntoAllocaPtr(*A, DT, *CB)) {
+        if (A->isArrayAllocation())
+          report_fatal_error(
+              "Coroutines cannot handle copying of array allocas yet");
+
+        auto *G = GetFramePointer(P.second, A);
+        auto *Value = Builder.CreateLoad(A);
+        Builder.CreateStore(Value, G);
+      }
+    }
+  }
+  return FramePtr;
+}
+
+// Sets the unwind edge of an instruction to a particular successor.
+static void setUnwindEdgeTo(Instruction *TI, BasicBlock *Succ) {
+  if (auto *II = dyn_cast<InvokeInst>(TI))
+    II->setUnwindDest(Succ);
+  else if (auto *CS = dyn_cast<CatchSwitchInst>(TI))
+    CS->setUnwindDest(Succ);
+  else if (auto *CR = dyn_cast<CleanupReturnInst>(TI))
+    CR->setUnwindDest(Succ);
+  else
+    llvm_unreachable("unexpected terminator instruction");
+}
+
+// Replaces all uses of OldPred with the NewPred block in all PHINodes in a
+// block.
+static void updatePhiNodes(BasicBlock *DestBB, BasicBlock *OldPred,
+                           BasicBlock *NewPred,
+                           PHINode *LandingPadReplacement) {
+  unsigned BBIdx = 0;
+  for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) {
+    PHINode *PN = cast<PHINode>(I);
+
+    // We manually update the LandingPadReplacement PHINode and it is the
+    // last PHI node. So, if we find it, we are done.
+    if (LandingPadReplacement == PN)
+      break;
+
+    // Reuse the previous value of BBIdx if it lines up. In cases where we
+    // have multiple phi nodes with *lots* of predecessors, this is a speed
+    // win because we don't have to scan the PHI looking for TIBB. This
+    // happens because the BB list of PHI nodes is usually in the same
+    // order.
+    if (PN->getIncomingBlock(BBIdx) != OldPred)
+      BBIdx = PN->getBasicBlockIndex(OldPred);
+
+    assert(BBIdx != (unsigned)-1 && "Invalid PHI Index!");
+    PN->setIncomingBlock(BBIdx, NewPred);
+  }
+}
+
+// Uses SplitEdge unless the successor block is an EHPad, in which case do
+// EH-specific handling.
+static BasicBlock *ehAwareSplitEdge(BasicBlock *BB, BasicBlock *Succ,
+                                    LandingPadInst *OriginalPad,
+                                    PHINode *LandingPadReplacement) {
+  auto *PadInst = Succ->getFirstNonPHI();
+  if (!LandingPadReplacement && !PadInst->isEHPad())
+    return SplitEdge(BB, Succ);
+
+  auto *NewBB = BasicBlock::Create(BB->getContext(), "", BB->getParent(), Succ);
+  setUnwindEdgeTo(BB->getTerminator(), NewBB);
+  updatePhiNodes(Succ, BB, NewBB, LandingPadReplacement);
+
+  if (LandingPadReplacement) {
+    auto *NewLP = OriginalPad->clone();
+    auto *Terminator = BranchInst::Create(Succ, NewBB);
+    NewLP->insertBefore(Terminator);
+    LandingPadReplacement->addIncoming(NewLP, NewBB);
+    return NewBB;
+  }
+  Value *ParentPad = nullptr;
+  if (auto *FuncletPad = dyn_cast<FuncletPadInst>(PadInst))
+    ParentPad = FuncletPad->getParentPad();
+  else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(PadInst))
+    ParentPad = CatchSwitch->getParentPad();
+  else
+    llvm_unreachable("handling for other EHPads not implemented yet");
+
+  auto *NewCleanupPad = CleanupPadInst::Create(ParentPad, {}, "", NewBB);
+  CleanupReturnInst::Create(NewCleanupPad, Succ, NewBB);
+  return NewBB;
+}
+
+static void rewritePHIs(BasicBlock &BB) {
+  // For every incoming edge we will create a separate block that holds the
+  // incoming value in a single-entry PHI node. For example, given:
+  //
+  //   loop:
+  //     %n.val = phi i32 [%n, %entry], [%inc, %loop]
+  //
+  // it will create:
+  //
+  //   loop.from.entry:
+  //     %n.loop.pre = phi i32 [%n, %entry]
+  //     br label %loop
+  //   loop.from.loop:
+  //     %inc.loop.pre = phi i32 [%inc, %loop]
+  //     br label %loop
+  //
+  // After this rewrite, further analysis will ignore any phi nodes with more
+  // than one incoming edge.
+
+  // TODO: Simplify PHINodes in the basic block to remove duplicate
+  // predecessors.
+
+  LandingPadInst *LandingPad = nullptr;
+  PHINode *ReplPHI = nullptr;
+  if ((LandingPad = dyn_cast_or_null<LandingPadInst>(BB.getFirstNonPHI()))) {
+    // ehAwareSplitEdge will clone the LandingPad in all the edge blocks.
+    // We replace the original landing pad with a PHINode that will collect
+    // the results from all of them.
+    ReplPHI = PHINode::Create(LandingPad->getType(), 1, "", LandingPad);
+    ReplPHI->takeName(LandingPad);
+    LandingPad->replaceAllUsesWith(ReplPHI);
+    // We will erase the original landing pad at the end of this function,
+    // after ehAwareSplitEdge has cloned it in the transition blocks.
+  }
+
+  SmallVector<BasicBlock *, 8> Preds(pred_begin(&BB), pred_end(&BB));
+  for (BasicBlock *Pred : Preds) {
+    auto *IncomingBB = ehAwareSplitEdge(Pred, &BB, LandingPad, ReplPHI);
+    IncomingBB->setName(BB.getName() + Twine(".from.") + Pred->getName());
+    auto *PN = cast<PHINode>(&BB.front());
+    do {
+      int Index = PN->getBasicBlockIndex(IncomingBB);
+      Value *V = PN->getIncomingValue(Index);
+      PHINode *InputV = PHINode::Create(
+          V->getType(), 1, V->getName() + Twine(".") + BB.getName(),
+          &IncomingBB->front());
+      InputV->addIncoming(V, Pred);
+      PN->setIncomingValue(Index, InputV);
+      PN = dyn_cast<PHINode>(PN->getNextNode());
+    } while (PN != ReplPHI); // ReplPHI is either null or the PHI that replaced
+                             // the landing pad.
+  }
+
+  if (LandingPad) {
+    // Calls to the ehAwareSplitEdge function cloned the original landing
+    // pad; we no longer need it.
+    LandingPad->eraseFromParent();
+  }
+}
+
+static void rewritePHIs(Function &F) {
+  SmallVector<BasicBlock *, 8> WorkList;
+
+  for (BasicBlock &BB : F)
+    if (auto *PN = dyn_cast<PHINode>(&BB.front()))
+      if (PN->getNumIncomingValues() > 1)
+        WorkList.push_back(&BB);
+
+  for (BasicBlock *BB : WorkList)
+    rewritePHIs(*BB);
+}
+
+// Check for instructions that we can recreate on resume, as opposed to
+// spilling the result into the coroutine frame.
+static bool materializable(Instruction &V) {
+  return isa<CastInst>(&V) || isa<GetElementPtrInst>(&V) ||
+         isa<BinaryOperator>(&V) || isa<CmpInst>(&V) || isa<SelectInst>(&V);
+}
+
+// Check for structural coroutine intrinsics that should not be spilled into
+// the coroutine frame.
+static bool isCoroutineStructureIntrinsic(Instruction &I) {
+  return isa<CoroIdInst>(&I) || isa<CoroSaveInst>(&I) ||
+         isa<CoroSuspendInst>(&I);
+}
+
+// For every use of a value that occurs across a suspend point, recreate that
+// value after the suspend point.
+static void rewriteMaterializableInstructions(IRBuilder<> &IRB,
+                                              SpillInfo const &Spills) {
+  BasicBlock *CurrentBlock = nullptr;
+  Instruction *CurrentMaterialization = nullptr;
+  Instruction *CurrentDef = nullptr;
+
+  for (auto const &E : Spills) {
+    // If it is a new definition, update CurrentXXX variables.
+    if (CurrentDef != E.def()) {
+      CurrentDef = cast<Instruction>(E.def());
+      CurrentBlock = nullptr;
+      CurrentMaterialization = nullptr;
+    }
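+
+    // Editorial note, with a hypothetical example: if CurrentDef is
+    //   %p = getelementptr %S, %S* %base, i32 0, i32 1
+    // then instead of spilling %p we clone this cheap instruction into every
+    // block that uses %p after a suspend; only %base (if anything) needs a
+    // frame slot.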
+
+    // If we have not seen this block, materialize the value.
+    if (CurrentBlock != E.userBlock()) {
+      CurrentBlock = E.userBlock();
+      CurrentMaterialization = cast<Instruction>(CurrentDef)->clone();
+      CurrentMaterialization->setName(CurrentDef->getName());
+      CurrentMaterialization->insertBefore(
+          &*CurrentBlock->getFirstInsertionPt());
+    }
+
+    if (auto *PN = dyn_cast<PHINode>(E.user())) {
+      assert(PN->getNumIncomingValues() == 1 && "unexpected number of incoming "
+                                                "values in the PHINode");
+      PN->replaceAllUsesWith(CurrentMaterialization);
+      PN->eraseFromParent();
+      continue;
+    }
+
+    // Replace all uses of CurrentDef in the current instruction with the
+    // CurrentMaterialization for the block.
+    E.user()->replaceUsesOfWith(CurrentDef, CurrentMaterialization);
+  }
+}
+
+// Splits the block at a particular instruction unless it is the first
+// instruction in the block with a single predecessor.
+static BasicBlock *splitBlockIfNotFirst(Instruction *I, const Twine &Name) {
+  auto *BB = I->getParent();
+  if (&BB->front() == I) {
+    if (BB->getSinglePredecessor()) {
+      BB->setName(Name);
+      return BB;
+    }
+  }
+  return BB->splitBasicBlock(I, Name);
+}
+
+// Split above and below a particular instruction so that it
+// will be all alone by itself in a block.
+static void splitAround(Instruction *I, const Twine &Name) {
+  splitBlockIfNotFirst(I, Name);
+  splitBlockIfNotFirst(I->getNextNode(), "After" + Name);
+}
+
+static bool isSuspendBlock(BasicBlock *BB) {
+  return isa<AnyCoroSuspendInst>(BB->front());
+}
+
+typedef SmallPtrSet<BasicBlock*, 8> VisitedBlocksSet;
+
+/// Does control flow starting at the given block ever reach a suspend
+/// instruction before reaching a block in VisitedOrFreeBBs?
+static bool isSuspendReachableFrom(BasicBlock *From,
+                                   VisitedBlocksSet &VisitedOrFreeBBs) {
+  // Eagerly try to add this block to the visited set. If it's already
+  // there, stop recursing; this path doesn't reach a suspend before
+  // either looping or reaching a freeing block.
+  if (!VisitedOrFreeBBs.insert(From).second)
+    return false;
+
+  // We assume that we'll already have split suspends into their own blocks.
+  if (isSuspendBlock(From))
+    return true;
+
+  // Recurse on the successors.
+  for (auto Succ : successors(From)) {
+    if (isSuspendReachableFrom(Succ, VisitedOrFreeBBs))
+      return true;
+  }
+
+  return false;
+}
+
+/// Is the given alloca "local", i.e. bounded in lifetime to not cross a
+/// suspend point?
+static bool isLocalAlloca(CoroAllocaAllocInst *AI) {
+  // Seed the visited set with all the basic blocks containing a free
+  // so that we won't pass them up.
+  VisitedBlocksSet VisitedOrFreeBBs;
+  for (auto User : AI->users()) {
+    if (auto FI = dyn_cast<CoroAllocaFreeInst>(User))
+      VisitedOrFreeBBs.insert(FI->getParent());
+  }
+
+  return !isSuspendReachableFrom(AI->getParent(), VisitedOrFreeBBs);
+}
+
+/// After we split the coroutine, will the given basic block be along
+/// an obvious exit path for the resumption function?
+static bool willLeaveFunctionImmediatelyAfter(BasicBlock *BB,
+                                              unsigned depth = 3) {
+  // If we've bottomed out our depth count, stop searching and assume
+  // that the path might loop back.
+  if (depth == 0) return false;
+
+  // If this is a suspend block, we're about to exit the resumption function.
+  if (isSuspendBlock(BB)) return true;
+
+  // Recurse into the successors.
+  for (auto Succ : successors(BB)) {
+    if (!willLeaveFunctionImmediatelyAfter(Succ, depth - 1))
+      return false;
+  }
+
+  // If none of the successors leads back in a loop, we're on an exit/abort.
+  return true;
+}
+
+static bool localAllocaNeedsStackSave(CoroAllocaAllocInst *AI) {
+  // Look for a free that isn't sufficiently obviously followed by
+  // either a suspend or a termination, i.e. something that will leave
+  // the coro resumption frame.
+  for (auto U : AI->users()) {
+    auto FI = dyn_cast<CoroAllocaFreeInst>(U);
+    if (!FI) continue;
+
+    if (!willLeaveFunctionImmediatelyAfter(FI->getParent()))
+      return true;
+  }
+
+  // If we never found one, we don't need a stack save.
+  return false;
+}
+
+/// Turn each of the given local allocas into a normal (dynamic) alloca
+/// instruction.
+static void lowerLocalAllocas(ArrayRef<CoroAllocaAllocInst*> LocalAllocas,
+                              SmallVectorImpl<Instruction*> &DeadInsts) {
+  for (auto AI : LocalAllocas) {
+    auto M = AI->getModule();
+    IRBuilder<> Builder(AI);
+
+    // Save the stack depth. Try to avoid doing this if the stackrestore
+    // is going to immediately precede a return or something.
+    Value *StackSave = nullptr;
+    if (localAllocaNeedsStackSave(AI))
+      StackSave = Builder.CreateCall(
+          Intrinsic::getDeclaration(M, Intrinsic::stacksave));
+
+    // Allocate memory.
+    auto Alloca = Builder.CreateAlloca(Builder.getInt8Ty(), AI->getSize());
+    Alloca->setAlignment(MaybeAlign(AI->getAlignment()));
+
+    for (auto U : AI->users()) {
+      // Replace gets with the allocation.
+      if (isa<CoroAllocaGetInst>(U)) {
+        U->replaceAllUsesWith(Alloca);
+
+      // Replace frees with stackrestores. This is safe because
+      // alloca.alloc is required to obey a stack discipline, although we
+      // don't enforce that structurally.
+      } else {
+        auto FI = cast<CoroAllocaFreeInst>(U);
+        if (StackSave) {
+          Builder.SetInsertPoint(FI);
+          Builder.CreateCall(
+              Intrinsic::getDeclaration(M, Intrinsic::stackrestore),
+              StackSave);
+        }
+      }
+      DeadInsts.push_back(cast<Instruction>(U));
+    }
+
+    DeadInsts.push_back(AI);
+  }
+}
+
+/// Turn the given coro.alloca.alloc call into a dynamic allocation.
+/// This happens during the all-instructions iteration, so it must not
+/// delete the call.
+static Instruction *lowerNonLocalAlloca(CoroAllocaAllocInst *AI,
+                                        coro::Shape &Shape,
+                                        SmallVectorImpl<Instruction*> &DeadInsts) {
+  IRBuilder<> Builder(AI);
+  auto Alloc = Shape.emitAlloc(Builder, AI->getSize(), nullptr);
+
+  for (User *U : AI->users()) {
+    if (isa<CoroAllocaGetInst>(U)) {
+      U->replaceAllUsesWith(Alloc);
+    } else {
+      auto FI = cast<CoroAllocaFreeInst>(U);
+      Builder.SetInsertPoint(FI);
+      Shape.emitDealloc(Builder, Alloc, nullptr);
+    }
+    DeadInsts.push_back(cast<Instruction>(U));
+  }
+
+  // Push this on last so that it gets deleted after all the others.
+  DeadInsts.push_back(AI);
+
+  // Return the new allocation value so that we can check for needed spills.
+  return cast<Instruction>(Alloc);
+}
+
+/// Get the current swifterror value.
+static Value *emitGetSwiftErrorValue(IRBuilder<> &Builder, Type *ValueTy,
+                                     coro::Shape &Shape) {
+  // Make a fake function pointer as a sort of intrinsic.
+  auto FnTy = FunctionType::get(ValueTy, {}, false);
+  auto Fn = ConstantPointerNull::get(FnTy->getPointerTo());
+
+  auto Call = Builder.CreateCall(Fn, {});
+  Shape.SwiftErrorOps.push_back(Call);
+
+  return Call;
+}
+
+/// Set the given value as the current swifterror value.
+///
+/// Returns a slot that can be used as a swifterror slot.
+static Value *emitSetSwiftErrorValue(IRBuilder<> &Builder, Value *V,
+                                     coro::Shape &Shape) {
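+  // Editorial note: as in emitGetSwiftErrorValue above, the "intrinsic" is
+  // modeled as a call through a null function pointer of the appropriate
+  // type; these placeholder calls are collected in Shape.SwiftErrorOps and
+  // lowered to real swifterror accesses when the coroutine is split.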
+  // Make a fake function pointer as a sort of intrinsic.
+  auto FnTy = FunctionType::get(V->getType()->getPointerTo(),
+                                {V->getType()}, false);
+  auto Fn = ConstantPointerNull::get(FnTy->getPointerTo());
+
+  auto Call = Builder.CreateCall(Fn, { V });
+  Shape.SwiftErrorOps.push_back(Call);
+
+  return Call;
+}
+
+/// Set the swifterror value from the given alloca before a call,
+/// then put it back in the alloca afterwards.
+///
+/// Returns an address that will stand in for the swifterror slot
+/// until splitting.
+static Value *emitSetAndGetSwiftErrorValueAround(Instruction *Call,
+                                                 AllocaInst *Alloca,
+                                                 coro::Shape &Shape) {
+  auto ValueTy = Alloca->getAllocatedType();
+  IRBuilder<> Builder(Call);
+
+  // Load the current value from the alloca and set it as the
+  // swifterror value.
+  auto ValueBeforeCall = Builder.CreateLoad(ValueTy, Alloca);
+  auto Addr = emitSetSwiftErrorValue(Builder, ValueBeforeCall, Shape);
+
+  // Move to after the call. Since swifterror only has a guaranteed
+  // value on normal exits, we can ignore implicit and explicit unwind
+  // edges.
+  if (isa<CallInst>(Call)) {
+    Builder.SetInsertPoint(Call->getNextNode());
+  } else {
+    auto Invoke = cast<InvokeInst>(Call);
+    Builder.SetInsertPoint(Invoke->getNormalDest()->getFirstNonPHIOrDbg());
+  }
+
+  // Get the current swifterror value and store it to the alloca.
+  auto ValueAfterCall = emitGetSwiftErrorValue(Builder, ValueTy, Shape);
+  Builder.CreateStore(ValueAfterCall, Alloca);
+
+  return Addr;
+}
+
+/// Eliminate a formerly-swifterror alloca by inserting the get/set
+/// intrinsics and attempting to MemToReg the alloca away.
+static void eliminateSwiftErrorAlloca(Function &F, AllocaInst *Alloca,
+                                      coro::Shape &Shape) {
+  for (auto UI = Alloca->use_begin(), UE = Alloca->use_end(); UI != UE; ) {
+    // We're likely changing the use list, so use a mutation-safe
+    // iteration pattern.
+    auto &Use = *UI;
+    ++UI;
+
+    // swifterror values can only be used in very specific ways.
+    // We take advantage of that here.
+    auto User = Use.getUser();
+    if (isa<LoadInst>(User) || isa<StoreInst>(User))
+      continue;
+
+    assert(isa<CallInst>(User) || isa<InvokeInst>(User));
+    auto Call = cast<Instruction>(User);
+
+    auto Addr = emitSetAndGetSwiftErrorValueAround(Call, Alloca, Shape);
+
+    // Use the returned slot address as the call argument.
+    Use.set(Addr);
+  }
+
+  // All the uses should be loads and stores now.
+  assert(isAllocaPromotable(Alloca));
+}
+
+/// "Eliminate" a swifterror argument by reducing it to the alloca case
+/// and then loading and storing in the prologue and epilogue.
+///
+/// The argument keeps the swifterror flag.
+static void eliminateSwiftErrorArgument(Function &F, Argument &Arg,
+                                        coro::Shape &Shape,
+                                        SmallVectorImpl<AllocaInst*> &AllocasToPromote) {
+  IRBuilder<> Builder(F.getEntryBlock().getFirstNonPHIOrDbg());
+
+  auto ArgTy = cast<PointerType>(Arg.getType());
+  auto ValueTy = ArgTy->getElementType();
+
+  // Reduce to the alloca case:
+
+  // Create an alloca and replace all uses of the arg with it.
+  auto Alloca = Builder.CreateAlloca(ValueTy, ArgTy->getAddressSpace());
+  Arg.replaceAllUsesWith(Alloca);
+
+  // Set an initial value in the alloca. swifterror is always null on entry.
+  auto InitialValue = Constant::getNullValue(ValueTy);
+  Builder.CreateStore(InitialValue, Alloca);
+
+  // Find all the suspends in the function and save and restore around them.
+  for (auto Suspend : Shape.CoroSuspends) {
+    (void) emitSetAndGetSwiftErrorValueAround(Suspend, Alloca, Shape);
+  }
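+
+  // Editorial sketch of what the helper emits around each suspend, using
+  // set_swifterror/get_swifterror as hypothetical names for the placeholder
+  // calls produced by emitSetSwiftErrorValue/emitGetSwiftErrorValue:
+  //
+  //   %v = load ValueTy, ValueTy* %alloca   ; before the suspend
+  //   %slot = set_swifterror(%v)            ; placeholder call
+  //   ... suspend ...
+  //   %v2 = get_swifterror()                ; placeholder call
+  //   store ValueTy %v2, ValueTy* %alloca   ; after the suspend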
+
+  // Find all the coro.ends in the function and restore the error value.
+  for (auto End : Shape.CoroEnds) {
+    Builder.SetInsertPoint(End);
+    auto FinalValue = Builder.CreateLoad(ValueTy, Alloca);
+    (void) emitSetSwiftErrorValue(Builder, FinalValue, Shape);
+  }
+
+  // Now we can use the alloca logic.
+  AllocasToPromote.push_back(Alloca);
+  eliminateSwiftErrorAlloca(F, Alloca, Shape);
+}
+
+/// Eliminate all problematic uses of swifterror arguments and allocas
+/// from the function. We'll fix them up later when splitting the function.
+static void eliminateSwiftError(Function &F, coro::Shape &Shape) {
+  SmallVector<AllocaInst*, 4> AllocasToPromote;
+
+  // Look for a swifterror argument.
+  for (auto &Arg : F.args()) {
+    if (!Arg.hasSwiftErrorAttr()) continue;
+
+    eliminateSwiftErrorArgument(F, Arg, Shape, AllocasToPromote);
+    break;
+  }
+
+  // Look for swifterror allocas.
+  for (auto &Inst : F.getEntryBlock()) {
+    auto Alloca = dyn_cast<AllocaInst>(&Inst);
+    if (!Alloca || !Alloca->isSwiftError()) continue;
+
+    // Clear the swifterror flag.
+    Alloca->setSwiftError(false);
+
+    AllocasToPromote.push_back(Alloca);
+    eliminateSwiftErrorAlloca(F, Alloca, Shape);
+  }
+
+  // If we have any allocas to promote, compute a dominator tree and
+  // promote them en masse.
+  if (!AllocasToPromote.empty()) {
+    DominatorTree DT(F);
+    PromoteMemToReg(AllocasToPromote, DT);
+  }
+}
+
+void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
+  // Lower coro.dbg.declare to coro.dbg.value, since we are going to rewrite
+  // access to local variables.
+  LowerDbgDeclare(F);
+
+  eliminateSwiftError(F, Shape);
+
+  if (Shape.ABI == coro::ABI::Switch &&
+      Shape.SwitchLowering.PromiseAlloca) {
+    Shape.getSwitchCoroId()->clearPromise();
+  }
+
+  // Make sure that all coro.save, coro.suspend and the fallthrough coro.end
+  // intrinsics are in their own blocks to simplify the logic of building up
+  // SuspendCrossing data.
+  for (auto *CSI : Shape.CoroSuspends) {
+    if (auto *Save = CSI->getCoroSave())
+      splitAround(Save, "CoroSave");
+    splitAround(CSI, "CoroSuspend");
+  }
+
+  // Put CoroEnds into their own blocks.
+  for (CoroEndInst *CE : Shape.CoroEnds)
+    splitAround(CE, "CoroEnd");
+
+  // Transform multi-edge PHI nodes, so that any value feeding into a PHI
+  // never has its definition separated from the PHI by a suspend point.
+  rewritePHIs(F);
+
+  // Build suspend crossing info.
+  SuspendCrossingInfo Checker(F, Shape);
+
+  IRBuilder<> Builder(F.getContext());
+  SpillInfo Spills;
+  SmallVector<CoroAllocaAllocInst*, 4> LocalAllocas;
+  SmallVector<Instruction*, 4> DeadInstructions;
+
+  for (int Repeat = 0; Repeat < 4; ++Repeat) {
+    // See if there are materializable instructions across suspend points.
+    for (Instruction &I : instructions(F))
+      if (materializable(I))
+        for (User *U : I.users())
+          if (Checker.isDefinitionAcrossSuspend(I, U))
+            Spills.emplace_back(&I, U);
+
+    if (Spills.empty())
+      break;
+
+    // Rewrite materializable instructions to be materialized at the use
+    // point.
+    LLVM_DEBUG(dump("Materializations", Spills));
+    rewriteMaterializableInstructions(Builder, Spills);
+    Spills.clear();
+  }
+
+  // Collect the spills for arguments and other not-materializable values.
+  for (Argument &A : F.args())
+    for (User *U : A.users())
+      if (Checker.isDefinitionAcrossSuspend(A, U))
+        Spills.emplace_back(&A, U);
+
+  for (Instruction &I : instructions(F)) {
+    // Values returned from coroutine structure intrinsics should not be part
+    // of the coroutine frame.
+    if (isCoroutineStructureIntrinsic(I) || &I == Shape.CoroBegin)
+      continue;
+
+    // The coroutine promise is always included in the coroutine frame; there
+    // is no need to check for suspend crossings.
+    if (Shape.ABI == coro::ABI::Switch &&
+        Shape.SwitchLowering.PromiseAlloca == &I)
+      continue;
+
+    // Handle alloca.alloc specially here.
+    if (auto AI = dyn_cast<CoroAllocaAllocInst>(&I)) {
+      // Check whether the alloca's lifetime is bounded by suspend points.
+      if (isLocalAlloca(AI)) {
+        LocalAllocas.push_back(AI);
+        continue;
+      }
+
+      // If not, do a quick rewrite of the alloca and then add spills of
+      // the rewritten value. The rewrite doesn't invalidate anything in
+      // Spills because the other alloca intrinsics have no other operands
+      // besides AI, and it doesn't invalidate the iteration because we delay
+      // erasing AI.
+      auto Alloc = lowerNonLocalAlloca(AI, Shape, DeadInstructions);
+
+      for (User *U : Alloc->users()) {
+        if (Checker.isDefinitionAcrossSuspend(*Alloc, U))
+          Spills.emplace_back(Alloc, U);
+      }
+      continue;
+    }
+
+    // Ignore alloca.get; we process this as part of coro.alloca.alloc.
+    if (isa<CoroAllocaGetInst>(I)) {
+      continue;
+    }
+
+    for (User *U : I.users())
+      if (Checker.isDefinitionAcrossSuspend(I, U)) {
+        // We cannot spill a token.
+        if (I.getType()->isTokenTy())
+          report_fatal_error(
+              "token definition is separated from the use by a suspend point");
+        Spills.emplace_back(&I, U);
+      }
+  }
+  LLVM_DEBUG(dump("Spills", Spills));
+  Shape.FrameTy = buildFrameType(F, Shape, Spills);
+  Shape.FramePtr = insertSpills(Spills, Shape);
+  lowerLocalAllocas(LocalAllocas, DeadInstructions);
+
+  for (auto I : DeadInstructions)
+    I->eraseFromParent();
+}
