author      Dimitry Andric <dim@FreeBSD.org>   2020-07-26 19:36:28 +0000
committer   Dimitry Andric <dim@FreeBSD.org>   2020-07-26 19:36:28 +0000
commit      cfca06d7963fa0909f90483b42a6d7d194d01e08 (patch)
tree        209fb2a2d68f8f277793fc8df46c753d31bc853b /llvm/lib/Transforms/Coroutines/CoroFrame.cpp
parent      706b4fc47bbc608932d3b491ae19a3b9cde9497b (diff)
Diffstat (limited to 'llvm/lib/Transforms/Coroutines/CoroFrame.cpp')
-rw-r--r--   llvm/lib/Transforms/Coroutines/CoroFrame.cpp | 515
1 file changed, 420 insertions(+), 95 deletions(-)
diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
index 2c42cf8a6d25..f55501a05d85 100644
--- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
@@ -18,18 +18,22 @@
 #include "CoroInternal.h"
 #include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallString.h"
 #include "llvm/Analysis/PtrUseVisitor.h"
-#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Config/llvm-config.h"
 #include "llvm/IR/CFG.h"
+#include "llvm/IR/DIBuilder.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/InstIterator.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/circular_raw_ostream.h"
+#include "llvm/Support/OptimizedStructLayout.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/PromoteMemToReg.h"
+#include <algorithm>
 
 using namespace llvm;
 
@@ -105,7 +109,6 @@ struct SuspendCrossingInfo {
     size_t const DefIndex = Mapping.blockToIndex(DefBB);
     size_t const UseIndex = Mapping.blockToIndex(UseBB);
 
-    assert(Block[UseIndex].Consumes[DefIndex] && "use must consume def");
     bool const Result = Block[UseIndex].Kills[DefIndex];
     LLVM_DEBUG(dbgs() << UseBB->getName() << " => " << DefBB->getName()
                       << " answer is " << Result << "\n");
@@ -338,52 +341,182 @@ namespace {
 // coroutine frame and if the alignment specified on the Alloca instruction
 // differs from the natural alignment of the alloca type we will need to insert
 // padding.
-struct PaddingCalculator {
+class FrameTypeBuilder {
+  struct Field {
+    uint64_t Size;
+    uint64_t Offset;
+    Spill *ForSpill;
+    Type *Ty;
+    unsigned FieldIndex;
+    Align Alignment;
+    Align TyAlignment;
+  };
+
   const DataLayout &DL;
   LLVMContext &Context;
-  unsigned StructSize = 0;
+  uint64_t StructSize = 0;
+  Align StructAlign;
+  bool IsFinished = false;
 
-  PaddingCalculator(LLVMContext &Context, DataLayout const &DL)
-      : DL(DL), Context(Context) {}
+  SmallVector<Field, 8> Fields;
+  DenseMap<Value*, unsigned> FieldIndexByKey;
 
-  // Replicate the logic from IR/DataLayout.cpp to match field offset
-  // computation for LLVM structs.
-  void addType(Type *Ty) {
-    unsigned TyAlign = DL.getABITypeAlignment(Ty);
-    if ((StructSize & (TyAlign - 1)) != 0)
-      StructSize = alignTo(StructSize, TyAlign);
+public:
+  FrameTypeBuilder(LLVMContext &Context, DataLayout const &DL)
+    : DL(DL), Context(Context) {}
 
-    StructSize += DL.getTypeAllocSize(Ty); // Consume space for this data item.
-  }
+  class FieldId {
+    size_t Value;
+    explicit FieldId(size_t Value) : Value(Value) {}
+
+    friend class FrameTypeBuilder;
+  };
+
+  /// Add a field to this structure for the storage of an `alloca`
+  /// instruction.
+  FieldId addFieldForAlloca(AllocaInst *AI, Spill *ForSpill = nullptr,
+                            bool IsHeader = false) {
+    Type *Ty = AI->getAllocatedType();
+
+    // Make an array type if this is a static array allocation.
+    if (AI->isArrayAllocation()) {
+      if (auto *CI = dyn_cast<ConstantInt>(AI->getArraySize()))
+        Ty = ArrayType::get(Ty, CI->getValue().getZExtValue());
+      else
+        report_fatal_error("Coroutines cannot handle non static allocas yet");
+    }
 
-  void addTypes(SmallVectorImpl<Type *> const &Types) {
-    for (auto *Ty : Types)
-      addType(Ty);
+    return addField(Ty, AI->getAlign(), ForSpill, IsHeader);
   }
 
-  unsigned computePadding(Type *Ty, unsigned ForcedAlignment) {
-    unsigned TyAlign = DL.getABITypeAlignment(Ty);
-    auto Natural = alignTo(StructSize, TyAlign);
-    auto Forced = alignTo(StructSize, ForcedAlignment);
+  /// Add a field to this structure.
+  FieldId addField(Type *Ty, MaybeAlign FieldAlignment,
+                   Spill *ForSpill = nullptr,
+                   bool IsHeader = false) {
+    assert(!IsFinished && "adding fields to a finished builder");
+    assert(Ty && "must provide a type for a field");
 
-    // Return how many bytes of padding we need to insert.
-    if (Natural != Forced)
-      return std::max(Natural, Forced) - StructSize;
+    // The field size is always the alloc size of the type.
+    uint64_t FieldSize = DL.getTypeAllocSize(Ty);
 
-    // Rely on natural alignment.
-    return 0;
+    // The field alignment might not be the type alignment, but we need
+    // to remember the type alignment anyway to build the type.
+    Align TyAlignment = DL.getABITypeAlign(Ty);
+    if (!FieldAlignment) FieldAlignment = TyAlignment;
+
+    // Lay out header fields immediately.
+    uint64_t Offset;
+    if (IsHeader) {
+      Offset = alignTo(StructSize, FieldAlignment);
+      StructSize = Offset + FieldSize;
+
+    // Everything else has a flexible offset.
+    } else {
+      Offset = OptimizedStructLayoutField::FlexibleOffset;
+    }
+
+    Fields.push_back({FieldSize, Offset, ForSpill, Ty, 0,
+                      *FieldAlignment, TyAlignment});
+    return FieldId(Fields.size() - 1);
+  }
+
+  /// Finish the layout and set the body on the given type.
+  void finish(StructType *Ty);
+
+  uint64_t getStructSize() const {
+    assert(IsFinished && "not yet finished!");
+    return StructSize;
   }
 
-  // If padding required, return the padding field type to insert.
-  ArrayType *getPaddingType(Type *Ty, unsigned ForcedAlignment) {
-    if (auto Padding = computePadding(Ty, ForcedAlignment))
-      return ArrayType::get(Type::getInt8Ty(Context), Padding);
+  Align getStructAlign() const {
+    assert(IsFinished && "not yet finished!");
+    return StructAlign;
+  }
 
-    return nullptr;
+  unsigned getFieldIndex(FieldId Id) const {
+    assert(IsFinished && "not yet finished!");
+    return Fields[Id.Value].FieldIndex;
   }
 };
 } // namespace
 
+void FrameTypeBuilder::finish(StructType *Ty) {
+  assert(!IsFinished && "already finished!");
+
+  // Prepare the optimal-layout field array.
+  // The Id in the layout field is a pointer to our Field for it.
+  SmallVector<OptimizedStructLayoutField, 8> LayoutFields;
+  LayoutFields.reserve(Fields.size());
+  for (auto &Field : Fields) {
+    LayoutFields.emplace_back(&Field, Field.Size, Field.Alignment,
+                              Field.Offset);
+  }
+
+  // Perform layout.
+  auto SizeAndAlign = performOptimizedStructLayout(LayoutFields);
+  StructSize = SizeAndAlign.first;
+  StructAlign = SizeAndAlign.second;
+
+  auto getField = [](const OptimizedStructLayoutField &LayoutField) -> Field & {
+    return *static_cast<Field *>(const_cast<void*>(LayoutField.Id));
+  };
+
+  // We need to produce a packed struct type if there's a field whose
+  // assigned offset isn't a multiple of its natural type alignment.
+  bool Packed = [&] {
+    for (auto &LayoutField : LayoutFields) {
+      auto &F = getField(LayoutField);
+      if (!isAligned(F.TyAlignment, LayoutField.Offset))
+        return true;
+    }
+    return false;
+  }();
+
+  // Build the struct body.
+  SmallVector<Type*, 16> FieldTypes;
+  FieldTypes.reserve(LayoutFields.size() * 3 / 2);
+  uint64_t LastOffset = 0;
+  for (auto &LayoutField : LayoutFields) {
+    auto &F = getField(LayoutField);
+
+    auto Offset = LayoutField.Offset;
+
+    // Add a padding field if there's a padding gap and we're either
+    // building a packed struct or the padding gap is more than we'd
+    // get from aligning to the field type's natural alignment.
+    assert(Offset >= LastOffset);
+    if (Offset != LastOffset) {
+      if (Packed || alignTo(LastOffset, F.TyAlignment) != Offset)
+        FieldTypes.push_back(ArrayType::get(Type::getInt8Ty(Context),
+                                            Offset - LastOffset));
+    }
+
+    // Record the layout information into both the Field and the
+    // original Spill, if there is one.
+    F.Offset = Offset;
+    F.FieldIndex = FieldTypes.size();
+    if (F.ForSpill) {
+      F.ForSpill->setFieldIndex(F.FieldIndex);
+    }
+
+    FieldTypes.push_back(F.Ty);
+    LastOffset = Offset + F.Size;
+  }
+
+  Ty->setBody(FieldTypes, Packed);
+
+#ifndef NDEBUG
+  // Check that the IR layout matches the offsets we expect.
+  auto Layout = DL.getStructLayout(Ty);
+  for (auto &F : Fields) {
+    assert(Ty->getElementType(F.FieldIndex) == F.Ty);
+    assert(Layout->getElementOffset(F.FieldIndex) == F.Offset);
+  }
+#endif
+
+  IsFinished = true;
+}
+
 // Build a struct that will keep state for an active coroutine.
 //   struct f.frame {
 //     ResumeFnTy ResumeFnAddr;
@@ -396,13 +529,17 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape,
                                   SpillInfo &Spills) {
   LLVMContext &C = F.getContext();
   const DataLayout &DL = F.getParent()->getDataLayout();
-  PaddingCalculator Padder(C, DL);
-  SmallString<32> Name(F.getName());
-  Name.append(".Frame");
-  StructType *FrameTy = StructType::create(C, Name);
-  SmallVector<Type *, 8> Types;
+  StructType *FrameTy = [&] {
+    SmallString<32> Name(F.getName());
+    Name.append(".Frame");
+    return StructType::create(C, Name);
+  }();
+
+  FrameTypeBuilder B(C, DL);
 
   AllocaInst *PromiseAlloca = Shape.getPromiseAlloca();
+  Optional<FrameTypeBuilder::FieldId> PromiseFieldId;
+  Optional<FrameTypeBuilder::FieldId> SwitchIndexFieldId;
 
   if (Shape.ABI == coro::ABI::Switch) {
     auto *FramePtrTy = FrameTy->getPointerTo();
@@ -410,74 +547,74 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape,
                                    /*IsVarArg=*/false);
     auto *FnPtrTy = FnTy->getPointerTo();
 
-    // Figure out how wide should be an integer type storing the suspend index.
+    // Add header fields for the resume and destroy functions.
+    // We can rely on these being perfectly packed.
+    B.addField(FnPtrTy, None, nullptr, /*header*/ true);
+    B.addField(FnPtrTy, None, nullptr, /*header*/ true);
+
+    // Add a header field for the promise if there is one.
+    if (PromiseAlloca) {
+      PromiseFieldId =
+        B.addFieldForAlloca(PromiseAlloca, nullptr, /*header*/ true);
+    }
+
+    // Add a field to store the suspend index. This doesn't need to
+    // be in the header.
    unsigned IndexBits = std::max(1U, Log2_64_Ceil(Shape.CoroSuspends.size()));
-    Type *PromiseType = PromiseAlloca
-                            ? PromiseAlloca->getType()->getElementType()
-                            : Type::getInt1Ty(C);
     Type *IndexType = Type::getIntNTy(C, IndexBits);
-    Types.push_back(FnPtrTy);
-    Types.push_back(FnPtrTy);
-    Types.push_back(PromiseType);
-    Types.push_back(IndexType);
+
+    SwitchIndexFieldId = B.addField(IndexType, None);
   } else {
     assert(PromiseAlloca == nullptr && "lowering doesn't support promises");
   }
 
   Value *CurrentDef = nullptr;
 
-  Padder.addTypes(Types);
-
   // Create an entry for every spilled value.
   for (auto &S : Spills) {
+    // We can have multiple entries in Spills for a single value, but
+    // they should form a contiguous run. Ignore all but the first.
     if (CurrentDef == S.def())
      continue;
 
    CurrentDef = S.def();
-    // PromiseAlloca was already added to Types array earlier.
-    if (CurrentDef == PromiseAlloca)
-      continue;
 
-    uint64_t Count = 1;
-    Type *Ty = nullptr;
+    assert(CurrentDef != PromiseAlloca &&
+           "recorded spill use of promise alloca?");
+
    if (auto *AI = dyn_cast<AllocaInst>(CurrentDef)) {
-      Ty = AI->getAllocatedType();
-      if (unsigned AllocaAlignment = AI->getAlignment()) {
-        // If alignment is specified in alloca, see if we need to insert extra
-        // padding.
-        if (auto PaddingTy = Padder.getPaddingType(Ty, AllocaAlignment)) {
-          Types.push_back(PaddingTy);
-          Padder.addType(PaddingTy);
-        }
-      }
-      if (auto *CI = dyn_cast<ConstantInt>(AI->getArraySize()))
-        Count = CI->getValue().getZExtValue();
-      else
-        report_fatal_error("Coroutines cannot handle non static allocas yet");
+      B.addFieldForAlloca(AI, &S);
    } else {
-      Ty = CurrentDef->getType();
+      Type *Ty = CurrentDef->getType();
+      B.addField(Ty, None, &S);
    }
-    S.setFieldIndex(Types.size());
-    if (Count == 1)
-      Types.push_back(Ty);
-    else
-      Types.push_back(ArrayType::get(Ty, Count));
-    Padder.addType(Ty);
  }
-  FrameTy->setBody(Types);
+
+  B.finish(FrameTy);
+  Shape.FrameAlign = B.getStructAlign();
+  Shape.FrameSize = B.getStructSize();
 
  switch (Shape.ABI) {
+  // In the switch ABI, remember the field indices for the promise and
+  // switch-index fields.
  case coro::ABI::Switch:
+    Shape.SwitchLowering.IndexField =
+      B.getFieldIndex(*SwitchIndexFieldId);
+    Shape.SwitchLowering.PromiseField =
+      (PromiseAlloca ? B.getFieldIndex(*PromiseFieldId) : 0);
+
+    // Also round the frame size up to a multiple of its alignment, as is
+    // generally expected in C/C++.
+    Shape.FrameSize = alignTo(Shape.FrameSize, Shape.FrameAlign);
    break;
 
-  // Remember whether the frame is inline in the storage.
+  // In the retcon ABI, remember whether the frame is inline in the storage.
  case coro::ABI::Retcon:
  case coro::ABI::RetconOnce: {
-    auto &Layout = F.getParent()->getDataLayout();
    auto Id = Shape.getRetconCoroId();
    Shape.RetconLowering.IsFrameInlineInStorage
-      = (Layout.getTypeAllocSize(FrameTy) <= Id->getStorageSize() &&
-         Layout.getABITypeAlignment(FrameTy) <= Id->getStorageAlignment());
+      = (B.getStructSize() <= Id->getStorageSize() &&
+         B.getStructAlign() <= Id->getStorageAlignment());
    break;
  }
  }
@@ -606,10 +743,12 @@ static Instruction *insertSpills(const SpillInfo &Spills, coro::Shape &Shape) {
  // we remember allocas and their indices to be handled once we processed
  // all the spills.
  SmallVector<std::pair<AllocaInst *, unsigned>, 4> Allocas;
-  // Promise alloca (if present) has a fixed field number.
+
+  // Promise alloca (if present) doesn't show in the spills and has a
+  // special field number.
   if (auto *PromiseAlloca = Shape.getPromiseAlloca()) {
     assert(Shape.ABI == coro::ABI::Switch);
-    Allocas.emplace_back(PromiseAlloca, coro::Shape::SwitchFieldIndex::Promise);
+    Allocas.emplace_back(PromiseAlloca, Shape.getPromiseField());
   }
 
   // Create a GEP with the given index into the coroutine frame for the original
@@ -636,12 +775,12 @@ static Instruction *insertSpills(const SpillInfo &Spills, coro::Shape &Shape) {
   };
 
   // Create a load instruction to reload the spilled value from the coroutine
-  // frame.
-  auto CreateReload = [&](Instruction *InsertBefore) {
+  // frame. Populates the Value pointer reference provided with the frame GEP.
+  auto CreateReload = [&](Instruction *InsertBefore, Value *&G) {
     assert(Index != InvalidFieldIndex && "accessing unassigned field number");
     Builder.SetInsertPoint(InsertBefore);
 
-    auto *G = GetFramePointer(Index, CurrentValue);
+    G = GetFramePointer(Index, CurrentValue);
     G->setName(CurrentValue->getName() + Twine(".reload.addr"));
 
     return isa<AllocaInst>(CurrentValue)
@@ -650,6 +789,7 @@ static Instruction *insertSpills(const SpillInfo &Spills, coro::Shape &Shape) {
                              CurrentValue->getName() + Twine(".reload"));
   };
 
+  Value *GEP = nullptr, *CurrentGEP = nullptr;
   for (auto const &E : Spills) {
     // If we have not seen the value, generate a spill.
     if (CurrentValue != E.def()) {
@@ -722,7 +862,7 @@ static Instruction *insertSpills(const SpillInfo &Spills, coro::Shape &Shape) {
       // If we have not seen the use block, generate a reload in it.
       if (CurrentBlock != E.userBlock()) {
         CurrentBlock = E.userBlock();
-        CurrentReload = CreateReload(&*CurrentBlock->getFirstInsertionPt());
+        CurrentReload = CreateReload(&*CurrentBlock->getFirstInsertionPt(), GEP);
       }
 
       // If we have a single edge PHINode, remove it and replace it with a reload
@@ -736,6 +876,19 @@ static Instruction *insertSpills(const SpillInfo &Spills, coro::Shape &Shape) {
         continue;
       }
 
+      // If we have not seen this GEP instruction, migrate any dbg.declare from
+      // the alloca to it.
+      if (CurrentGEP != GEP) {
+        CurrentGEP = GEP;
+        TinyPtrVector<DbgDeclareInst *> DIs = FindDbgDeclareUses(CurrentValue);
+        if (!DIs.empty())
+          DIBuilder(*CurrentBlock->getParent()->getParent(),
+                    /*AllowUnresolved*/ false)
+              .insertDeclare(CurrentGEP, DIs.front()->getVariable(),
+                             DIs.front()->getExpression(),
+                             DIs.front()->getDebugLoc(), DIs.front());
+      }
+
       // Replace all uses of CurrentValue in the current instruction with reload.
       E.user()->replaceUsesOfWith(CurrentValue, CurrentReload);
     }
@@ -746,14 +899,38 @@ static Instruction *insertSpills(const SpillInfo &Spills, coro::Shape &Shape) {
       FramePtrBB->splitBasicBlock(FramePtr->getNextNode(), "AllocaSpillBB");
   SpillBlock->splitBasicBlock(&SpillBlock->front(), "PostSpill");
   Shape.AllocaSpillBlock = SpillBlock;
-  // If we found any allocas, replace all of their remaining uses with Geps.
-  // Note: we cannot do it indiscriminately as some of the uses may not be
-  // dominated by CoroBegin.
+
+  // retcon and retcon.once lowering assumes all uses have been sunk.
+  if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce) {
+    // If we found any allocas, replace all of their remaining uses with Geps.
+    Builder.SetInsertPoint(&SpillBlock->front());
+    for (auto &P : Allocas) {
+      auto *G = GetFramePointer(P.second, P.first);
+
+      // We are not using ReplaceInstWithInst(P.first, cast<Instruction>(G))
+      // here, as we are changing location of the instruction.
+      G->takeName(P.first);
+      P.first->replaceAllUsesWith(G);
+      P.first->eraseFromParent();
+    }
+    return FramePtr;
+  }
+
+  // If we found any alloca, replace all of their remaining uses with GEP
+  // instructions. Because new dbg.declare have been created for these alloca,
+  // we also delete the original dbg.declare and replace other uses with undef.
+  // Note: We cannot replace the alloca with GEP instructions indiscriminately,
+  // as some of the uses may not be dominated by CoroBegin.
   bool MightNeedToCopy = false;
   Builder.SetInsertPoint(&Shape.AllocaSpillBlock->front());
   SmallVector<Instruction *, 4> UsersToUpdate;
   for (auto &P : Allocas) {
     AllocaInst *const A = P.first;
+
+    for (auto *DI : FindDbgDeclareUses(A))
+      DI->eraseFromParent();
+    replaceDbgUsesWithUndef(A);
+
     UsersToUpdate.clear();
     for (User *U : A->users()) {
       auto *I = cast<Instruction>(U);
@@ -784,7 +961,7 @@ static Instruction *insertSpills(const SpillInfo &Spills, coro::Shape &Shape) {
              "Coroutines cannot handle copying of array allocas yet");
 
       auto *G = GetFramePointer(P.second, A);
-      auto *Value = Builder.CreateLoad(A);
+      auto *Value = Builder.CreateLoad(A->getAllocatedType(), A);
       Builder.CreateStore(Value, G);
     }
   }
@@ -1106,7 +1283,7 @@ static void lowerLocalAllocas(ArrayRef<CoroAllocaAllocInst*> LocalAllocas,
 
     // Allocate memory.
     auto Alloca = Builder.CreateAlloca(Builder.getInt8Ty(), AI->getSize());
-    Alloca->setAlignment(MaybeAlign(AI->getAlignment()));
+    Alloca->setAlignment(Align(AI->getAlignment()));
 
     for (auto U : AI->users()) {
       // Replace gets with the allocation.
@@ -1166,7 +1343,7 @@ static Value *emitGetSwiftErrorValue(IRBuilder<> &Builder, Type *ValueTy,
   auto FnTy = FunctionType::get(ValueTy, {}, false);
   auto Fn = ConstantPointerNull::get(FnTy->getPointerTo());
 
-  auto Call = Builder.CreateCall(Fn, {});
+  auto Call = Builder.CreateCall(FnTy, Fn, {});
   Shape.SwiftErrorOps.push_back(Call);
 
   return Call;
@@ -1182,7 +1359,7 @@ static Value *emitSetSwiftErrorValue(IRBuilder<> &Builder, Value *V,
                                  {V->getType()}, false);
   auto Fn = ConstantPointerNull::get(FnTy->getPointerTo());
 
-  auto Call = Builder.CreateCall(Fn, { V });
+  auto Call = Builder.CreateCall(FnTy, Fn, { V });
   Shape.SwiftErrorOps.push_back(Call);
 
   return Call;
@@ -1322,11 +1499,125 @@ static void eliminateSwiftError(Function &F, coro::Shape &Shape) {
   }
 }
 
-void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
-  // Lower coro.dbg.declare to coro.dbg.value, since we are going to rewrite
-  // access to local variables.
-  LowerDbgDeclare(F);
+/// retcon and retcon.once conventions assume that all spill uses can be sunk
+/// after the coro.begin intrinsic.
+static void sinkSpillUsesAfterCoroBegin(Function &F, const SpillInfo &Spills,
+                                        CoroBeginInst *CoroBegin) {
+  DominatorTree Dom(F);
+
+  SmallSetVector<Instruction *, 32> ToMove;
+  SmallVector<Instruction *, 32> Worklist;
+
+  // Collect all users that precede coro.begin.
+  for (auto const &Entry : Spills) {
+    auto *SpillDef = Entry.def();
+    for (User *U : SpillDef->users()) {
+      auto Inst = cast<Instruction>(U);
+      if (Inst->getParent() != CoroBegin->getParent() ||
+          Dom.dominates(CoroBegin, Inst))
+        continue;
+      if (ToMove.insert(Inst))
+        Worklist.push_back(Inst);
+    }
+  }
+  // Recursively collect users before coro.begin.
+  while (!Worklist.empty()) {
+    auto *Def = Worklist.back();
+    Worklist.pop_back();
+    for (User *U : Def->users()) {
+      auto Inst = cast<Instruction>(U);
+      if (Dom.dominates(CoroBegin, Inst))
+        continue;
+      if (ToMove.insert(Inst))
+        Worklist.push_back(Inst);
+    }
+  }
+
+  // Sort by dominance.
+  SmallVector<Instruction *, 64> InsertionList(ToMove.begin(), ToMove.end());
+  std::sort(InsertionList.begin(), InsertionList.end(),
+            [&Dom](Instruction *A, Instruction *B) -> bool {
+              // If a dominates b it should preceed (<) b.
+              return Dom.dominates(A, B);
+            });
+
+  Instruction *InsertPt = CoroBegin->getNextNode();
+  for (Instruction *Inst : InsertionList)
+    Inst->moveBefore(InsertPt);
+
+  return;
+}
+
+/// For each local variable that all of its user are only used inside one of
+/// suspended region, we sink their lifetime.start markers to the place where
+/// after the suspend block. Doing so minimizes the lifetime of each variable,
+/// hence minimizing the amount of data we end up putting on the frame.
+static void sinkLifetimeStartMarkers(Function &F, coro::Shape &Shape,
+                                     SuspendCrossingInfo &Checker) {
+  DominatorTree DT(F);
+
+  // Collect all possible basic blocks which may dominate all uses of allocas.
+  SmallPtrSet<BasicBlock *, 4> DomSet;
+  DomSet.insert(&F.getEntryBlock());
+  for (auto *CSI : Shape.CoroSuspends) {
+    BasicBlock *SuspendBlock = CSI->getParent();
+    assert(isSuspendBlock(SuspendBlock) && SuspendBlock->getSingleSuccessor() &&
+           "should have split coro.suspend into its own block");
+    DomSet.insert(SuspendBlock->getSingleSuccessor());
+  }
+
+  for (Instruction &I : instructions(F)) {
+    if (!isa<AllocaInst>(&I))
+      continue;
+
+    for (BasicBlock *DomBB : DomSet) {
+      bool Valid = true;
+      SmallVector<Instruction *, 1> BCInsts;
+
+      auto isUsedByLifetimeStart = [&](Instruction *I) {
+        if (isa<BitCastInst>(I) && I->hasOneUse())
+          if (auto *IT = dyn_cast<IntrinsicInst>(I->user_back()))
+            return IT->getIntrinsicID() == Intrinsic::lifetime_start;
+        return false;
+      };
+
+      for (User *U : I.users()) {
+        Instruction *UI = cast<Instruction>(U);
+        // For all users except lifetime.start markers, if they are all
+        // dominated by one of the basic blocks and do not cross
+        // suspend points as well, then there is no need to spill the
+        // instruction.
+        if (!DT.dominates(DomBB, UI->getParent()) ||
+            Checker.isDefinitionAcrossSuspend(DomBB, U)) {
+          // Skip bitcast used by lifetime.start markers.
+          if (isUsedByLifetimeStart(UI)) {
+            BCInsts.push_back(UI);
+            continue;
+          }
+          Valid = false;
+          break;
+        }
+      }
+
+      // Sink lifetime.start markers to dominate block when they are
+      // only used outside the region.
+      if (Valid && BCInsts.size() != 0) {
+        auto *NewBitcast = BCInsts[0]->clone();
+        auto *NewLifetime = cast<Instruction>(BCInsts[0]->user_back())->clone();
+        NewLifetime->replaceUsesOfWith(BCInsts[0], NewBitcast);
+        NewBitcast->insertBefore(DomBB->getTerminator());
+        NewLifetime->insertBefore(DomBB->getTerminator());
+
+        // All the outsided lifetime.start markers are no longer necessary.
+        for (Instruction *S : BCInsts) {
+          S->user_back()->eraseFromParent();
+        }
+        break;
+      }
+    }
+  }
+}
+
+void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
   eliminateSwiftError(F, Shape);
 
   if (Shape.ABI == coro::ABI::Switch &&
@@ -1376,6 +1667,25 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
     Spills.clear();
   }
 
+  sinkLifetimeStartMarkers(F, Shape, Checker);
+  // Collect lifetime.start info for each alloca.
+  using LifetimeStart = SmallPtrSet<Instruction *, 2>;
+  llvm::DenseMap<Instruction *, std::unique_ptr<LifetimeStart>> LifetimeMap;
+  for (Instruction &I : instructions(F)) {
+    auto *II = dyn_cast<IntrinsicInst>(&I);
+    if (!II || II->getIntrinsicID() != Intrinsic::lifetime_start)
+      continue;
+
+    if (auto *OpInst = dyn_cast<BitCastInst>(I.getOperand(1)))
+      if (auto *AI = dyn_cast<AllocaInst>(OpInst->getOperand(0))) {
+
+        if (LifetimeMap.find(AI) == LifetimeMap.end())
+          LifetimeMap[AI] = std::make_unique<LifetimeStart>();
+
+        LifetimeMap[AI]->insert(OpInst);
+      }
+  }
+
   // Collect the spills for arguments and other not-materializable values.
   for (Argument &A : F.args())
     for (User *U : A.users())
@@ -1421,16 +1731,31 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
       continue;
     }
 
-    for (User *U : I.users())
-      if (Checker.isDefinitionAcrossSuspend(I, U)) {
+    auto Iter = LifetimeMap.find(&I);
+    for (User *U : I.users()) {
+      bool NeedSpill = false;
+
+      // Check against lifetime.start if the instruction has the info.
+      if (Iter != LifetimeMap.end())
+        for (auto *S : *Iter->second) {
+          if ((NeedSpill = Checker.isDefinitionAcrossSuspend(*S, U)))
+            break;
+        }
+      else
+        NeedSpill = Checker.isDefinitionAcrossSuspend(I, U);
+
+      if (NeedSpill) {
        // We cannot spill a token.
        if (I.getType()->isTokenTy())
          report_fatal_error(
              "token definition is separated from the use by a suspend point");
        Spills.emplace_back(&I, U);
      }
+    }
  }
  LLVM_DEBUG(dump("Spills", Spills));
+  if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce)
+    sinkSpillUsesAfterCoroBegin(F, Spills, Shape.CoroBegin);
  Shape.FrameTy = buildFrameType(F, Shape, Spills);
  Shape.FramePtr = insertSpills(Spills, Shape);
  lowerLocalAllocas(LocalAllocas, DeadInstructions);
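
A few notes on the mechanisms this patch combines, for readers who don't live in this pass. First, the frame shape for the switch ABI: the fixed header (resume pointer, destroy pointer, promise) is still laid out eagerly through the IsHeader path of addField, while the suspend index and all spilled values are now flexible-offset fields placed by performOptimizedStructLayout. A hand-written C++ analogue of one possible resulting frame — every name here (CoroFrame, TaskPromise, SpilledDouble) is hypothetical; the real frame is an anonymous StructType named <function>.Frame:

    #include <cstdint>
    #include <cstdio>

    struct TaskPromise { int Value; };        // stand-in for the promise alloca

    struct CoroFrame {
      // Header fields, laid out eagerly so coro.resume/coro.destroy can find
      // the function pointers at fixed offsets:
      void (*ResumeFn)(CoroFrame *);
      void (*DestroyFn)(CoroFrame *);
      TaskPromise Promise;
      // Flexible-offset fields; the layout algorithm may reorder and pad:
      double SpilledDouble;                   // a value spilled across a suspend
      uint8_t SuspendIndex;                   // max(1, ceil(log2(N))) bits wide
    };

    int main() { std::printf("frame size: %zu\n", sizeof(CoroFrame)); }

The eager header layout is what keeps the resume and destroy pointers at fixed offsets no matter how the rest of the frame gets packed.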
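Second, the suspend-index width rule is unchanged by the patch: an integer of std::max(1U, Log2_64_Ceil(Shape.CoroSuspends.size())) bits. A standalone sketch of that computation, with log2Ceil as a local stand-in for LLVM's Log2_64_Ceil:

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>

    // Smallest number of bits B such that 2^B >= N (ceil(log2(N))).
    static unsigned log2Ceil(uint64_t N) {
      unsigned Bits = 0;
      while ((uint64_t(1) << Bits) < N)
        ++Bits;
      return Bits;
    }

    int main() {
      for (uint64_t Suspends : {1, 2, 3, 5, 9}) {
        // Even a single suspend point gets a 1-bit index, hence the max.
        unsigned IndexBits = std::max(1u, log2Ceil(Suspends));
        std::printf("%llu suspend points -> i%u index field\n",
                    (unsigned long long)Suspends, IndexBits);
      }
    }

One or two suspend points fit in an i1 index, three or four need i2, and so on.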
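Third, padding. FrameTypeBuilder::finish only materializes an explicit [N x i8] padding field when the struct is packed or when the next field's natural type alignment would not already reproduce the gap the layout chose. A standalone sketch of that decision, with made-up offsets and alignments:

    #include <cstdint>
    #include <cstdio>

    // Round Value up to the next multiple of Align.
    static uint64_t alignTo(uint64_t Value, uint64_t Align) {
      return (Value + Align - 1) / Align * Align;
    }

    int main() {
      bool Packed = false;
      uint64_t LastOffset = 4;   // end of the previous field
      uint64_t Offset = 16;      // offset the layout assigned to the next field
      uint64_t TyAlignment = 8;  // natural alignment of the next field's type

      // Mirrors the condition in FrameTypeBuilder::finish().
      if (Offset != LastOffset &&
          (Packed || alignTo(LastOffset, TyAlignment) != Offset))
        std::printf("insert [%llu x i8] padding\n",
                    (unsigned long long)(Offset - LastOffset));
      else
        std::printf("rely on natural alignment\n");
    }

Here natural alignment would place the field at offset 8, but the layout assigned 16, so the 12 padding bytes must be spelled out as an i8 array.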
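Fourth, the layout itself. performOptimizedStructLayout, from the newly included llvm/Support/OptimizedStructLayout.h, assigns the flexible offsets. The real algorithm is smarter than this (it can also place small fields into alignment gaps), so treat the following deliberately simplified, standalone toy only as intuition for why reordering beats declaration order:

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    struct Field { const char *Name; uint64_t Size; uint64_t Align; uint64_t Offset = 0; };

    static uint64_t alignTo(uint64_t V, uint64_t A) { return (V + A - 1) / A * A; }

    int main() {
      std::vector<Field> Fields = {
          {"flag", 1, 1}, {"ptr", 8, 8}, {"small", 2, 2}, {"vec", 16, 16}};

      // Stricter alignments (then larger sizes) first shrinks padding.
      std::stable_sort(Fields.begin(), Fields.end(),
                       [](const Field &A, const Field &B) {
                         return A.Align != B.Align ? A.Align > B.Align
                                                   : A.Size > B.Size;
                       });

      uint64_t Offset = 0, MaxAlign = 1;
      for (auto &F : Fields) {
        F.Offset = alignTo(Offset, F.Align);
        Offset = F.Offset + F.Size;
        MaxAlign = std::max(MaxAlign, F.Align);
        std::printf("%-5s at offset %llu\n", F.Name,
                    (unsigned long long)F.Offset);
      }
      std::printf("size %llu, align %llu\n",
                  (unsigned long long)Offset, (unsigned long long)MaxAlign);
    }

Declaration order would cost 48 bytes for these four fields; the sorted order needs 27 before tail padding.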
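Finally, the lifetime.start plumbing. sinkLifetimeStartMarkers plus the LifetimeMap-aware spill check make the cross-suspend test run from where a variable's lifetime begins rather than from wherever its alloca sits in the entry block. A self-contained C++20 illustration of the situation this helps (the task type below is a minimal hypothetical coroutine return object, not part of this patch; compile with -std=c++20):

    #include <coroutine>
    #include <cstdio>
    #include <numeric>

    struct task {
      struct promise_type {
        task get_return_object() { return {}; }
        std::suspend_never initial_suspend() { return {}; }
        std::suspend_never final_suspend() noexcept { return {}; }
        void return_void() {}
        void unhandled_exception() {}
      };
    };

    task example() {
      co_await std::suspend_never{};  // a suspend point in the IR, even though
                                      // suspend_never resumes immediately
      int big[1024];                  // live range begins after the suspend
      std::iota(big, big + 1024, 0);  // ... so it need not live in the frame
      std::printf("last: %d\n", big[1023]);
    }

    int main() { example(); }

Because big is only used after the suspend point, a frontend that emits lifetime markers at first use lets this pass keep it as a plain stack allocation instead of a coroutine-frame field.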