Diffstat (limited to 'llvm/lib/Transforms/Coroutines')
-rw-r--r-- | llvm/lib/Transforms/Coroutines/CoroCleanup.cpp |  28
-rw-r--r-- | llvm/lib/Transforms/Coroutines/CoroEarly.cpp   |  67
-rw-r--r-- | llvm/lib/Transforms/Coroutines/CoroElide.cpp   | 211
-rw-r--r-- | llvm/lib/Transforms/Coroutines/CoroFrame.cpp   | 515
-rw-r--r-- | llvm/lib/Transforms/Coroutines/CoroInstr.h     |  17
-rw-r--r-- | llvm/lib/Transforms/Coroutines/CoroInternal.h  |  42
-rw-r--r-- | llvm/lib/Transforms/Coroutines/CoroSplit.cpp   | 318
-rw-r--r-- | llvm/lib/Transforms/Coroutines/Coroutines.cpp  |   7
8 files changed, 928 insertions, 277 deletions
diff --git a/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp b/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp index c2dbd6f41642..233eae37c497 100644 --- a/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp @@ -5,9 +5,8 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// This pass lowers all remaining coroutine intrinsics. -//===----------------------------------------------------------------------===// +#include "llvm/Transforms/Coroutines/CoroCleanup.h" #include "CoroInternal.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" @@ -90,12 +89,26 @@ bool Lowerer::lowerRemainingCoroIntrinsics(Function &F) { // After replacement were made we can cleanup the function body a little. simplifyCFG(F); } + return Changed; } -//===----------------------------------------------------------------------===// -// Top Level Driver -//===----------------------------------------------------------------------===// +static bool declaresCoroCleanupIntrinsics(const Module &M) { + return coro::declaresIntrinsics(M, {"llvm.coro.alloc", "llvm.coro.begin", + "llvm.coro.subfn.addr", "llvm.coro.free", + "llvm.coro.id", "llvm.coro.id.retcon", + "llvm.coro.id.retcon.once"}); +} + +PreservedAnalyses CoroCleanupPass::run(Function &F, + FunctionAnalysisManager &AM) { + auto &M = *F.getParent(); + if (!declaresCoroCleanupIntrinsics(M) || + !Lowerer(M).lowerRemainingCoroIntrinsics(F)) + return PreservedAnalyses::all(); + + return PreservedAnalyses::none(); +} namespace { @@ -111,10 +124,7 @@ struct CoroCleanupLegacy : FunctionPass { // This pass has work to do only if we find intrinsics we are going to lower // in the module. bool doInitialization(Module &M) override { - if (coro::declaresIntrinsics(M, {"llvm.coro.alloc", "llvm.coro.begin", - "llvm.coro.subfn.addr", "llvm.coro.free", - "llvm.coro.id", "llvm.coro.id.retcon", - "llvm.coro.id.retcon.once"})) + if (declaresCoroCleanupIntrinsics(M)) L = std::make_unique<Lowerer>(M); return false; } diff --git a/llvm/lib/Transforms/Coroutines/CoroEarly.cpp b/llvm/lib/Transforms/Coroutines/CoroEarly.cpp index e73fb9eeb1e9..242e6c3f6b23 100644 --- a/llvm/lib/Transforms/Coroutines/CoroEarly.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroEarly.cpp @@ -5,13 +5,9 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// This pass lowers coroutine intrinsics that hide the details of the exact -// calling convention for coroutine resume and destroy functions and details of -// the structure of the coroutine frame. -//===----------------------------------------------------------------------===// +#include "llvm/Transforms/Coroutines/CoroEarly.h" #include "CoroInternal.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Module.h" @@ -28,7 +24,7 @@ class Lowerer : public coro::LowererBase { PointerType *const AnyResumeFnPtrTy; Constant *NoopCoro = nullptr; - void lowerResumeOrDestroy(CallSite CS, CoroSubFnInst::ResumeKind); + void lowerResumeOrDestroy(CallBase &CB, CoroSubFnInst::ResumeKind); void lowerCoroPromise(CoroPromiseInst *Intrin); void lowerCoroDone(IntrinsicInst *II); void lowerCoroNoop(IntrinsicInst *II); @@ -47,12 +43,11 @@ public: // an address returned by coro.subfn.addr intrinsic. 
This is done so that // CGPassManager recognizes devirtualization when CoroElide pass replaces a call // to coro.subfn.addr with an appropriate function address. -void Lowerer::lowerResumeOrDestroy(CallSite CS, +void Lowerer::lowerResumeOrDestroy(CallBase &CB, CoroSubFnInst::ResumeKind Index) { - Value *ResumeAddr = - makeSubFnCall(CS.getArgOperand(0), Index, CS.getInstruction()); - CS.setCalledFunction(ResumeAddr); - CS.setCallingConv(CallingConv::Fast); + Value *ResumeAddr = makeSubFnCall(CB.getArgOperand(0), Index, &CB); + CB.setCalledOperand(ResumeAddr); + CB.setCallingConv(CallingConv::Fast); } // Coroutine promise field is always at the fixed offset from the beginning of @@ -64,14 +59,14 @@ void Lowerer::lowerResumeOrDestroy(CallSite CS, // TODO: Handle the case when coroutine promise alloca has align override. void Lowerer::lowerCoroPromise(CoroPromiseInst *Intrin) { Value *Operand = Intrin->getArgOperand(0); - unsigned Alignement = Intrin->getAlignment(); + Align Alignment = Intrin->getAlignment(); Type *Int8Ty = Builder.getInt8Ty(); auto *SampleStruct = StructType::get(Context, {AnyResumeFnPtrTy, AnyResumeFnPtrTy, Int8Ty}); const DataLayout &DL = TheModule.getDataLayout(); int64_t Offset = alignTo( - DL.getStructLayout(SampleStruct)->getElementOffset(2), Alignement); + DL.getStructLayout(SampleStruct)->getElementOffset(2), Alignment); if (Intrin->isFromPromise()) Offset = -Offset; @@ -98,7 +93,7 @@ void Lowerer::lowerCoroDone(IntrinsicInst *II) { Builder.SetInsertPoint(II); auto *BCI = Builder.CreateBitCast(Operand, FramePtrTy); - auto *Load = Builder.CreateLoad(BCI); + auto *Load = Builder.CreateLoad(FrameTy, BCI); auto *Cond = Builder.CreateICmpEQ(Load, NullPtr); II->replaceAllUsesWith(Cond); @@ -156,8 +151,8 @@ bool Lowerer::lowerEarlyIntrinsics(Function &F) { SmallVector<CoroFreeInst *, 4> CoroFrees; for (auto IB = inst_begin(F), IE = inst_end(F); IB != IE;) { Instruction &I = *IB++; - if (auto CS = CallSite(&I)) { - switch (CS.getIntrinsicID()) { + if (auto *CB = dyn_cast<CallBase>(&I)) { + switch (CB->getIntrinsicID()) { default: continue; case Intrinsic::coro_free: @@ -167,13 +162,13 @@ bool Lowerer::lowerEarlyIntrinsics(Function &F) { // Make sure that final suspend point is not duplicated as CoroSplit // pass expects that there is at most one final suspend point. if (cast<CoroSuspendInst>(&I)->isFinal()) - CS.setCannotDuplicate(); + CB->setCannotDuplicate(); break; case Intrinsic::coro_end: // Make sure that fallthrough coro.end is not duplicated as CoroSplit // pass expects that there is at most one fallthrough coro.end. 
if (cast<CoroEndInst>(&I)->isFallthrough()) - CS.setCannotDuplicate(); + CB->setCannotDuplicate(); break; case Intrinsic::coro_noop: lowerCoroNoop(cast<IntrinsicInst>(&I)); @@ -195,10 +190,10 @@ bool Lowerer::lowerEarlyIntrinsics(Function &F) { F.addFnAttr(CORO_PRESPLIT_ATTR, PREPARED_FOR_SPLIT); break; case Intrinsic::coro_resume: - lowerResumeOrDestroy(CS, CoroSubFnInst::ResumeIndex); + lowerResumeOrDestroy(*CB, CoroSubFnInst::ResumeIndex); break; case Intrinsic::coro_destroy: - lowerResumeOrDestroy(CS, CoroSubFnInst::DestroyIndex); + lowerResumeOrDestroy(*CB, CoroSubFnInst::DestroyIndex); break; case Intrinsic::coro_promise: lowerCoroPromise(cast<CoroPromiseInst>(&I)); @@ -219,9 +214,23 @@ bool Lowerer::lowerEarlyIntrinsics(Function &F) { return Changed; } -//===----------------------------------------------------------------------===// -// Top Level Driver -//===----------------------------------------------------------------------===// +static bool declaresCoroEarlyIntrinsics(const Module &M) { + return coro::declaresIntrinsics( + M, {"llvm.coro.id", "llvm.coro.id.retcon", "llvm.coro.id.retcon.once", + "llvm.coro.destroy", "llvm.coro.done", "llvm.coro.end", + "llvm.coro.noop", "llvm.coro.free", "llvm.coro.promise", + "llvm.coro.resume", "llvm.coro.suspend"}); +} + +PreservedAnalyses CoroEarlyPass::run(Function &F, FunctionAnalysisManager &) { + Module &M = *F.getParent(); + if (!declaresCoroEarlyIntrinsics(M) || !Lowerer(M).lowerEarlyIntrinsics(F)) + return PreservedAnalyses::all(); + + PreservedAnalyses PA; + PA.preserveSet<CFGAnalyses>(); + return PA; +} namespace { @@ -236,17 +245,7 @@ struct CoroEarlyLegacy : public FunctionPass { // This pass has work to do only if we find intrinsics we are going to lower // in the module. bool doInitialization(Module &M) override { - if (coro::declaresIntrinsics(M, {"llvm.coro.id", - "llvm.coro.id.retcon", - "llvm.coro.id.retcon.once", - "llvm.coro.destroy", - "llvm.coro.done", - "llvm.coro.end", - "llvm.coro.noop", - "llvm.coro.free", - "llvm.coro.promise", - "llvm.coro.resume", - "llvm.coro.suspend"})) + if (declaresCoroEarlyIntrinsics(M)) L = std::make_unique<Lowerer>(M); return false; } diff --git a/llvm/lib/Transforms/Coroutines/CoroElide.cpp b/llvm/lib/Transforms/Coroutines/CoroElide.cpp index 23d22e23861a..9d364b3097c1 100644 --- a/llvm/lib/Transforms/Coroutines/CoroElide.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroElide.cpp @@ -5,12 +5,10 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// This pass replaces dynamic allocation of coroutine frame with alloca and -// replaces calls to llvm.coro.resume and llvm.coro.destroy with direct calls -// to coroutine sub-functions. 
-//===----------------------------------------------------------------------===// +#include "llvm/Transforms/Coroutines/CoroElide.h" #include "CoroInternal.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/IR/Dominators.h" @@ -30,14 +28,19 @@ struct Lowerer : coro::LowererBase { SmallVector<CoroBeginInst *, 1> CoroBegins; SmallVector<CoroAllocInst *, 1> CoroAllocs; SmallVector<CoroSubFnInst *, 4> ResumeAddr; - SmallVector<CoroSubFnInst *, 4> DestroyAddr; + DenseMap<CoroBeginInst *, SmallVector<CoroSubFnInst *, 4>> DestroyAddr; SmallVector<CoroFreeInst *, 1> CoroFrees; + SmallPtrSet<const SwitchInst *, 4> CoroSuspendSwitches; Lowerer(Module &M) : LowererBase(M) {} - void elideHeapAllocations(Function *F, Type *FrameTy, AAResults &AA); + void elideHeapAllocations(Function *F, uint64_t FrameSize, Align FrameAlign, + AAResults &AA); bool shouldElide(Function *F, DominatorTree &DT) const; + void collectPostSplitCoroIds(Function *F); bool processCoroId(CoroIdInst *, AAResults &AA, DominatorTree &DT); + bool hasEscapePath(const CoroBeginInst *, + const SmallPtrSetImpl<BasicBlock *> &) const; }; } // end anonymous namespace @@ -90,10 +93,23 @@ static void removeTailCallAttribute(AllocaInst *Frame, AAResults &AA) { } } -// Given a resume function @f.resume(%f.frame* %frame), returns %f.frame type. -static Type *getFrameType(Function *Resume) { - auto *ArgType = Resume->arg_begin()->getType(); - return cast<PointerType>(ArgType)->getElementType(); +// Given a resume function @f.resume(%f.frame* %frame), returns the size +// and expected alignment of %f.frame type. +static std::pair<uint64_t, Align> getFrameLayout(Function *Resume) { + // Prefer to pull information from the function attributes. + auto Size = Resume->getParamDereferenceableBytes(0); + auto Align = Resume->getParamAlign(0); + + // If those aren't given, extract them from the type. + if (Size == 0 || !Align) { + auto *FrameTy = Resume->arg_begin()->getType()->getPointerElementType(); + + const DataLayout &DL = Resume->getParent()->getDataLayout(); + if (!Size) Size = DL.getTypeAllocSize(FrameTy); + if (!Align) Align = DL.getABITypeAlign(FrameTy); + } + + return std::make_pair(Size, *Align); } // Finds first non alloca instruction in the entry block of a function. @@ -106,8 +122,9 @@ static Instruction *getFirstNonAllocaInTheEntryBlock(Function *F) { // To elide heap allocations we need to suppress code blocks guarded by // llvm.coro.alloc and llvm.coro.free instructions. -void Lowerer::elideHeapAllocations(Function *F, Type *FrameTy, AAResults &AA) { - LLVMContext &C = FrameTy->getContext(); +void Lowerer::elideHeapAllocations(Function *F, uint64_t FrameSize, + Align FrameAlign, AAResults &AA) { + LLVMContext &C = F->getContext(); auto *InsertPt = getFirstNonAllocaInTheEntryBlock(CoroIds.front()->getFunction()); @@ -128,7 +145,9 @@ void Lowerer::elideHeapAllocations(Function *F, Type *FrameTy, AAResults &AA) { // here. Possibly we will need to do a mini SROA here and break the coroutine // frame into individual AllocaInst recreating the original alignment. 
const DataLayout &DL = F->getParent()->getDataLayout(); + auto FrameTy = ArrayType::get(Type::getInt8Ty(C), FrameSize); auto *Frame = new AllocaInst(FrameTy, DL.getAllocaAddrSpace(), "", InsertPt); + Frame->setAlignment(FrameAlign); auto *FrameVoidPtr = new BitCastInst(Frame, Type::getInt8PtrTy(C), "vFrame", InsertPt); @@ -142,44 +161,92 @@ void Lowerer::elideHeapAllocations(Function *F, Type *FrameTy, AAResults &AA) { removeTailCallAttribute(Frame, AA); } +bool Lowerer::hasEscapePath(const CoroBeginInst *CB, + const SmallPtrSetImpl<BasicBlock *> &TIs) const { + const auto &It = DestroyAddr.find(CB); + assert(It != DestroyAddr.end()); + + // Limit the number of blocks we visit. + unsigned Limit = 32 * (1 + It->second.size()); + + SmallVector<const BasicBlock *, 32> Worklist; + Worklist.push_back(CB->getParent()); + + SmallPtrSet<const BasicBlock *, 32> Visited; + // Consider basicblock of coro.destroy as visited one, so that we + // skip the path pass through coro.destroy. + for (auto *DA : It->second) + Visited.insert(DA->getParent()); + + do { + const auto *BB = Worklist.pop_back_val(); + if (!Visited.insert(BB).second) + continue; + if (TIs.count(BB)) + return true; + + // Conservatively say that there is potentially a path. + if (!--Limit) + return true; + + auto TI = BB->getTerminator(); + // Although the default dest of coro.suspend switches is suspend pointer + // which means a escape path to normal terminator, it is reasonable to skip + // it since coroutine frame doesn't change outside the coroutine body. + if (isa<SwitchInst>(TI) && + CoroSuspendSwitches.count(cast<SwitchInst>(TI))) { + Worklist.push_back(cast<SwitchInst>(TI)->getSuccessor(1)); + Worklist.push_back(cast<SwitchInst>(TI)->getSuccessor(2)); + } else + Worklist.append(succ_begin(BB), succ_end(BB)); + + } while (!Worklist.empty()); + + // We have exhausted all possible paths and are certain that coro.begin can + // not reach to any of terminators. + return false; +} + bool Lowerer::shouldElide(Function *F, DominatorTree &DT) const { // If no CoroAllocs, we cannot suppress allocation, so elision is not // possible. if (CoroAllocs.empty()) return false; - // Check that for every coro.begin there is a coro.destroy directly - // referencing the SSA value of that coro.begin along a non-exceptional path. + // Check that for every coro.begin there is at least one coro.destroy directly + // referencing the SSA value of that coro.begin along each + // non-exceptional path. // If the value escaped, then coro.destroy would have been referencing a // memory location storing that value and not the virtual register. + SmallPtrSet<BasicBlock *, 8> Terminators; // First gather all of the non-exceptional terminators for the function. - SmallPtrSet<Instruction *, 8> Terminators; - for (BasicBlock &B : *F) { - auto *TI = B.getTerminator(); - if (TI->getNumSuccessors() == 0 && !TI->isExceptionalTerminator() && - !isa<UnreachableInst>(TI)) - Terminators.insert(TI); - } + // Consider the final coro.suspend as the real terminator when the current + // function is a coroutine. + for (BasicBlock &B : *F) { + auto *TI = B.getTerminator(); + if (TI->getNumSuccessors() == 0 && !TI->isExceptionalTerminator() && + !isa<UnreachableInst>(TI)) + Terminators.insert(&B); + } // Filter out the coro.destroy that lie along exceptional paths. 
- SmallPtrSet<CoroSubFnInst *, 4> DAs; - for (CoroSubFnInst *DA : DestroyAddr) { - for (Instruction *TI : Terminators) { - if (DT.dominates(DA, TI)) { - DAs.insert(DA); - break; + SmallPtrSet<CoroBeginInst *, 8> ReferencedCoroBegins; + for (auto &It : DestroyAddr) { + for (Instruction *DA : It.second) { + for (BasicBlock *TI : Terminators) { + if (DT.dominates(DA, TI->getTerminator())) { + ReferencedCoroBegins.insert(It.first); + break; + } } } - } - // Find all the coro.begin referenced by coro.destroy along happy paths. - SmallPtrSet<CoroBeginInst *, 8> ReferencedCoroBegins; - for (CoroSubFnInst *DA : DAs) { - if (auto *CB = dyn_cast<CoroBeginInst>(DA->getFrame())) - ReferencedCoroBegins.insert(CB); - else - return false; + // Whether there is any paths from coro.begin to Terminators which not pass + // through any of the coro.destroys. + if (!ReferencedCoroBegins.count(It.first) && + !hasEscapePath(It.first, Terminators)) + ReferencedCoroBegins.insert(It.first); } // If size of the set is the same as total number of coro.begin, that means we @@ -188,6 +255,30 @@ bool Lowerer::shouldElide(Function *F, DominatorTree &DT) const { return ReferencedCoroBegins.size() == CoroBegins.size(); } +void Lowerer::collectPostSplitCoroIds(Function *F) { + CoroIds.clear(); + CoroSuspendSwitches.clear(); + for (auto &I : instructions(F)) { + if (auto *CII = dyn_cast<CoroIdInst>(&I)) + if (CII->getInfo().isPostSplit()) + // If it is the coroutine itself, don't touch it. + if (CII->getCoroutine() != CII->getFunction()) + CoroIds.push_back(CII); + + // Consider case like: + // %0 = call i8 @llvm.coro.suspend(...) + // switch i8 %0, label %suspend [i8 0, label %resume + // i8 1, label %cleanup] + // and collect the SwitchInsts which are used by escape analysis later. + if (auto *CSI = dyn_cast<CoroSuspendInst>(&I)) + if (CSI->hasOneUse() && isa<SwitchInst>(CSI->use_begin()->getUser())) { + SwitchInst *SWI = cast<SwitchInst>(CSI->use_begin()->getUser()); + if (SWI->getNumCases() == 2) + CoroSuspendSwitches.insert(SWI); + } + } +} + bool Lowerer::processCoroId(CoroIdInst *CoroId, AAResults &AA, DominatorTree &DT) { CoroBegins.clear(); @@ -218,7 +309,7 @@ bool Lowerer::processCoroId(CoroIdInst *CoroId, AAResults &AA, ResumeAddr.push_back(II); break; case CoroSubFnInst::DestroyIndex: - DestroyAddr.push_back(II); + DestroyAddr[CB].push_back(II); break; default: llvm_unreachable("unexpected coro.subfn.addr constant"); @@ -241,11 +332,13 @@ bool Lowerer::processCoroId(CoroIdInst *CoroId, AAResults &AA, Resumers, ShouldElide ? 
CoroSubFnInst::CleanupIndex : CoroSubFnInst::DestroyIndex); - replaceWithConstant(DestroyAddrConstant, DestroyAddr); + for (auto &It : DestroyAddr) + replaceWithConstant(DestroyAddrConstant, It.second); if (ShouldElide) { - auto *FrameTy = getFrameType(cast<Function>(ResumeAddrConstant)); - elideHeapAllocations(CoroId->getFunction(), FrameTy, AA); + auto FrameSizeAndAlign = getFrameLayout(cast<Function>(ResumeAddrConstant)); + elideHeapAllocations(CoroId->getFunction(), FrameSizeAndAlign.first, + FrameSizeAndAlign.second, AA); coro::replaceCoroFree(CoroId, /*Elide=*/true); } @@ -272,9 +365,31 @@ static bool replaceDevirtTrigger(Function &F) { return true; } -//===----------------------------------------------------------------------===// -// Top Level Driver -//===----------------------------------------------------------------------===// +static bool declaresCoroElideIntrinsics(Module &M) { + return coro::declaresIntrinsics(M, {"llvm.coro.id"}); +} + +PreservedAnalyses CoroElidePass::run(Function &F, FunctionAnalysisManager &AM) { + auto &M = *F.getParent(); + if (!declaresCoroElideIntrinsics(M)) + return PreservedAnalyses::all(); + + Lowerer L(M); + L.CoroIds.clear(); + L.collectPostSplitCoroIds(&F); + // If we did not find any coro.id, there is nothing to do. + if (L.CoroIds.empty()) + return PreservedAnalyses::all(); + + AAResults &AA = AM.getResult<AAManager>(F); + DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F); + + bool Changed = false; + for (auto *CII : L.CoroIds) + Changed |= L.processCoroId(CII, AA, DT); + + return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); +} namespace { struct CoroElideLegacy : FunctionPass { @@ -286,7 +401,7 @@ struct CoroElideLegacy : FunctionPass { std::unique_ptr<Lowerer> L; bool doInitialization(Module &M) override { - if (coro::declaresIntrinsics(M, {"llvm.coro.id"})) + if (declaresCoroElideIntrinsics(M)) L = std::make_unique<Lowerer>(M); return false; } @@ -301,15 +416,7 @@ struct CoroElideLegacy : FunctionPass { Changed = replaceDevirtTrigger(F); L->CoroIds.clear(); - - // Collect all PostSplit coro.ids. - for (auto &I : instructions(F)) - if (auto *CII = dyn_cast<CoroIdInst>(&I)) - if (CII->getInfo().isPostSplit()) - // If it is the coroutine itself, don't touch it. - if (CII->getCoroutine() != CII->getFunction()) - L->CoroIds.push_back(CII); - + L->collectPostSplitCoroIds(&F); // If we did not find any coro.id, there is nothing to do. 
if (L->CoroIds.empty()) return Changed; diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp index 2c42cf8a6d25..f55501a05d85 100644 --- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp @@ -18,18 +18,22 @@ #include "CoroInternal.h" #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/SmallString.h" #include "llvm/Analysis/PtrUseVisitor.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/CFG.h" +#include "llvm/IR/DIBuilder.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/circular_raw_ostream.h" +#include "llvm/Support/OptimizedStructLayout.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" +#include <algorithm> using namespace llvm; @@ -105,7 +109,6 @@ struct SuspendCrossingInfo { size_t const DefIndex = Mapping.blockToIndex(DefBB); size_t const UseIndex = Mapping.blockToIndex(UseBB); - assert(Block[UseIndex].Consumes[DefIndex] && "use must consume def"); bool const Result = Block[UseIndex].Kills[DefIndex]; LLVM_DEBUG(dbgs() << UseBB->getName() << " => " << DefBB->getName() << " answer is " << Result << "\n"); @@ -338,52 +341,182 @@ namespace { // coroutine frame and if the alignment specified on the Alloca instruction // differs from the natural alignment of the alloca type we will need to insert // padding. -struct PaddingCalculator { +class FrameTypeBuilder { + struct Field { + uint64_t Size; + uint64_t Offset; + Spill *ForSpill; + Type *Ty; + unsigned FieldIndex; + Align Alignment; + Align TyAlignment; + }; + const DataLayout &DL; LLVMContext &Context; - unsigned StructSize = 0; + uint64_t StructSize = 0; + Align StructAlign; + bool IsFinished = false; - PaddingCalculator(LLVMContext &Context, DataLayout const &DL) - : DL(DL), Context(Context) {} + SmallVector<Field, 8> Fields; + DenseMap<Value*, unsigned> FieldIndexByKey; - // Replicate the logic from IR/DataLayout.cpp to match field offset - // computation for LLVM structs. - void addType(Type *Ty) { - unsigned TyAlign = DL.getABITypeAlignment(Ty); - if ((StructSize & (TyAlign - 1)) != 0) - StructSize = alignTo(StructSize, TyAlign); +public: + FrameTypeBuilder(LLVMContext &Context, DataLayout const &DL) + : DL(DL), Context(Context) {} - StructSize += DL.getTypeAllocSize(Ty); // Consume space for this data item. - } + class FieldId { + size_t Value; + explicit FieldId(size_t Value) : Value(Value) {} + + friend class FrameTypeBuilder; + }; + + /// Add a field to this structure for the storage of an `alloca` + /// instruction. + FieldId addFieldForAlloca(AllocaInst *AI, Spill *ForSpill = nullptr, + bool IsHeader = false) { + Type *Ty = AI->getAllocatedType(); + + // Make an array type if this is a static array allocation. 
+ if (AI->isArrayAllocation()) { + if (auto *CI = dyn_cast<ConstantInt>(AI->getArraySize())) + Ty = ArrayType::get(Ty, CI->getValue().getZExtValue()); + else + report_fatal_error("Coroutines cannot handle non static allocas yet"); + } - void addTypes(SmallVectorImpl<Type *> const &Types) { - for (auto *Ty : Types) - addType(Ty); + return addField(Ty, AI->getAlign(), ForSpill, IsHeader); } - unsigned computePadding(Type *Ty, unsigned ForcedAlignment) { - unsigned TyAlign = DL.getABITypeAlignment(Ty); - auto Natural = alignTo(StructSize, TyAlign); - auto Forced = alignTo(StructSize, ForcedAlignment); + /// Add a field to this structure. + FieldId addField(Type *Ty, MaybeAlign FieldAlignment, + Spill *ForSpill = nullptr, + bool IsHeader = false) { + assert(!IsFinished && "adding fields to a finished builder"); + assert(Ty && "must provide a type for a field"); - // Return how many bytes of padding we need to insert. - if (Natural != Forced) - return std::max(Natural, Forced) - StructSize; + // The field size is always the alloc size of the type. + uint64_t FieldSize = DL.getTypeAllocSize(Ty); - // Rely on natural alignment. - return 0; + // The field alignment might not be the type alignment, but we need + // to remember the type alignment anyway to build the type. + Align TyAlignment = DL.getABITypeAlign(Ty); + if (!FieldAlignment) FieldAlignment = TyAlignment; + + // Lay out header fields immediately. + uint64_t Offset; + if (IsHeader) { + Offset = alignTo(StructSize, FieldAlignment); + StructSize = Offset + FieldSize; + + // Everything else has a flexible offset. + } else { + Offset = OptimizedStructLayoutField::FlexibleOffset; + } + + Fields.push_back({FieldSize, Offset, ForSpill, Ty, 0, + *FieldAlignment, TyAlignment}); + return FieldId(Fields.size() - 1); + } + + /// Finish the layout and set the body on the given type. + void finish(StructType *Ty); + + uint64_t getStructSize() const { + assert(IsFinished && "not yet finished!"); + return StructSize; } - // If padding required, return the padding field type to insert. - ArrayType *getPaddingType(Type *Ty, unsigned ForcedAlignment) { - if (auto Padding = computePadding(Ty, ForcedAlignment)) - return ArrayType::get(Type::getInt8Ty(Context), Padding); + Align getStructAlign() const { + assert(IsFinished && "not yet finished!"); + return StructAlign; + } - return nullptr; + unsigned getFieldIndex(FieldId Id) const { + assert(IsFinished && "not yet finished!"); + return Fields[Id.Value].FieldIndex; } }; } // namespace +void FrameTypeBuilder::finish(StructType *Ty) { + assert(!IsFinished && "already finished!"); + + // Prepare the optimal-layout field array. + // The Id in the layout field is a pointer to our Field for it. + SmallVector<OptimizedStructLayoutField, 8> LayoutFields; + LayoutFields.reserve(Fields.size()); + for (auto &Field : Fields) { + LayoutFields.emplace_back(&Field, Field.Size, Field.Alignment, + Field.Offset); + } + + // Perform layout. + auto SizeAndAlign = performOptimizedStructLayout(LayoutFields); + StructSize = SizeAndAlign.first; + StructAlign = SizeAndAlign.second; + + auto getField = [](const OptimizedStructLayoutField &LayoutField) -> Field & { + return *static_cast<Field *>(const_cast<void*>(LayoutField.Id)); + }; + + // We need to produce a packed struct type if there's a field whose + // assigned offset isn't a multiple of its natural type alignment. 
+ bool Packed = [&] { + for (auto &LayoutField : LayoutFields) { + auto &F = getField(LayoutField); + if (!isAligned(F.TyAlignment, LayoutField.Offset)) + return true; + } + return false; + }(); + + // Build the struct body. + SmallVector<Type*, 16> FieldTypes; + FieldTypes.reserve(LayoutFields.size() * 3 / 2); + uint64_t LastOffset = 0; + for (auto &LayoutField : LayoutFields) { + auto &F = getField(LayoutField); + + auto Offset = LayoutField.Offset; + + // Add a padding field if there's a padding gap and we're either + // building a packed struct or the padding gap is more than we'd + // get from aligning to the field type's natural alignment. + assert(Offset >= LastOffset); + if (Offset != LastOffset) { + if (Packed || alignTo(LastOffset, F.TyAlignment) != Offset) + FieldTypes.push_back(ArrayType::get(Type::getInt8Ty(Context), + Offset - LastOffset)); + } + + // Record the layout information into both the Field and the + // original Spill, if there is one. + F.Offset = Offset; + F.FieldIndex = FieldTypes.size(); + if (F.ForSpill) { + F.ForSpill->setFieldIndex(F.FieldIndex); + } + + FieldTypes.push_back(F.Ty); + LastOffset = Offset + F.Size; + } + + Ty->setBody(FieldTypes, Packed); + +#ifndef NDEBUG + // Check that the IR layout matches the offsets we expect. + auto Layout = DL.getStructLayout(Ty); + for (auto &F : Fields) { + assert(Ty->getElementType(F.FieldIndex) == F.Ty); + assert(Layout->getElementOffset(F.FieldIndex) == F.Offset); + } +#endif + + IsFinished = true; +} + // Build a struct that will keep state for an active coroutine. // struct f.frame { // ResumeFnTy ResumeFnAddr; @@ -396,13 +529,17 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape, SpillInfo &Spills) { LLVMContext &C = F.getContext(); const DataLayout &DL = F.getParent()->getDataLayout(); - PaddingCalculator Padder(C, DL); - SmallString<32> Name(F.getName()); - Name.append(".Frame"); - StructType *FrameTy = StructType::create(C, Name); - SmallVector<Type *, 8> Types; + StructType *FrameTy = [&] { + SmallString<32> Name(F.getName()); + Name.append(".Frame"); + return StructType::create(C, Name); + }(); + + FrameTypeBuilder B(C, DL); AllocaInst *PromiseAlloca = Shape.getPromiseAlloca(); + Optional<FrameTypeBuilder::FieldId> PromiseFieldId; + Optional<FrameTypeBuilder::FieldId> SwitchIndexFieldId; if (Shape.ABI == coro::ABI::Switch) { auto *FramePtrTy = FrameTy->getPointerTo(); @@ -410,74 +547,74 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape, /*IsVarArg=*/false); auto *FnPtrTy = FnTy->getPointerTo(); - // Figure out how wide should be an integer type storing the suspend index. + // Add header fields for the resume and destroy functions. + // We can rely on these being perfectly packed. + B.addField(FnPtrTy, None, nullptr, /*header*/ true); + B.addField(FnPtrTy, None, nullptr, /*header*/ true); + + // Add a header field for the promise if there is one. + if (PromiseAlloca) { + PromiseFieldId = + B.addFieldForAlloca(PromiseAlloca, nullptr, /*header*/ true); + } + + // Add a field to store the suspend index. This doesn't need to + // be in the header. unsigned IndexBits = std::max(1U, Log2_64_Ceil(Shape.CoroSuspends.size())); - Type *PromiseType = PromiseAlloca - ? 
PromiseAlloca->getType()->getElementType() - : Type::getInt1Ty(C); Type *IndexType = Type::getIntNTy(C, IndexBits); - Types.push_back(FnPtrTy); - Types.push_back(FnPtrTy); - Types.push_back(PromiseType); - Types.push_back(IndexType); + + SwitchIndexFieldId = B.addField(IndexType, None); } else { assert(PromiseAlloca == nullptr && "lowering doesn't support promises"); } Value *CurrentDef = nullptr; - Padder.addTypes(Types); - // Create an entry for every spilled value. for (auto &S : Spills) { + // We can have multiple entries in Spills for a single value, but + // they should form a contiguous run. Ignore all but the first. if (CurrentDef == S.def()) continue; CurrentDef = S.def(); - // PromiseAlloca was already added to Types array earlier. - if (CurrentDef == PromiseAlloca) - continue; - uint64_t Count = 1; - Type *Ty = nullptr; + assert(CurrentDef != PromiseAlloca && + "recorded spill use of promise alloca?"); + if (auto *AI = dyn_cast<AllocaInst>(CurrentDef)) { - Ty = AI->getAllocatedType(); - if (unsigned AllocaAlignment = AI->getAlignment()) { - // If alignment is specified in alloca, see if we need to insert extra - // padding. - if (auto PaddingTy = Padder.getPaddingType(Ty, AllocaAlignment)) { - Types.push_back(PaddingTy); - Padder.addType(PaddingTy); - } - } - if (auto *CI = dyn_cast<ConstantInt>(AI->getArraySize())) - Count = CI->getValue().getZExtValue(); - else - report_fatal_error("Coroutines cannot handle non static allocas yet"); + B.addFieldForAlloca(AI, &S); } else { - Ty = CurrentDef->getType(); + Type *Ty = CurrentDef->getType(); + B.addField(Ty, None, &S); } - S.setFieldIndex(Types.size()); - if (Count == 1) - Types.push_back(Ty); - else - Types.push_back(ArrayType::get(Ty, Count)); - Padder.addType(Ty); } - FrameTy->setBody(Types); + + B.finish(FrameTy); + Shape.FrameAlign = B.getStructAlign(); + Shape.FrameSize = B.getStructSize(); switch (Shape.ABI) { + // In the switch ABI, remember the field indices for the promise and + // switch-index fields. case coro::ABI::Switch: + Shape.SwitchLowering.IndexField = + B.getFieldIndex(*SwitchIndexFieldId); + Shape.SwitchLowering.PromiseField = + (PromiseAlloca ? B.getFieldIndex(*PromiseFieldId) : 0); + + // Also round the frame size up to a multiple of its alignment, as is + // generally expected in C/C++. + Shape.FrameSize = alignTo(Shape.FrameSize, Shape.FrameAlign); break; - // Remember whether the frame is inline in the storage. + // In the retcon ABI, remember whether the frame is inline in the storage. case coro::ABI::Retcon: case coro::ABI::RetconOnce: { - auto &Layout = F.getParent()->getDataLayout(); auto Id = Shape.getRetconCoroId(); Shape.RetconLowering.IsFrameInlineInStorage - = (Layout.getTypeAllocSize(FrameTy) <= Id->getStorageSize() && - Layout.getABITypeAlignment(FrameTy) <= Id->getStorageAlignment()); + = (B.getStructSize() <= Id->getStorageSize() && + B.getStructAlign() <= Id->getStorageAlignment()); break; } } @@ -606,10 +743,12 @@ static Instruction *insertSpills(const SpillInfo &Spills, coro::Shape &Shape) { // we remember allocas and their indices to be handled once we processed // all the spills. SmallVector<std::pair<AllocaInst *, unsigned>, 4> Allocas; - // Promise alloca (if present) has a fixed field number. + + // Promise alloca (if present) doesn't show in the spills and has a + // special field number. 
if (auto *PromiseAlloca = Shape.getPromiseAlloca()) { assert(Shape.ABI == coro::ABI::Switch); - Allocas.emplace_back(PromiseAlloca, coro::Shape::SwitchFieldIndex::Promise); + Allocas.emplace_back(PromiseAlloca, Shape.getPromiseField()); } // Create a GEP with the given index into the coroutine frame for the original @@ -636,12 +775,12 @@ static Instruction *insertSpills(const SpillInfo &Spills, coro::Shape &Shape) { }; // Create a load instruction to reload the spilled value from the coroutine - // frame. - auto CreateReload = [&](Instruction *InsertBefore) { + // frame. Populates the Value pointer reference provided with the frame GEP. + auto CreateReload = [&](Instruction *InsertBefore, Value *&G) { assert(Index != InvalidFieldIndex && "accessing unassigned field number"); Builder.SetInsertPoint(InsertBefore); - auto *G = GetFramePointer(Index, CurrentValue); + G = GetFramePointer(Index, CurrentValue); G->setName(CurrentValue->getName() + Twine(".reload.addr")); return isa<AllocaInst>(CurrentValue) @@ -650,6 +789,7 @@ static Instruction *insertSpills(const SpillInfo &Spills, coro::Shape &Shape) { CurrentValue->getName() + Twine(".reload")); }; + Value *GEP = nullptr, *CurrentGEP = nullptr; for (auto const &E : Spills) { // If we have not seen the value, generate a spill. if (CurrentValue != E.def()) { @@ -722,7 +862,7 @@ static Instruction *insertSpills(const SpillInfo &Spills, coro::Shape &Shape) { // If we have not seen the use block, generate a reload in it. if (CurrentBlock != E.userBlock()) { CurrentBlock = E.userBlock(); - CurrentReload = CreateReload(&*CurrentBlock->getFirstInsertionPt()); + CurrentReload = CreateReload(&*CurrentBlock->getFirstInsertionPt(), GEP); } // If we have a single edge PHINode, remove it and replace it with a reload @@ -736,6 +876,19 @@ static Instruction *insertSpills(const SpillInfo &Spills, coro::Shape &Shape) { continue; } + // If we have not seen this GEP instruction, migrate any dbg.declare from + // the alloca to it. + if (CurrentGEP != GEP) { + CurrentGEP = GEP; + TinyPtrVector<DbgDeclareInst *> DIs = FindDbgDeclareUses(CurrentValue); + if (!DIs.empty()) + DIBuilder(*CurrentBlock->getParent()->getParent(), + /*AllowUnresolved*/ false) + .insertDeclare(CurrentGEP, DIs.front()->getVariable(), + DIs.front()->getExpression(), + DIs.front()->getDebugLoc(), DIs.front()); + } + // Replace all uses of CurrentValue in the current instruction with reload. E.user()->replaceUsesOfWith(CurrentValue, CurrentReload); } @@ -746,14 +899,38 @@ static Instruction *insertSpills(const SpillInfo &Spills, coro::Shape &Shape) { FramePtrBB->splitBasicBlock(FramePtr->getNextNode(), "AllocaSpillBB"); SpillBlock->splitBasicBlock(&SpillBlock->front(), "PostSpill"); Shape.AllocaSpillBlock = SpillBlock; - // If we found any allocas, replace all of their remaining uses with Geps. - // Note: we cannot do it indiscriminately as some of the uses may not be - // dominated by CoroBegin. + + // retcon and retcon.once lowering assumes all uses have been sunk. + if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce) { + // If we found any allocas, replace all of their remaining uses with Geps. + Builder.SetInsertPoint(&SpillBlock->front()); + for (auto &P : Allocas) { + auto *G = GetFramePointer(P.second, P.first); + + // We are not using ReplaceInstWithInst(P.first, cast<Instruction>(G)) + // here, as we are changing location of the instruction. 
+ G->takeName(P.first); + P.first->replaceAllUsesWith(G); + P.first->eraseFromParent(); + } + return FramePtr; + } + + // If we found any alloca, replace all of their remaining uses with GEP + // instructions. Because new dbg.declare have been created for these alloca, + // we also delete the original dbg.declare and replace other uses with undef. + // Note: We cannot replace the alloca with GEP instructions indiscriminately, + // as some of the uses may not be dominated by CoroBegin. bool MightNeedToCopy = false; Builder.SetInsertPoint(&Shape.AllocaSpillBlock->front()); SmallVector<Instruction *, 4> UsersToUpdate; for (auto &P : Allocas) { AllocaInst *const A = P.first; + + for (auto *DI : FindDbgDeclareUses(A)) + DI->eraseFromParent(); + replaceDbgUsesWithUndef(A); + UsersToUpdate.clear(); for (User *U : A->users()) { auto *I = cast<Instruction>(U); @@ -784,7 +961,7 @@ static Instruction *insertSpills(const SpillInfo &Spills, coro::Shape &Shape) { "Coroutines cannot handle copying of array allocas yet"); auto *G = GetFramePointer(P.second, A); - auto *Value = Builder.CreateLoad(A); + auto *Value = Builder.CreateLoad(A->getAllocatedType(), A); Builder.CreateStore(Value, G); } } @@ -1106,7 +1283,7 @@ static void lowerLocalAllocas(ArrayRef<CoroAllocaAllocInst*> LocalAllocas, // Allocate memory. auto Alloca = Builder.CreateAlloca(Builder.getInt8Ty(), AI->getSize()); - Alloca->setAlignment(MaybeAlign(AI->getAlignment())); + Alloca->setAlignment(Align(AI->getAlignment())); for (auto U : AI->users()) { // Replace gets with the allocation. @@ -1166,7 +1343,7 @@ static Value *emitGetSwiftErrorValue(IRBuilder<> &Builder, Type *ValueTy, auto FnTy = FunctionType::get(ValueTy, {}, false); auto Fn = ConstantPointerNull::get(FnTy->getPointerTo()); - auto Call = Builder.CreateCall(Fn, {}); + auto Call = Builder.CreateCall(FnTy, Fn, {}); Shape.SwiftErrorOps.push_back(Call); return Call; @@ -1182,7 +1359,7 @@ static Value *emitSetSwiftErrorValue(IRBuilder<> &Builder, Value *V, {V->getType()}, false); auto Fn = ConstantPointerNull::get(FnTy->getPointerTo()); - auto Call = Builder.CreateCall(Fn, { V }); + auto Call = Builder.CreateCall(FnTy, Fn, { V }); Shape.SwiftErrorOps.push_back(Call); return Call; @@ -1322,11 +1499,125 @@ static void eliminateSwiftError(Function &F, coro::Shape &Shape) { } } -void coro::buildCoroutineFrame(Function &F, Shape &Shape) { - // Lower coro.dbg.declare to coro.dbg.value, since we are going to rewrite - // access to local variables. - LowerDbgDeclare(F); +/// retcon and retcon.once conventions assume that all spill uses can be sunk +/// after the coro.begin intrinsic. +static void sinkSpillUsesAfterCoroBegin(Function &F, const SpillInfo &Spills, + CoroBeginInst *CoroBegin) { + DominatorTree Dom(F); + + SmallSetVector<Instruction *, 32> ToMove; + SmallVector<Instruction *, 32> Worklist; + + // Collect all users that precede coro.begin. + for (auto const &Entry : Spills) { + auto *SpillDef = Entry.def(); + for (User *U : SpillDef->users()) { + auto Inst = cast<Instruction>(U); + if (Inst->getParent() != CoroBegin->getParent() || + Dom.dominates(CoroBegin, Inst)) + continue; + if (ToMove.insert(Inst)) + Worklist.push_back(Inst); + } + } + // Recursively collect users before coro.begin. + while (!Worklist.empty()) { + auto *Def = Worklist.back(); + Worklist.pop_back(); + for (User *U : Def->users()) { + auto Inst = cast<Instruction>(U); + if (Dom.dominates(CoroBegin, Inst)) + continue; + if (ToMove.insert(Inst)) + Worklist.push_back(Inst); + } + } + + // Sort by dominance. 
+ SmallVector<Instruction *, 64> InsertionList(ToMove.begin(), ToMove.end()); + std::sort(InsertionList.begin(), InsertionList.end(), + [&Dom](Instruction *A, Instruction *B) -> bool { + // If a dominates b it should preceed (<) b. + return Dom.dominates(A, B); + }); + Instruction *InsertPt = CoroBegin->getNextNode(); + for (Instruction *Inst : InsertionList) + Inst->moveBefore(InsertPt); + + return; +} + +/// For each local variable that all of its user are only used inside one of +/// suspended region, we sink their lifetime.start markers to the place where +/// after the suspend block. Doing so minimizes the lifetime of each variable, +/// hence minimizing the amount of data we end up putting on the frame. +static void sinkLifetimeStartMarkers(Function &F, coro::Shape &Shape, + SuspendCrossingInfo &Checker) { + DominatorTree DT(F); + + // Collect all possible basic blocks which may dominate all uses of allocas. + SmallPtrSet<BasicBlock *, 4> DomSet; + DomSet.insert(&F.getEntryBlock()); + for (auto *CSI : Shape.CoroSuspends) { + BasicBlock *SuspendBlock = CSI->getParent(); + assert(isSuspendBlock(SuspendBlock) && SuspendBlock->getSingleSuccessor() && + "should have split coro.suspend into its own block"); + DomSet.insert(SuspendBlock->getSingleSuccessor()); + } + + for (Instruction &I : instructions(F)) { + if (!isa<AllocaInst>(&I)) + continue; + + for (BasicBlock *DomBB : DomSet) { + bool Valid = true; + SmallVector<Instruction *, 1> BCInsts; + + auto isUsedByLifetimeStart = [&](Instruction *I) { + if (isa<BitCastInst>(I) && I->hasOneUse()) + if (auto *IT = dyn_cast<IntrinsicInst>(I->user_back())) + return IT->getIntrinsicID() == Intrinsic::lifetime_start; + return false; + }; + + for (User *U : I.users()) { + Instruction *UI = cast<Instruction>(U); + // For all users except lifetime.start markers, if they are all + // dominated by one of the basic blocks and do not cross + // suspend points as well, then there is no need to spill the + // instruction. + if (!DT.dominates(DomBB, UI->getParent()) || + Checker.isDefinitionAcrossSuspend(DomBB, U)) { + // Skip bitcast used by lifetime.start markers. + if (isUsedByLifetimeStart(UI)) { + BCInsts.push_back(UI); + continue; + } + Valid = false; + break; + } + } + // Sink lifetime.start markers to dominate block when they are + // only used outside the region. + if (Valid && BCInsts.size() != 0) { + auto *NewBitcast = BCInsts[0]->clone(); + auto *NewLifetime = cast<Instruction>(BCInsts[0]->user_back())->clone(); + NewLifetime->replaceUsesOfWith(BCInsts[0], NewBitcast); + NewBitcast->insertBefore(DomBB->getTerminator()); + NewLifetime->insertBefore(DomBB->getTerminator()); + + // All the outsided lifetime.start markers are no longer necessary. + for (Instruction *S : BCInsts) { + S->user_back()->eraseFromParent(); + } + break; + } + } + } +} + +void coro::buildCoroutineFrame(Function &F, Shape &Shape) { eliminateSwiftError(F, Shape); if (Shape.ABI == coro::ABI::Switch && @@ -1376,6 +1667,25 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) { Spills.clear(); } + sinkLifetimeStartMarkers(F, Shape, Checker); + // Collect lifetime.start info for each alloca. 
+ using LifetimeStart = SmallPtrSet<Instruction *, 2>; + llvm::DenseMap<Instruction *, std::unique_ptr<LifetimeStart>> LifetimeMap; + for (Instruction &I : instructions(F)) { + auto *II = dyn_cast<IntrinsicInst>(&I); + if (!II || II->getIntrinsicID() != Intrinsic::lifetime_start) + continue; + + if (auto *OpInst = dyn_cast<BitCastInst>(I.getOperand(1))) + if (auto *AI = dyn_cast<AllocaInst>(OpInst->getOperand(0))) { + + if (LifetimeMap.find(AI) == LifetimeMap.end()) + LifetimeMap[AI] = std::make_unique<LifetimeStart>(); + + LifetimeMap[AI]->insert(OpInst); + } + } + // Collect the spills for arguments and other not-materializable values. for (Argument &A : F.args()) for (User *U : A.users()) @@ -1421,16 +1731,31 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) { continue; } - for (User *U : I.users()) - if (Checker.isDefinitionAcrossSuspend(I, U)) { + auto Iter = LifetimeMap.find(&I); + for (User *U : I.users()) { + bool NeedSpill = false; + + // Check against lifetime.start if the instruction has the info. + if (Iter != LifetimeMap.end()) + for (auto *S : *Iter->second) { + if ((NeedSpill = Checker.isDefinitionAcrossSuspend(*S, U))) + break; + } + else + NeedSpill = Checker.isDefinitionAcrossSuspend(I, U); + + if (NeedSpill) { // We cannot spill a token. if (I.getType()->isTokenTy()) report_fatal_error( "token definition is separated from the use by a suspend point"); Spills.emplace_back(&I, U); } + } } LLVM_DEBUG(dump("Spills", Spills)); + if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce) + sinkSpillUsesAfterCoroBegin(F, Spills, Shape.CoroBegin); Shape.FrameTy = buildFrameType(F, Shape, Spills); Shape.FramePtr = insertSpills(Spills, Shape); lowerLocalAllocas(LocalAllocas, DeadInstructions); diff --git a/llvm/lib/Transforms/Coroutines/CoroInstr.h b/llvm/lib/Transforms/Coroutines/CoroInstr.h index de2d2920cb15..320137526db8 100644 --- a/llvm/lib/Transforms/Coroutines/CoroInstr.h +++ b/llvm/lib/Transforms/Coroutines/CoroInstr.h @@ -211,8 +211,8 @@ public: return cast<ConstantInt>(getArgOperand(SizeArg))->getZExtValue(); } - uint64_t getStorageAlignment() const { - return cast<ConstantInt>(getArgOperand(AlignArg))->getZExtValue(); + Align getStorageAlignment() const { + return cast<ConstantInt>(getArgOperand(AlignArg))->getAlignValue(); } Value *getStorage() const { @@ -338,11 +338,16 @@ class LLVM_LIBRARY_VISIBILITY CoroPromiseInst : public IntrinsicInst { enum { FrameArg, AlignArg, FromArg }; public: + /// Are we translating from the frame to the promise (false) or from + /// the promise to the frame (true)? bool isFromPromise() const { return cast<Constant>(getArgOperand(FromArg))->isOneValue(); } - unsigned getAlignment() const { - return cast<ConstantInt>(getArgOperand(AlignArg))->getZExtValue(); + + /// The required alignment of the promise. This must match the + /// alignment of the promise alloca in the coroutine. 
+ Align getAlignment() const { + return cast<ConstantInt>(getArgOperand(AlignArg))->getAlignValue(); } // Methods to support type inquiry through isa, cast, and dyn_cast: @@ -463,8 +468,8 @@ public: Value *getSize() const { return getArgOperand(SizeArg); } - unsigned getAlignment() const { - return cast<ConstantInt>(getArgOperand(AlignArg))->getZExtValue(); + Align getAlignment() const { + return cast<ConstantInt>(getArgOperand(AlignArg))->getAlignValue(); } // Methods to support type inquiry through isa, cast, and dyn_cast: diff --git a/llvm/lib/Transforms/Coroutines/CoroInternal.h b/llvm/lib/Transforms/Coroutines/CoroInternal.h index 7eb35400c0d5..bd76e93c9124 100644 --- a/llvm/lib/Transforms/Coroutines/CoroInternal.h +++ b/llvm/lib/Transforms/Coroutines/CoroInternal.h @@ -96,17 +96,22 @@ struct LLVM_LIBRARY_VISIBILITY Shape { struct SwitchFieldIndex { enum { Resume, - Destroy, - Promise, - Index, - /// The index of the first spill field. - FirstSpill + Destroy + + // The promise field is always at a fixed offset from the start of + // frame given its type, but the index isn't a constant for all + // possible frames. + + // The switch-index field isn't at a fixed offset or index, either; + // we just work it in where it fits best. }; }; coro::ABI ABI; StructType *FrameTy; + Align FrameAlign; + uint64_t FrameSize; Instruction *FramePtr; BasicBlock *AllocaSpillBlock; @@ -114,6 +119,8 @@ struct LLVM_LIBRARY_VISIBILITY Shape { SwitchInst *ResumeSwitch; AllocaInst *PromiseAlloca; BasicBlock *ResumeEntryBlock; + unsigned IndexField; + unsigned PromiseField; bool HasFinalSuspend; }; @@ -141,10 +148,15 @@ struct LLVM_LIBRARY_VISIBILITY Shape { return cast<AnyCoroIdRetconInst>(CoroBegin->getId()); } + unsigned getSwitchIndexField() const { + assert(ABI == coro::ABI::Switch); + assert(FrameTy && "frame type not assigned"); + return SwitchLowering.IndexField; + } IntegerType *getIndexType() const { assert(ABI == coro::ABI::Switch); assert(FrameTy && "frame type not assigned"); - return cast<IntegerType>(FrameTy->getElementType(SwitchFieldIndex::Index)); + return cast<IntegerType>(FrameTy->getElementType(getSwitchIndexField())); } ConstantInt *getIndex(uint64_t Value) const { return ConstantInt::get(getIndexType(), Value); @@ -203,23 +215,17 @@ struct LLVM_LIBRARY_VISIBILITY Shape { llvm_unreachable("Unknown coro::ABI enum"); } - unsigned getFirstSpillFieldIndex() const { - switch (ABI) { - case coro::ABI::Switch: - return SwitchFieldIndex::FirstSpill; - - case coro::ABI::Retcon: - case coro::ABI::RetconOnce: - return 0; - } - llvm_unreachable("Unknown coro::ABI enum"); - } - AllocaInst *getPromiseAlloca() const { if (ABI == coro::ABI::Switch) return SwitchLowering.PromiseAlloca; return nullptr; } + unsigned getPromiseField() const { + assert(ABI == coro::ABI::Switch); + assert(FrameTy && "frame type not assigned"); + assert(SwitchLowering.PromiseAlloca && "no promise alloca"); + return SwitchLowering.PromiseField; + } /// Allocate memory according to the rules of the active lowering. /// diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp index 66cb3e74e53e..9c4392e7999b 100644 --- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp @@ -18,6 +18,7 @@ // coroutine. 
//===----------------------------------------------------------------------===// +#include "llvm/Transforms/Coroutines/CoroSplit.h" #include "CoroInstr.h" #include "CoroInternal.h" #include "llvm/ADT/DenseMap.h" @@ -31,7 +32,6 @@ #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -59,6 +59,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/CallGraphUpdater.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/ValueMapper.h" @@ -74,7 +75,7 @@ using namespace llvm; namespace { -/// A little helper class for building +/// A little helper class for building class CoroCloner { public: enum class Kind { @@ -283,7 +284,7 @@ static void createResumeEntryBlock(Function &F, coro::Shape &Shape) { auto *FramePtr = Shape.FramePtr; auto *FrameTy = Shape.FrameTy; auto *GepIndex = Builder.CreateStructGEP( - FrameTy, FramePtr, coro::Shape::SwitchFieldIndex::Index, "index.addr"); + FrameTy, FramePtr, Shape.getSwitchIndexField(), "index.addr"); auto *Index = Builder.CreateLoad(Shape.getIndexType(), GepIndex, "index"); auto *Switch = Builder.CreateSwitch(Index, UnreachBB, Shape.CoroSuspends.size()); @@ -309,7 +310,7 @@ static void createResumeEntryBlock(Function &F, coro::Shape &Shape) { Builder.CreateStore(NullPtr, GepIndex); } else { auto *GepIndex = Builder.CreateStructGEP( - FrameTy, FramePtr, coro::Shape::SwitchFieldIndex::Index, "index.addr"); + FrameTy, FramePtr, Shape.getSwitchIndexField(), "index.addr"); Builder.CreateStore(IndexVal, GepIndex); } Save->replaceAllUsesWith(ConstantTokenNone::get(C)); @@ -562,11 +563,12 @@ void CoroCloner::replaceEntryBlock() { // In the original function, the AllocaSpillBlock is a block immediately // following the allocation of the frame object which defines GEPs for // all the allocas that have been moved into the frame, and it ends by - // branching to the original beginning of the coroutine. Make this + // branching to the original beginning of the coroutine. Make this // the entry block of the cloned function. auto *Entry = cast<BasicBlock>(VMap[Shape.AllocaSpillBlock]); + auto *OldEntry = &NewF->getEntryBlock(); Entry->setName("entry" + Suffix); - Entry->moveBefore(&NewF->getEntryBlock()); + Entry->moveBefore(OldEntry); Entry->getTerminator()->eraseFromParent(); // Clear all predecessors of the new entry block. There should be @@ -579,8 +581,14 @@ void CoroCloner::replaceEntryBlock() { Builder.CreateUnreachable(); BranchToEntry->eraseFromParent(); - // TODO: move any allocas into Entry that weren't moved into the frame. - // (Currently we move all allocas into the frame.) + // Move any allocas into Entry that weren't moved into the frame. + for (auto IT = OldEntry->begin(), End = OldEntry->end(); IT != End;) { + Instruction &I = *IT++; + if (!isa<AllocaInst>(&I) || I.use_empty()) + continue; + + I.moveBefore(*Entry, Entry->getFirstInsertionPt()); + } // Branch from the entry to the appropriate place. Builder.SetInsertPoint(Entry); @@ -630,12 +638,23 @@ Value *CoroCloner::deriveNewFramePointer() { // Otherwise, load the real frame from the opaque storage. 
auto FramePtrPtr = Builder.CreateBitCast(NewStorage, FramePtrTy->getPointerTo()); - return Builder.CreateLoad(FramePtrPtr); + return Builder.CreateLoad(FramePtrTy, FramePtrPtr); } } llvm_unreachable("bad ABI"); } +static void addFramePointerAttrs(AttributeList &Attrs, LLVMContext &Context, + unsigned ParamIndex, + uint64_t Size, Align Alignment) { + AttrBuilder ParamAttrs; + ParamAttrs.addAttribute(Attribute::NonNull); + ParamAttrs.addAttribute(Attribute::NoAlias); + ParamAttrs.addAlignmentAttr(Alignment); + ParamAttrs.addDereferenceableAttr(Size); + Attrs = Attrs.addParamAttributes(Context, ParamIndex, ParamAttrs); +} + /// Clone the body of the original function into a resume function of /// some sort. void CoroCloner::create() { @@ -684,6 +703,9 @@ void CoroCloner::create() { // original function. This should include optimization settings and so on. NewAttrs = NewAttrs.addAttributes(Context, AttributeList::FunctionIndex, OrigAttrs.getFnAttributes()); + + addFramePointerAttrs(NewAttrs, Context, 0, + Shape.FrameSize, Shape.FrameAlign); break; case coro::ABI::Retcon: @@ -691,13 +713,13 @@ void CoroCloner::create() { // If we have a continuation prototype, just use its attributes, // full-stop. NewAttrs = Shape.RetconLowering.ResumePrototype->getAttributes(); + + addFramePointerAttrs(NewAttrs, Context, 0, + Shape.getRetconCoroId()->getStorageSize(), + Shape.getRetconCoroId()->getStorageAlignment()); break; } - // Make the frame parameter nonnull and noalias. - NewAttrs = NewAttrs.addParamAttribute(Context, 0, Attribute::NonNull); - NewAttrs = NewAttrs.addParamAttribute(Context, 0, Attribute::NoAlias); - switch (Shape.ABI) { // In these ABIs, the cloned functions always return 'void', and the // existing return sites are meaningless. Note that for unique @@ -872,7 +894,8 @@ static void postSplitCleanup(Function &F) { // For now, we do a mandatory verification step because we don't // entirely trust this pass. Note that we don't want to add a verifier // pass to FPM below because it will also verify all the global data. - verifyFunction(F); + if (verifyFunction(F, &errs())) + report_fatal_error("Broken function"); legacy::FunctionPassManager FPM(F.getParent()); @@ -911,17 +934,14 @@ static bool simplifyTerminatorLeadingToRet(Instruction *InitialInst) { BasicBlock *UnconditionalSucc = nullptr; Instruction *I = InitialInst; - while (I->isTerminator()) { + while (I->isTerminator() || + (isa<CmpInst>(I) && I->getNextNode()->isTerminator())) { if (isa<ReturnInst>(I)) { if (I != InitialInst) { // If InitialInst is an unconditional branch, // remove PHI values that come from basic block of InitialInst if (UnconditionalSucc) - for (PHINode &PN : UnconditionalSucc->phis()) { - int idx = PN.getBasicBlockIndex(InitialInst->getParent()); - if (idx != -1) - PN.removeIncomingValue(idx); - } + UnconditionalSucc->removePredecessor(InitialInst->getParent(), true); ReplaceInstWithInst(InitialInst, I->clone()); } return true; @@ -935,6 +955,29 @@ static bool simplifyTerminatorLeadingToRet(Instruction *InitialInst) { I = BB->getFirstNonPHIOrDbgOrLifetime(); continue; } + } else if (auto *CondCmp = dyn_cast<CmpInst>(I)) { + auto *BR = dyn_cast<BranchInst>(I->getNextNode()); + if (BR && BR->isConditional() && CondCmp == BR->getCondition()) { + // If the case number of suspended switch instruction is reduced to + // 1, then it is simplified to CmpInst in llvm::ConstantFoldTerminator. + // And the comparsion looks like : %cond = icmp eq i8 %V, constant. 
+ ConstantInt *CondConst = dyn_cast<ConstantInt>(CondCmp->getOperand(1)); + if (CondConst && CondCmp->getPredicate() == CmpInst::ICMP_EQ) { + Value *V = CondCmp->getOperand(0); + auto it = ResolvedValues.find(V); + if (it != ResolvedValues.end()) + V = it->second; + + if (ConstantInt *Cond0 = dyn_cast<ConstantInt>(V)) { + BasicBlock *BB = Cond0->equalsInt(CondConst->getZExtValue()) + ? BR->getSuccessor(0) + : BR->getSuccessor(1); + scanPHIsAndUpdateValueMap(I, BB, ResolvedValues); + I = BB->getFirstNonPHIOrDbgOrLifetime(); + continue; + } + } + } } else if (auto *SI = dyn_cast<SwitchInst>(I)) { Value *V = SI->getCondition(); auto it = ResolvedValues.find(V); @@ -952,6 +995,37 @@ static bool simplifyTerminatorLeadingToRet(Instruction *InitialInst) { return false; } +// Check whether CI obeys the rules of musttail attribute. +static bool shouldBeMustTail(const CallInst &CI, const Function &F) { + if (CI.isInlineAsm()) + return false; + + // Match prototypes and calling conventions of resume function. + FunctionType *CalleeTy = CI.getFunctionType(); + if (!CalleeTy->getReturnType()->isVoidTy() || (CalleeTy->getNumParams() != 1)) + return false; + + Type *CalleeParmTy = CalleeTy->getParamType(0); + if (!CalleeParmTy->isPointerTy() || + (CalleeParmTy->getPointerAddressSpace() != 0)) + return false; + + if (CI.getCallingConv() != F.getCallingConv()) + return false; + + // CI should not has any ABI-impacting function attributes. + static const Attribute::AttrKind ABIAttrs[] = { + Attribute::StructRet, Attribute::ByVal, Attribute::InAlloca, + Attribute::Preallocated, Attribute::InReg, Attribute::Returned, + Attribute::SwiftSelf, Attribute::SwiftError}; + AttributeList Attrs = CI.getAttributes(); + for (auto AK : ABIAttrs) + if (Attrs.hasParamAttribute(0, AK)) + return false; + + return true; +} + // Add musttail to any resume instructions that is immediately followed by a // suspend (i.e. ret). We do this even in -O0 to support guaranteed tail call // for symmetrical coroutine control transfer (C++ Coroutines TS extension). @@ -964,11 +1038,8 @@ static void addMustTailToCoroResumes(Function &F) { SmallVector<CallInst *, 4> Resumes; for (auto &I : instructions(F)) if (auto *Call = dyn_cast<CallInst>(&I)) - if (auto *CalledValue = Call->getCalledValue()) - // CoroEarly pass replaced coro resumes with indirect calls to an - // address return by CoroSubFnInst intrinsic. See if it is one of those. - if (isa<CoroSubFnInst>(CalledValue->stripPointerCasts())) - Resumes.push_back(Call); + if (shouldBeMustTail(*Call, F)) + Resumes.push_back(Call); // Set musttail on those that are followed by a ret instruction. for (CallInst *Call : Resumes) @@ -993,8 +1064,8 @@ static void handleNoSuspendCoroutine(coro::Shape &Shape) { coro::replaceCoroFree(SwitchId, /*Elide=*/AllocInst != nullptr); if (AllocInst) { IRBuilder<> Builder(AllocInst); - // FIXME: Need to handle overaligned members. 
auto *Frame = Builder.CreateAlloca(Shape.FrameTy); + Frame->setAlignment(Shape.FrameAlign); auto *VFrame = Builder.CreateBitCast(Frame, Builder.getInt8PtrTy()); AllocInst->replaceAllUsesWith(Builder.getFalse()); AllocInst->eraseFromParent(); @@ -1023,7 +1094,7 @@ static bool hasCallsInBlockBetween(Instruction *From, Instruction *To) { if (isa<IntrinsicInst>(I)) continue; - if (CallSite(I)) + if (isa<CallBase>(I)) return true; } return false; @@ -1093,13 +1164,11 @@ static bool simplifySuspendPoint(CoroSuspendInst *Suspend, Prev = Pred->getTerminator(); } - CallSite CS{Prev}; - if (!CS) + CallBase *CB = dyn_cast<CallBase>(Prev); + if (!CB) return false; - auto *CallInstr = CS.getInstruction(); - - auto *Callee = CS.getCalledValue()->stripPointerCasts(); + auto *Callee = CB->getCalledOperand()->stripPointerCasts(); // See if the callsite is for resumption or destruction of the coroutine. auto *SubFn = dyn_cast<CoroSubFnInst>(Callee); @@ -1114,7 +1183,7 @@ static bool simplifySuspendPoint(CoroSuspendInst *Suspend, // calls in between Save and CallInstr. They can potenitally resume the // coroutine rendering this optimization unsafe. auto *Save = Suspend->getCoroSave(); - if (hasCallsBetween(Save, CallInstr)) + if (hasCallsBetween(Save, CB)) return false; // Replace llvm.coro.suspend with the value that results in resumption over @@ -1124,13 +1193,13 @@ static bool simplifySuspendPoint(CoroSuspendInst *Suspend, Save->eraseFromParent(); // No longer need a call to coro.resume or coro.destroy. - if (auto *Invoke = dyn_cast<InvokeInst>(CallInstr)) { + if (auto *Invoke = dyn_cast<InvokeInst>(CB)) { BranchInst::Create(Invoke->getNormalDest(), Invoke); } - // Grab the CalledValue from CS before erasing the CallInstr. - auto *CalledValue = CS.getCalledValue(); - CallInstr->eraseFromParent(); + // Grab the CalledValue from CB before erasing the CallInstr. + auto *CalledValue = CB->getCalledOperand(); + CB->eraseFromParent(); // If no more users remove it. Usually it is a bitcast of SubFn. if (CalledValue != SubFn && CalledValue->user_empty()) @@ -1155,7 +1224,10 @@ static void simplifySuspendPoints(coro::Shape &Shape) { if (N == 0) return; while (true) { - if (simplifySuspendPoint(cast<CoroSuspendInst>(S[I]), Shape.CoroBegin)) { + auto SI = cast<CoroSuspendInst>(S[I]); + // Leave final.suspend to handleFinalSuspend since it is undefined behavior + // to resume a coroutine suspended at the final suspend point. + if (!SI->isFinal() && simplifySuspendPoint(SI, Shape.CoroBegin)) { if (--N == I) break; std::swap(S[I], S[N]); @@ -1225,6 +1297,7 @@ static void splitRetconCoroutine(Function &F, coro::Shape &Shape, // Allocate. We don't need to update the call graph node because we're // going to recompute it from scratch after splitting. 
+ // FIXME: pass the required alignment RawFramePtr = Shape.emitAlloc(Builder, Builder.getInt64(Size), nullptr); RawFramePtr = Builder.CreateBitCast(RawFramePtr, Shape.CoroBegin->getType()); @@ -1342,19 +1415,8 @@ namespace { }; } -static void splitCoroutine(Function &F, coro::Shape &Shape, - SmallVectorImpl<Function *> &Clones) { - switch (Shape.ABI) { - case coro::ABI::Switch: - return splitSwitchCoroutine(F, Shape, Clones); - case coro::ABI::Retcon: - case coro::ABI::RetconOnce: - return splitRetconCoroutine(F, Shape, Clones); - } - llvm_unreachable("bad ABI kind"); -} - -static void splitCoroutine(Function &F, CallGraph &CG, CallGraphSCC &SCC) { +static coro::Shape splitCoroutine(Function &F, + SmallVectorImpl<Function *> &Clones) { PrettyStackTraceFunction prettyStackTrace(F); // The suspend-crossing algorithm in buildCoroutineFrame get tripped @@ -1363,26 +1425,42 @@ static void splitCoroutine(Function &F, CallGraph &CG, CallGraphSCC &SCC) { coro::Shape Shape(F); if (!Shape.CoroBegin) - return; + return Shape; simplifySuspendPoints(Shape); buildCoroutineFrame(F, Shape); replaceFrameSize(Shape); - SmallVector<Function*, 4> Clones; - // If there are no suspend points, no split required, just remove // the allocation and deallocation blocks, they are not needed. if (Shape.CoroSuspends.empty()) { handleNoSuspendCoroutine(Shape); } else { - splitCoroutine(F, Shape, Clones); + switch (Shape.ABI) { + case coro::ABI::Switch: + splitSwitchCoroutine(F, Shape, Clones); + break; + case coro::ABI::Retcon: + case coro::ABI::RetconOnce: + splitRetconCoroutine(F, Shape, Clones); + break; + } } // Replace all the swifterror operations in the original function. // This invalidates SwiftErrorOps in the Shape. replaceSwiftErrorOps(F, Shape, nullptr); + return Shape; +} + +static void +updateCallGraphAfterCoroutineSplit(Function &F, const coro::Shape &Shape, + const SmallVectorImpl<Function *> &Clones, + CallGraph &CG, CallGraphSCC &SCC) { + if (!Shape.CoroBegin) + return; + removeCoroEnds(Shape, &CG); postSplitCleanup(F); @@ -1390,6 +1468,44 @@ static void splitCoroutine(Function &F, CallGraph &CG, CallGraphSCC &SCC) { coro::updateCallGraph(F, Clones, CG, SCC); } +static void updateCallGraphAfterCoroutineSplit( + LazyCallGraph::Node &N, const coro::Shape &Shape, + const SmallVectorImpl<Function *> &Clones, LazyCallGraph::SCC &C, + LazyCallGraph &CG, CGSCCAnalysisManager &AM, CGSCCUpdateResult &UR, + FunctionAnalysisManager &FAM) { + if (!Shape.CoroBegin) + return; + + for (llvm::CoroEndInst *End : Shape.CoroEnds) { + auto &Context = End->getContext(); + End->replaceAllUsesWith(ConstantInt::getFalse(Context)); + End->eraseFromParent(); + } + + postSplitCleanup(N.getFunction()); + + // To insert the newly created coroutine funclets 'f.resume', 'f.destroy', and + // 'f.cleanup' into the same SCC as the coroutine 'f' they were outlined from, + // we make use of the CallGraphUpdater class, which can modify the internal + // state of the LazyCallGraph. + for (Function *Clone : Clones) + CG.addNewFunctionIntoRefSCC(*Clone, C.getOuterRefSCC()); + + // We've inserted instructions into coroutine 'f' that reference the three new + // coroutine funclets. We must now update the call graph so that reference + // edges between 'f' and its funclets are added to it. LazyCallGraph only + // allows CGSCC passes to insert "trivial" reference edges. We've ensured + // above, by inserting the funclets into the same SCC as the corutine, that + // the edges are trivial. 
+ // + // N.B.: If we didn't update the call graph here, a CGSCCToFunctionPassAdaptor + // later in this CGSCC pass pipeline may be run, triggering a call graph + // update of its own. Function passes run by the adaptor are not permitted to + // add new edges of any kind to the graph, and the new edges inserted by this + // pass would be misattributed to that unrelated function pass. + updateCGAndAnalysisManagerForCGSCCPass(CG, C, N, AM, UR, FAM); +} + // When we see the coroutine the first time, we insert an indirect call to a // devirt trigger function and mark the coroutine that it is now ready for // split. @@ -1521,12 +1637,89 @@ static bool replaceAllPrepares(Function *PrepareFn, CallGraph &CG) { return Changed; } -//===----------------------------------------------------------------------===// -// Top Level Driver -//===----------------------------------------------------------------------===// +static bool declaresCoroSplitIntrinsics(const Module &M) { + return coro::declaresIntrinsics( + M, {"llvm.coro.begin", "llvm.coro.prepare.retcon"}); +} + +PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C, + CGSCCAnalysisManager &AM, + LazyCallGraph &CG, CGSCCUpdateResult &UR) { + // NB: One invariant of a valid LazyCallGraph::SCC is that it must contain a + // non-zero number of nodes, so we assume that here and grab the first + // node's function's module. + Module &M = *C.begin()->getFunction().getParent(); + auto &FAM = + AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager(); + + if (!declaresCoroSplitIntrinsics(M)) + return PreservedAnalyses::all(); + + // Check for uses of llvm.coro.prepare.retcon. + const auto *PrepareFn = M.getFunction("llvm.coro.prepare.retcon"); + if (PrepareFn && PrepareFn->use_empty()) + PrepareFn = nullptr; + + // Find coroutines for processing. + SmallVector<LazyCallGraph::Node *, 4> Coroutines; + for (LazyCallGraph::Node &N : C) + if (N.getFunction().hasFnAttribute(CORO_PRESPLIT_ATTR)) + Coroutines.push_back(&N); + + if (Coroutines.empty() && !PrepareFn) + return PreservedAnalyses::all(); + + if (Coroutines.empty()) + llvm_unreachable("new pass manager cannot yet handle " + "'llvm.coro.prepare.retcon'"); + + // Split all the coroutines. + for (LazyCallGraph::Node *N : Coroutines) { + Function &F = N->getFunction(); + Attribute Attr = F.getFnAttribute(CORO_PRESPLIT_ATTR); + StringRef Value = Attr.getValueAsString(); + LLVM_DEBUG(dbgs() << "CoroSplit: Processing coroutine '" << F.getName() + << "' state: " << Value << "\n"); + if (Value == UNPREPARED_FOR_SPLIT) { + // Enqueue a second iteration of the CGSCC pipeline. + // N.B.: + // The CoroSplitLegacy pass "triggers" a restart of the CGSCC pass + // pipeline by inserting an indirect function call that the + // CoroElideLegacy pass then replaces with a direct function call. The + // legacy CGSCC pipeline's implicit behavior was as if wrapped in the new + // pass manager abstraction DevirtSCCRepeatedPass. + // + // This pass does not need to "trigger" another run of the pipeline. + // Instead, it simply enqueues the same RefSCC onto the pipeline's + // worklist. 
+ UR.CWorklist.insert(&C); + F.addFnAttr(CORO_PRESPLIT_ATTR, PREPARED_FOR_SPLIT); + continue; + } + F.removeFnAttr(CORO_PRESPLIT_ATTR); + + SmallVector<Function *, 4> Clones; + const coro::Shape Shape = splitCoroutine(F, Clones); + updateCallGraphAfterCoroutineSplit(*N, Shape, Clones, C, CG, AM, UR, FAM); + } + + if (PrepareFn) + llvm_unreachable("new pass manager cannot yet handle " + "'llvm.coro.prepare.retcon'"); + + return PreservedAnalyses::none(); +} namespace { +// We present a coroutine to LLVM as an ordinary function with suspension +// points marked up with intrinsics. We let the optimizer party on the coroutine +// as a single function for as long as possible. Shortly before the coroutine is +// eligible to be inlined into its callers, we split up the coroutine into parts +// corresponding to initial, resume and destroy invocations of the coroutine, +// add them to the current SCC and restart the IPO pipeline to optimize the +// coroutine subfunctions we extracted before proceeding to the caller of the +// coroutine. struct CoroSplitLegacy : public CallGraphSCCPass { static char ID; // Pass identification, replacement for typeid @@ -1539,9 +1732,7 @@ struct CoroSplitLegacy : public CallGraphSCCPass { // A coroutine is identified by the presence of coro.begin intrinsic, if // we don't have any, this pass has nothing to do. bool doInitialization(CallGraph &CG) override { - Run = coro::declaresIntrinsics(CG.getModule(), - {"llvm.coro.begin", - "llvm.coro.prepare.retcon"}); + Run = declaresCoroSplitIntrinsics(CG.getModule()); return CallGraphSCCPass::doInitialization(CG); } @@ -1583,7 +1774,10 @@ struct CoroSplitLegacy : public CallGraphSCCPass { continue; } F->removeFnAttr(CORO_PRESPLIT_ATTR); - splitCoroutine(*F, CG, SCC); + + SmallVector<Function *, 4> Clones; + const coro::Shape Shape = splitCoroutine(*F, Clones); + updateCallGraphAfterCoroutineSplit(*F, Shape, Clones, CG, SCC); } if (PrepareFn) diff --git a/llvm/lib/Transforms/Coroutines/Coroutines.cpp b/llvm/lib/Transforms/Coroutines/Coroutines.cpp index 02d11af3303f..87c3a8b0d0cf 100644 --- a/llvm/lib/Transforms/Coroutines/Coroutines.cpp +++ b/llvm/lib/Transforms/Coroutines/Coroutines.cpp @@ -19,7 +19,6 @@ #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/CallGraphSCCPass.h" #include "llvm/IR/Attributes.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" @@ -649,3 +648,9 @@ void LLVMAddCoroElidePass(LLVMPassManagerRef PM) { void LLVMAddCoroCleanupPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createCoroCleanupLegacyPass()); } + +void +LLVMPassManagerBuilderAddCoroutinePassesToExtensionPoints(LLVMPassManagerBuilderRef PMB) { + PassManagerBuilder *Builder = unwrap(PMB); + addCoroutinePassesToExtensionPoints(*Builder); +} |
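For reference, the resume entry block that createResumeEntryBlock builds dispatches on the index slot of the coroutine frame. A rough C++ analogue of that control flow (a sketch only; the Frame layout is abridged and all names here are invented for illustration, they do not come from this patch):

struct Frame {
  void (*resume)(Frame *);   // SwitchFieldIndex::Resume
  void (*destroy)(Frame *);  // SwitchFieldIndex::Destroy
  // ... promise and spilled values elided ...
  int index;                 // the field whose index Shape.getSwitchIndexField() returns
};

void f_resume(Frame *frame) {
  switch (frame->index) {            // the switch emitted into the resume funclet
  case 0: /* continue after suspend point 0 */ break;
  case 1: /* continue after suspend point 1 */ break;
  default: __builtin_unreachable();  // UnreachBB: resuming anywhere else is UB
  }
}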
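The musttail marking added by shouldBeMustTail and addMustTailToCoroResumes exists to support C++ symmetric transfer, where await_suspend hands control directly to another coroutine. A minimal source-level sketch of the pattern it targets (illustrative only; the Awaiter type and its next member are invented, and the C++20 header is used rather than the Coroutines-TS spelling):

#include <coroutine>

struct Awaiter {
  std::coroutine_handle<> next;  // coroutine to transfer control to
  bool await_ready() const noexcept { return false; }
  // Returning a handle resumes `next` as the suspending coroutine's final
  // action; after CoroSplit that shows up as a call to the resume funclet
  // immediately followed by ret, which is exactly the shape that now gets
  // the musttail attribute, even at -O0.
  std::coroutine_handle<> await_suspend(std::coroutine_handle<>) noexcept {
    return next;
  }
  void await_resume() const noexcept {}
};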
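The new-PM entry point (CoroSplitPass::run on a LazyCallGraph::SCC) can be driven from a hand-built pipeline. The following is a sketch under the assumption of a default-constructed PassBuilder; only CoroSplitPass and CoroCleanupPass themselves come from this patch, the driver function is hypothetical:

#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Transforms/Coroutines/CoroCleanup.h"
#include "llvm/Transforms/Coroutines/CoroSplit.h"
using namespace llvm;

// Hypothetical driver: split coroutines on the post-order SCC walk, then
// lower whatever intrinsics remain with the per-function cleanup pass.
void splitAndCleanupCoroutines(Module &M) {
  LoopAnalysisManager LAM;
  FunctionAnalysisManager FAM;
  CGSCCAnalysisManager CGAM;
  ModuleAnalysisManager MAM;

  PassBuilder PB;
  PB.registerModuleAnalyses(MAM);
  PB.registerCGSCCAnalyses(CGAM);
  PB.registerFunctionAnalyses(FAM);
  PB.registerLoopAnalyses(LAM);
  PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);

  ModulePassManager MPM;
  MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(CoroSplitPass()));
  MPM.addPass(createModuleToFunctionPassAdaptor(CoroCleanupPass()));
  MPM.run(M, MAM);
}

Note that crossRegisterProxies is what lets the FunctionAnalysisManagerCGSCCProxy query inside CoroSplitPass::run succeed.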
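The C binding added at the end of Coroutines.cpp plugs the coroutine passes into a legacy PassManagerBuilder pipeline. A usage sketch from the C API (assuming the declaration is exposed next to the existing LLVMAddCoro*Pass bindings in llvm-c/Transforms/Coroutines.h; everything else here is pre-existing C API):

#include "llvm-c/Core.h"
#include "llvm-c/Transforms/Coroutines.h"
#include "llvm-c/Transforms/PassManagerBuilder.h"

static void optimizeModuleWithCoroutines(LLVMModuleRef M) {
  LLVMPassManagerBuilderRef PMB = LLVMPassManagerBuilderCreate();
  LLVMPassManagerBuilderSetOptLevel(PMB, 2);
  // New in this patch: register the coroutine passes at the builder's
  // extension points so the populated pipeline lowers coroutines.
  LLVMPassManagerBuilderAddCoroutinePassesToExtensionPoints(PMB);

  LLVMPassManagerRef PM = LLVMCreatePassManager();
  LLVMPassManagerBuilderPopulateModulePassManager(PMB, PM);
  LLVMRunPassManager(PM, M);

  LLVMDisposePassManager(PM);
  LLVMPassManagerBuilderDispose(PMB);
}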