Diffstat (limited to 'llvm/lib/Transforms/Coroutines/CoroSplit.cpp')
-rw-r--r--  llvm/lib/Transforms/Coroutines/CoroSplit.cpp | 318
1 file changed, 256 insertions(+), 62 deletions(-)
diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
index 66cb3e74e53e..9c4392e7999b 100644
--- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -18,6 +18,7 @@
 // coroutine.
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Transforms/Coroutines/CoroSplit.h"
 #include "CoroInstr.h"
 #include "CoroInternal.h"
 #include "llvm/ADT/DenseMap.h"
@@ -31,7 +32,6 @@
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/CFG.h"
-#include "llvm/IR/CallSite.h"
 #include "llvm/IR/CallingConv.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
@@ -59,6 +59,7 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/CallGraphUpdater.h"
 #include "llvm/Transforms/Utils/Cloning.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/ValueMapper.h"
@@ -74,7 +75,7 @@ using namespace llvm;
 
 namespace {
 
-/// A little helper class for building 
+/// A little helper class for building
 class CoroCloner {
 public:
   enum class Kind {
@@ -283,7 +284,7 @@ static void createResumeEntryBlock(Function &F, coro::Shape &Shape) {
   auto *FramePtr = Shape.FramePtr;
   auto *FrameTy = Shape.FrameTy;
   auto *GepIndex = Builder.CreateStructGEP(
-      FrameTy, FramePtr, coro::Shape::SwitchFieldIndex::Index, "index.addr");
+      FrameTy, FramePtr, Shape.getSwitchIndexField(), "index.addr");
   auto *Index = Builder.CreateLoad(Shape.getIndexType(), GepIndex, "index");
   auto *Switch =
       Builder.CreateSwitch(Index, UnreachBB, Shape.CoroSuspends.size());
@@ -309,7 +310,7 @@ static void createResumeEntryBlock(Function &F, coro::Shape &Shape) {
       Builder.CreateStore(NullPtr, GepIndex);
     } else {
       auto *GepIndex = Builder.CreateStructGEP(
-          FrameTy, FramePtr, coro::Shape::SwitchFieldIndex::Index, "index.addr");
+          FrameTy, FramePtr, Shape.getSwitchIndexField(), "index.addr");
       Builder.CreateStore(IndexVal, GepIndex);
     }
     Save->replaceAllUsesWith(ConstantTokenNone::get(C));
@@ -562,11 +563,12 @@ void CoroCloner::replaceEntryBlock() {
   // In the original function, the AllocaSpillBlock is a block immediately
   // following the allocation of the frame object which defines GEPs for
   // all the allocas that have been moved into the frame, and it ends by
-  // branching to the original beginning of the coroutine. Make this 
+  // branching to the original beginning of the coroutine. Make this
   // the entry block of the cloned function.
   auto *Entry = cast<BasicBlock>(VMap[Shape.AllocaSpillBlock]);
+  auto *OldEntry = &NewF->getEntryBlock();
   Entry->setName("entry" + Suffix);
-  Entry->moveBefore(&NewF->getEntryBlock());
+  Entry->moveBefore(OldEntry);
   Entry->getTerminator()->eraseFromParent();
 
   // Clear all predecessors of the new entry block.  There should be
@@ -579,8 +581,14 @@ void CoroCloner::replaceEntryBlock() {
   Builder.CreateUnreachable();
   BranchToEntry->eraseFromParent();
 
-  // TODO: move any allocas into Entry that weren't moved into the frame.
-  // (Currently we move all allocas into the frame.)
+  // Move any allocas into Entry that weren't moved into the frame.
+  for (auto IT = OldEntry->begin(), End = OldEntry->end(); IT != End;) {
+    Instruction &I = *IT++;
+    if (!isa<AllocaInst>(&I) || I.use_empty())
+      continue;
+
+    I.moveBefore(*Entry, Entry->getFirstInsertionPt());
+  }
 
   // Branch from the entry to the appropriate place.
   Builder.SetInsertPoint(Entry);
@@ -630,12 +638,23 @@ Value *CoroCloner::deriveNewFramePointer() {
     // Otherwise, load the real frame from the opaque storage.
     auto FramePtrPtr =
       Builder.CreateBitCast(NewStorage, FramePtrTy->getPointerTo());
-    return Builder.CreateLoad(FramePtrPtr);
+    return Builder.CreateLoad(FramePtrTy, FramePtrPtr);
   }
   }
   llvm_unreachable("bad ABI");
 }
 
+static void addFramePointerAttrs(AttributeList &Attrs, LLVMContext &Context,
+                                 unsigned ParamIndex,
+                                 uint64_t Size, Align Alignment) {
+  AttrBuilder ParamAttrs;
+  ParamAttrs.addAttribute(Attribute::NonNull);
+  ParamAttrs.addAttribute(Attribute::NoAlias);
+  ParamAttrs.addAlignmentAttr(Alignment);
+  ParamAttrs.addDereferenceableAttr(Size);
+  Attrs = Attrs.addParamAttributes(Context, ParamIndex, ParamAttrs);
+}
+
 /// Clone the body of the original function into a resume function of
 /// some sort.
 void CoroCloner::create() {
@@ -684,6 +703,9 @@ void CoroCloner::create() {
     // original function. This should include optimization settings and so on.
     NewAttrs = NewAttrs.addAttributes(Context, AttributeList::FunctionIndex,
                                       OrigAttrs.getFnAttributes());
+
+    addFramePointerAttrs(NewAttrs, Context, 0,
+                         Shape.FrameSize, Shape.FrameAlign);
     break;
 
   case coro::ABI::Retcon:
@@ -691,13 +713,13 @@ void CoroCloner::create() {
     // If we have a continuation prototype, just use its attributes,
     // full-stop.
     NewAttrs = Shape.RetconLowering.ResumePrototype->getAttributes();
+
+    addFramePointerAttrs(NewAttrs, Context, 0,
+                         Shape.getRetconCoroId()->getStorageSize(),
+                         Shape.getRetconCoroId()->getStorageAlignment());
     break;
   }
 
-  // Make the frame parameter nonnull and noalias.
-  NewAttrs = NewAttrs.addParamAttribute(Context, 0, Attribute::NonNull);
-  NewAttrs = NewAttrs.addParamAttribute(Context, 0, Attribute::NoAlias);
-
   switch (Shape.ABI) {
   // In these ABIs, the cloned functions always return 'void', and the
   // existing return sites are meaningless. Note that for unique
@@ -872,7 +894,8 @@ static void postSplitCleanup(Function &F) {
   // For now, we do a mandatory verification step because we don't
   // entirely trust this pass. Note that we don't want to add a verifier
   // pass to FPM below because it will also verify all the global data.
-  verifyFunction(F);
+  if (verifyFunction(F, &errs()))
+    report_fatal_error("Broken function");
 
   legacy::FunctionPassManager FPM(F.getParent());
 
@@ -911,17 +934,14 @@ static bool simplifyTerminatorLeadingToRet(Instruction *InitialInst) {
   BasicBlock *UnconditionalSucc = nullptr;
 
   Instruction *I = InitialInst;
-  while (I->isTerminator()) {
+  while (I->isTerminator() ||
+         (isa<CmpInst>(I) && I->getNextNode()->isTerminator())) {
     if (isa<ReturnInst>(I)) {
       if (I != InitialInst) {
         // If InitialInst is an unconditional branch,
         // remove PHI values that come from basic block of InitialInst
         if (UnconditionalSucc)
-          for (PHINode &PN : UnconditionalSucc->phis()) {
-            int idx = PN.getBasicBlockIndex(InitialInst->getParent());
-            if (idx != -1)
-              PN.removeIncomingValue(idx);
-          }
+          UnconditionalSucc->removePredecessor(InitialInst->getParent(), true);
         ReplaceInstWithInst(InitialInst, I->clone());
       }
       return true;
@@ -935,6 +955,29 @@ static bool simplifyTerminatorLeadingToRet(Instruction *InitialInst) {
         I = BB->getFirstNonPHIOrDbgOrLifetime();
         continue;
       }
+    } else if (auto *CondCmp = dyn_cast<CmpInst>(I)) {
+      auto *BR = dyn_cast<BranchInst>(I->getNextNode());
+      if (BR && BR->isConditional() && CondCmp == BR->getCondition()) {
+        // If the case count of a suspended switch instruction is reduced to
+        // 1, it is simplified to a CmpInst by llvm::ConstantFoldTerminator,
+        // and the comparison looks like: %cond = icmp eq i8 %V, constant.
+        ConstantInt *CondConst = dyn_cast<ConstantInt>(CondCmp->getOperand(1));
+        if (CondConst && CondCmp->getPredicate() == CmpInst::ICMP_EQ) {
+          Value *V = CondCmp->getOperand(0);
+          auto it = ResolvedValues.find(V);
+          if (it != ResolvedValues.end())
+            V = it->second;
+
+          if (ConstantInt *Cond0 = dyn_cast<ConstantInt>(V)) {
+            BasicBlock *BB = Cond0->equalsInt(CondConst->getZExtValue())
+                                 ? BR->getSuccessor(0)
+                                 : BR->getSuccessor(1);
+            scanPHIsAndUpdateValueMap(I, BB, ResolvedValues);
+            I = BB->getFirstNonPHIOrDbgOrLifetime();
+            continue;
+          }
+        }
+      }
     } else if (auto *SI = dyn_cast<SwitchInst>(I)) {
       Value *V = SI->getCondition();
       auto it = ResolvedValues.find(V);
@@ -952,6 +995,37 @@ static bool simplifyTerminatorLeadingToRet(Instruction *InitialInst) {
   return false;
 }
 
+// Check whether CI obeys the rules of the musttail attribute.
+static bool shouldBeMustTail(const CallInst &CI, const Function &F) {
+  if (CI.isInlineAsm())
+    return false;
+
+  // Match prototypes and calling conventions of the resume function.
+  FunctionType *CalleeTy = CI.getFunctionType();
+  if (!CalleeTy->getReturnType()->isVoidTy() || (CalleeTy->getNumParams() != 1))
+    return false;
+
+  Type *CalleeParmTy = CalleeTy->getParamType(0);
+  if (!CalleeParmTy->isPointerTy() ||
+      (CalleeParmTy->getPointerAddressSpace() != 0))
+    return false;
+
+  if (CI.getCallingConv() != F.getCallingConv())
+    return false;
+
+  // CI should not have any ABI-impacting function attributes.
+  static const Attribute::AttrKind ABIAttrs[] = {
+      Attribute::StructRet,    Attribute::ByVal,     Attribute::InAlloca,
+      Attribute::Preallocated, Attribute::InReg,     Attribute::Returned,
+      Attribute::SwiftSelf,    Attribute::SwiftError};
+  AttributeList Attrs = CI.getAttributes();
+  for (auto AK : ABIAttrs)
+    if (Attrs.hasParamAttribute(0, AK))
+      return false;
+
+  return true;
+}
+
 // Add musttail to any resume instructions that are immediately followed by a
 // suspend (i.e. ret). We do this even in -O0 to support guaranteed tail call
 // for symmetrical coroutine control transfer (C++ Coroutines TS extension).
@@ -964,11 +1038,8 @@ static void addMustTailToCoroResumes(Function &F) {
   SmallVector<CallInst *, 4> Resumes;
   for (auto &I : instructions(F))
     if (auto *Call = dyn_cast<CallInst>(&I))
-      if (auto *CalledValue = Call->getCalledValue())
-        // CoroEarly pass replaced coro resumes with indirect calls to an
-        // address return by CoroSubFnInst intrinsic. See if it is one of those.
-        if (isa<CoroSubFnInst>(CalledValue->stripPointerCasts()))
-          Resumes.push_back(Call);
+      if (shouldBeMustTail(*Call, F))
+        Resumes.push_back(Call);
 
   // Set musttail on those that are followed by a ret instruction.
   for (CallInst *Call : Resumes)
@@ -993,8 +1064,8 @@ static void handleNoSuspendCoroutine(coro::Shape &Shape) {
     coro::replaceCoroFree(SwitchId, /*Elide=*/AllocInst != nullptr);
     if (AllocInst) {
       IRBuilder<> Builder(AllocInst);
-      // FIXME: Need to handle overaligned members.
       auto *Frame = Builder.CreateAlloca(Shape.FrameTy);
+      Frame->setAlignment(Shape.FrameAlign);
       auto *VFrame = Builder.CreateBitCast(Frame, Builder.getInt8PtrTy());
       AllocInst->replaceAllUsesWith(Builder.getFalse());
       AllocInst->eraseFromParent();
@@ -1023,7 +1094,7 @@ static bool hasCallsInBlockBetween(Instruction *From, Instruction *To) {
     if (isa<IntrinsicInst>(I))
       continue;
 
-    if (CallSite(I))
+    if (isa<CallBase>(I))
       return true;
   }
   return false;
@@ -1093,13 +1164,11 @@ static bool simplifySuspendPoint(CoroSuspendInst *Suspend,
     Prev = Pred->getTerminator();
   }
 
-  CallSite CS{Prev};
-  if (!CS)
+  CallBase *CB = dyn_cast<CallBase>(Prev);
+  if (!CB)
     return false;
 
-  auto *CallInstr = CS.getInstruction();
-
-  auto *Callee = CS.getCalledValue()->stripPointerCasts();
+  auto *Callee = CB->getCalledOperand()->stripPointerCasts();
 
   // See if the callsite is for resumption or destruction of the coroutine.
   auto *SubFn = dyn_cast<CoroSubFnInst>(Callee);
@@ -1114,7 +1183,7 @@ static bool simplifySuspendPoint(CoroSuspendInst *Suspend,
   // calls in between Save and CallInstr. They can potentially resume the
   // coroutine rendering this optimization unsafe.
   auto *Save = Suspend->getCoroSave();
-  if (hasCallsBetween(Save, CallInstr))
+  if (hasCallsBetween(Save, CB))
     return false;
 
   // Replace llvm.coro.suspend with the value that results in resumption over
@@ -1124,13 +1193,13 @@ static bool simplifySuspendPoint(CoroSuspendInst *Suspend,
   Save->eraseFromParent();
 
   // No longer need a call to coro.resume or coro.destroy.
-  if (auto *Invoke = dyn_cast<InvokeInst>(CallInstr)) {
+  if (auto *Invoke = dyn_cast<InvokeInst>(CB)) {
     BranchInst::Create(Invoke->getNormalDest(), Invoke);
   }
 
-  // Grab the CalledValue from CS before erasing the CallInstr.
-  auto *CalledValue = CS.getCalledValue();
-  CallInstr->eraseFromParent();
+  // Grab the CalledValue from CB before erasing the CallInstr.
+  auto *CalledValue = CB->getCalledOperand();
+  CB->eraseFromParent();
 
   // If no more users remove it. Usually it is a bitcast of SubFn.
   if (CalledValue != SubFn && CalledValue->user_empty())
@@ -1155,7 +1224,10 @@ static void simplifySuspendPoints(coro::Shape &Shape) {
   if (N == 0)
     return;
   while (true) {
-    if (simplifySuspendPoint(cast<CoroSuspendInst>(S[I]), Shape.CoroBegin)) {
+    auto SI = cast<CoroSuspendInst>(S[I]);
+    // Leave final.suspend to handleFinalSuspend since it is undefined behavior
+    // to resume a coroutine suspended at the final suspend point.
+    if (!SI->isFinal() && simplifySuspendPoint(SI, Shape.CoroBegin)) {
      if (--N == I)
        break;
      std::swap(S[I], S[N]);
@@ -1225,6 +1297,7 @@ static void splitRetconCoroutine(Function &F, coro::Shape &Shape,
 
       // Allocate. We don't need to update the call graph node because we're
       // going to recompute it from scratch after splitting.
+      // FIXME: pass the required alignment
       RawFramePtr = Shape.emitAlloc(Builder, Builder.getInt64(Size), nullptr);
       RawFramePtr =
         Builder.CreateBitCast(RawFramePtr, Shape.CoroBegin->getType());
@@ -1342,19 +1415,8 @@ namespace {
   };
 }
 
-static void splitCoroutine(Function &F, coro::Shape &Shape,
-                           SmallVectorImpl<Function *> &Clones) {
-  switch (Shape.ABI) {
-  case coro::ABI::Switch:
-    return splitSwitchCoroutine(F, Shape, Clones);
-  case coro::ABI::Retcon:
-  case coro::ABI::RetconOnce:
-    return splitRetconCoroutine(F, Shape, Clones);
-  }
-  llvm_unreachable("bad ABI kind");
-}
-
-static void splitCoroutine(Function &F, CallGraph &CG, CallGraphSCC &SCC) {
+static coro::Shape splitCoroutine(Function &F,
+                                  SmallVectorImpl<Function *> &Clones) {
   PrettyStackTraceFunction prettyStackTrace(F);
 
   // The suspend-crossing algorithm in buildCoroutineFrame gets tripped
@@ -1363,26 +1425,42 @@ static void splitCoroutine(Function &F, CallGraph &CG, CallGraphSCC &SCC) {
   coro::Shape Shape(F);
   if (!Shape.CoroBegin)
-    return;
+    return Shape;
 
   simplifySuspendPoints(Shape);
   buildCoroutineFrame(F, Shape);
   replaceFrameSize(Shape);
 
-  SmallVector<Function*, 4> Clones;
-
   // If there are no suspend points, no split required, just remove
   // the allocation and deallocation blocks, they are not needed.
   if (Shape.CoroSuspends.empty()) {
     handleNoSuspendCoroutine(Shape);
   } else {
-    splitCoroutine(F, Shape, Clones);
+    switch (Shape.ABI) {
+    case coro::ABI::Switch:
+      splitSwitchCoroutine(F, Shape, Clones);
+      break;
+    case coro::ABI::Retcon:
+    case coro::ABI::RetconOnce:
+      splitRetconCoroutine(F, Shape, Clones);
+      break;
+    }
   }
 
   // Replace all the swifterror operations in the original function.
   // This invalidates SwiftErrorOps in the Shape.
   replaceSwiftErrorOps(F, Shape, nullptr);
 
+  return Shape;
+}
+
+static void
+updateCallGraphAfterCoroutineSplit(Function &F, const coro::Shape &Shape,
+                                   const SmallVectorImpl<Function *> &Clones,
+                                   CallGraph &CG, CallGraphSCC &SCC) {
+  if (!Shape.CoroBegin)
+    return;
+
   removeCoroEnds(Shape, &CG);
   postSplitCleanup(F);
 
@@ -1390,6 +1468,44 @@ static void splitCoroutine(Function &F, CallGraph &CG, CallGraphSCC &SCC) {
   coro::updateCallGraph(F, Clones, CG, SCC);
 }
 
+static void updateCallGraphAfterCoroutineSplit(
+    LazyCallGraph::Node &N, const coro::Shape &Shape,
+    const SmallVectorImpl<Function *> &Clones, LazyCallGraph::SCC &C,
+    LazyCallGraph &CG, CGSCCAnalysisManager &AM, CGSCCUpdateResult &UR,
+    FunctionAnalysisManager &FAM) {
+  if (!Shape.CoroBegin)
+    return;
+
+  for (llvm::CoroEndInst *End : Shape.CoroEnds) {
+    auto &Context = End->getContext();
+    End->replaceAllUsesWith(ConstantInt::getFalse(Context));
+    End->eraseFromParent();
+  }
+
+  postSplitCleanup(N.getFunction());
+
+  // To insert the newly created coroutine funclets 'f.resume', 'f.destroy',
+  // and 'f.cleanup' into the same SCC as the coroutine 'f' they were outlined
+  // from, we make use of the CallGraphUpdater class, which can modify the
+  // internal state of the LazyCallGraph.
+  for (Function *Clone : Clones)
+    CG.addNewFunctionIntoRefSCC(*Clone, C.getOuterRefSCC());
+
+  // We've inserted instructions into coroutine 'f' that reference the three
+  // new coroutine funclets. We must now update the call graph so that
+  // reference edges between 'f' and its funclets are added to it. LazyCallGraph
+  // only allows CGSCC passes to insert "trivial" reference edges. We've ensured
+  // above, by inserting the funclets into the same SCC as the coroutine, that
+  // the edges are trivial.
+  //
+  // N.B.: If we didn't update the call graph here, a CGSCCToFunctionPassAdaptor
+  // later in this CGSCC pass pipeline may be run, triggering a call graph
+  // update of its own. Function passes run by the adaptor are not permitted to
+  // add new edges of any kind to the graph, and the new edges inserted by this
+  // pass would be misattributed to that unrelated function pass.
+  updateCGAndAnalysisManagerForCGSCCPass(CG, C, N, AM, UR, FAM);
+}
+
 // When we see the coroutine the first time, we insert an indirect call to a
 // devirt trigger function and mark the coroutine that it is now ready for
 // split.
@@ -1521,12 +1637,89 @@ static bool replaceAllPrepares(Function *PrepareFn, CallGraph &CG) {
   return Changed;
 }
 
-//===----------------------------------------------------------------------===//
-// Top Level Driver
-//===----------------------------------------------------------------------===//
+static bool declaresCoroSplitIntrinsics(const Module &M) {
+  return coro::declaresIntrinsics(
+      M, {"llvm.coro.begin", "llvm.coro.prepare.retcon"});
+}
+
+PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C,
+                                     CGSCCAnalysisManager &AM,
+                                     LazyCallGraph &CG, CGSCCUpdateResult &UR) {
+  // NB: One invariant of a valid LazyCallGraph::SCC is that it must contain a
+  //     non-zero number of nodes, so we assume that here and grab the first
+  //     node's function's module.
+  Module &M = *C.begin()->getFunction().getParent();
+  auto &FAM =
+      AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
+
+  if (!declaresCoroSplitIntrinsics(M))
+    return PreservedAnalyses::all();
+
+  // Check for uses of llvm.coro.prepare.retcon.
+  const auto *PrepareFn = M.getFunction("llvm.coro.prepare.retcon");
+  if (PrepareFn && PrepareFn->use_empty())
+    PrepareFn = nullptr;
+
+  // Find coroutines for processing.
+  SmallVector<LazyCallGraph::Node *, 4> Coroutines;
+  for (LazyCallGraph::Node &N : C)
+    if (N.getFunction().hasFnAttribute(CORO_PRESPLIT_ATTR))
+      Coroutines.push_back(&N);
+
+  if (Coroutines.empty() && !PrepareFn)
+    return PreservedAnalyses::all();
+
+  if (Coroutines.empty())
+    llvm_unreachable("new pass manager cannot yet handle "
+                     "'llvm.coro.prepare.retcon'");
+
+  // Split all the coroutines.
+  for (LazyCallGraph::Node *N : Coroutines) {
+    Function &F = N->getFunction();
+    Attribute Attr = F.getFnAttribute(CORO_PRESPLIT_ATTR);
+    StringRef Value = Attr.getValueAsString();
+    LLVM_DEBUG(dbgs() << "CoroSplit: Processing coroutine '" << F.getName()
+                      << "' state: " << Value << "\n");
+    if (Value == UNPREPARED_FOR_SPLIT) {
+      // Enqueue a second iteration of the CGSCC pipeline.
+      // N.B.:
+      // The CoroSplitLegacy pass "triggers" a restart of the CGSCC pass
+      // pipeline by inserting an indirect function call that the
+      // CoroElideLegacy pass then replaces with a direct function call. The
+      // legacy CGSCC pipeline's implicit behavior was as if wrapped in the new
+      // pass manager abstraction DevirtSCCRepeatedPass.
+      //
+      // This pass does not need to "trigger" another run of the pipeline.
+      // Instead, it simply enqueues the same RefSCC onto the pipeline's
+      // worklist.
+      UR.CWorklist.insert(&C);
+      F.addFnAttr(CORO_PRESPLIT_ATTR, PREPARED_FOR_SPLIT);
+      continue;
+    }
+    F.removeFnAttr(CORO_PRESPLIT_ATTR);
+
+    SmallVector<Function *, 4> Clones;
+    const coro::Shape Shape = splitCoroutine(F, Clones);
+    updateCallGraphAfterCoroutineSplit(*N, Shape, Clones, C, CG, AM, UR, FAM);
+  }
+
+  if (PrepareFn)
+    llvm_unreachable("new pass manager cannot yet handle "
+                     "'llvm.coro.prepare.retcon'");
+
+  return PreservedAnalyses::none();
+}
 
 namespace {
 
+// We present a coroutine to LLVM as an ordinary function with suspension
+// points marked up with intrinsics. We let the optimizer party on the coroutine
+// as a single function for as long as possible. Shortly before the coroutine is
+// eligible to be inlined into its callers, we split up the coroutine into parts
+// corresponding to initial, resume and destroy invocations of the coroutine,
+// add them to the current SCC and restart the IPO pipeline to optimize the
+// coroutine subfunctions we extracted before proceeding to the caller of the
+// coroutine.
 struct CoroSplitLegacy : public CallGraphSCCPass {
   static char ID; // Pass identification, replacement for typeid
 
@@ -1539,9 +1732,7 @@ struct CoroSplitLegacy : public CallGraphSCCPass {
   // A coroutine is identified by the presence of the coro.begin intrinsic;
   // if we don't have any, this pass has nothing to do.
   bool doInitialization(CallGraph &CG) override {
-    Run = coro::declaresIntrinsics(CG.getModule(),
-                                   {"llvm.coro.begin",
-                                    "llvm.coro.prepare.retcon"});
+    Run = declaresCoroSplitIntrinsics(CG.getModule());
    return CallGraphSCCPass::doInitialization(CG);
  }
 
@@ -1583,7 +1774,10 @@ struct CoroSplitLegacy : public CallGraphSCCPass {
       continue;
     }
     F->removeFnAttr(CORO_PRESPLIT_ATTR);
-    splitCoroutine(*F, CG, SCC);
+
+    SmallVector<Function *, 4> Clones;
+    const coro::Shape Shape = splitCoroutine(*F, Clones);
+    updateCallGraphAfterCoroutineSplit(*F, Shape, Clones, CG, SCC);
   }
 
   if (PrepareFn)
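
The new addFramePointerAttrs helper replaces the old hand-rolled NonNull/NoAlias pair and additionally records the frame's size and alignment on the funclet's frame parameter, which later passes can use when reasoning about loads from the frame. A standalone sketch of the same annotation, assuming the LLVM 10-era AttrBuilder API; annotateFramePtr and its parameters are illustrative names, not symbols from the patch:

#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/Alignment.h"

using namespace llvm;

static void annotateFramePtr(Function &Resume, uint64_t FrameSize,
                             Align FrameAlign) {
  AttrBuilder B;
  B.addAttribute(Attribute::NonNull);   // the frame pointer is never null
  B.addAttribute(Attribute::NoAlias);   // nothing else aliases the frame
  B.addAlignmentAttr(FrameAlign);       // alignment of the frame allocation
  B.addDereferenceableAttr(FrameSize);  // the whole frame may be dereferenced
  Resume.setAttributes(
      Resume.getAttributes().addParamAttributes(Resume.getContext(), 0, B));
}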
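
The postSplitCleanup change relies on verifyFunction's convention of returning true when the function is broken, printing diagnostics to the given stream first, so a silently discarded check becomes a hard error. Minimal usage sketch; checkSplitProduct is an illustrative name:

#include "llvm/IR/Function.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

static void checkSplitProduct(Function &F) {
  // verifyFunction returns true when it finds problems and, when given a
  // stream, writes them there before returning.
  if (verifyFunction(F, &errs()))
    report_fatal_error("Broken function");
}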
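
The new CmpInst case in simplifyTerminatorLeadingToRet exists because, per the patch's own comment, llvm::ConstantFoldTerminator rewrites a resume-index switch with a single non-default case into an 'icmp eq' followed by a conditional branch, so the walk has to look through that pair as well. A sketch of the shape being matched; isFoldedSwitch is an illustrative helper, not part of the patch:

#include "llvm/IR/Instructions.h"

using namespace llvm;

// Returns true if I starts the icmp-eq/conditional-branch pair that
// ConstantFoldTerminator leaves behind for a one-case switch.
static bool isFoldedSwitch(Instruction *I) {
  auto *Cmp = dyn_cast<ICmpInst>(I);
  if (!Cmp || Cmp->getPredicate() != CmpInst::ICMP_EQ)
    return false;
  auto *Br = dyn_cast_or_null<BranchInst>(Cmp->getNextNode());
  return Br && Br->isConditional() && Br->getCondition() == Cmp;
}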
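
shouldBeMustTail only vets a call's prototype, calling convention, and ABI-impacting attributes; the marking done afterwards by addMustTailToCoroResumes still depends on the call being immediately followed by a ret, which is what the IR verifier demands of musttail. A sketch of that final step under the same assumption; markMustTail is an illustrative helper:

#include "llvm/IR/Instructions.h"

using namespace llvm;

static bool markMustTail(CallInst &CI) {
  // The verifier allows a musttail call to be followed by a ret (optionally
  // through a bitcast); the simple directly-following-ret case is the one
  // the coroutine lowering relies on here.
  auto *Next = CI.getNextNode();
  if (!Next || !isa<ReturnInst>(Next))
    return false;
  CI.setTailCallKind(CallInst::TCK_MustTail);
  return true;
}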
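
With CoroSplitPass::run in place, the pass can be scheduled on the new pass manager as a CGSCC pass. A minimal sketch of how a pipeline might be assembled, assuming the CGSCCPassManager APIs of the same era; buildCoroSplitPipeline is an illustrative function, not part of the patch:

#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/Coroutines/CoroSplit.h"

using namespace llvm;

ModulePassManager buildCoroSplitPipeline() {
  // CoroSplitPass runs per SCC so a coroutine and the funclets outlined from
  // it are kept together, as the LazyCallGraph update above requires.
  CGSCCPassManager CGPM;
  CGPM.addPass(CoroSplitPass());

  ModulePassManager MPM;
  // Visit SCCs in post-order, so callees are processed before their callers.
  MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
  return MPM;
}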