Diffstat (limited to 'llvm/lib/Transforms/Coroutines')
-rw-r--r--  llvm/lib/Transforms/Coroutines/CoroCleanup.cpp    28
-rw-r--r--  llvm/lib/Transforms/Coroutines/CoroEarly.cpp      67
-rw-r--r--  llvm/lib/Transforms/Coroutines/CoroElide.cpp     211
-rw-r--r--  llvm/lib/Transforms/Coroutines/CoroFrame.cpp     515
-rw-r--r--  llvm/lib/Transforms/Coroutines/CoroInstr.h        17
-rw-r--r--  llvm/lib/Transforms/Coroutines/CoroInternal.h     42
-rw-r--r--  llvm/lib/Transforms/Coroutines/CoroSplit.cpp     318
-rw-r--r--  llvm/lib/Transforms/Coroutines/Coroutines.cpp      7
8 files changed, 928 insertions(+), 277 deletions(-)
diff --git a/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp b/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp
index c2dbd6f41642..233eae37c497 100644
--- a/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp
@@ -5,9 +5,8 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-// This pass lowers all remaining coroutine intrinsics.
-//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Coroutines/CoroCleanup.h"
#include "CoroInternal.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
@@ -90,12 +89,26 @@ bool Lowerer::lowerRemainingCoroIntrinsics(Function &F) {
// After replacements were made we can clean up the function body a little.
simplifyCFG(F);
}
+
return Changed;
}
-//===----------------------------------------------------------------------===//
-// Top Level Driver
-//===----------------------------------------------------------------------===//
+static bool declaresCoroCleanupIntrinsics(const Module &M) {
+ return coro::declaresIntrinsics(M, {"llvm.coro.alloc", "llvm.coro.begin",
+ "llvm.coro.subfn.addr", "llvm.coro.free",
+ "llvm.coro.id", "llvm.coro.id.retcon",
+ "llvm.coro.id.retcon.once"});
+}
+
+PreservedAnalyses CoroCleanupPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ auto &M = *F.getParent();
+ if (!declaresCoroCleanupIntrinsics(M) ||
+ !Lowerer(M).lowerRemainingCoroIntrinsics(F))
+ return PreservedAnalyses::all();
+
+ return PreservedAnalyses::none();
+}
namespace {
@@ -111,10 +124,7 @@ struct CoroCleanupLegacy : FunctionPass {
// This pass has work to do only if we find intrinsics we are going to lower
// in the module.
bool doInitialization(Module &M) override {
- if (coro::declaresIntrinsics(M, {"llvm.coro.alloc", "llvm.coro.begin",
- "llvm.coro.subfn.addr", "llvm.coro.free",
- "llvm.coro.id", "llvm.coro.id.retcon",
- "llvm.coro.id.retcon.once"}))
+ if (declaresCoroCleanupIntrinsics(M))
L = std::make_unique<Lowerer>(M);
return false;
}
diff --git a/llvm/lib/Transforms/Coroutines/CoroEarly.cpp b/llvm/lib/Transforms/Coroutines/CoroEarly.cpp
index e73fb9eeb1e9..242e6c3f6b23 100644
--- a/llvm/lib/Transforms/Coroutines/CoroEarly.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroEarly.cpp
@@ -5,13 +5,9 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-// This pass lowers coroutine intrinsics that hide the details of the exact
-// calling convention for coroutine resume and destroy functions and details of
-// the structure of the coroutine frame.
-//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Coroutines/CoroEarly.h"
#include "CoroInternal.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Module.h"
@@ -28,7 +24,7 @@ class Lowerer : public coro::LowererBase {
PointerType *const AnyResumeFnPtrTy;
Constant *NoopCoro = nullptr;
- void lowerResumeOrDestroy(CallSite CS, CoroSubFnInst::ResumeKind);
+ void lowerResumeOrDestroy(CallBase &CB, CoroSubFnInst::ResumeKind);
void lowerCoroPromise(CoroPromiseInst *Intrin);
void lowerCoroDone(IntrinsicInst *II);
void lowerCoroNoop(IntrinsicInst *II);
@@ -47,12 +43,11 @@ public:
// an address returned by coro.subfn.addr intrinsic. This is done so that
// CGPassManager recognizes devirtualization when CoroElide pass replaces a call
// to coro.subfn.addr with an appropriate function address.
-void Lowerer::lowerResumeOrDestroy(CallSite CS,
+void Lowerer::lowerResumeOrDestroy(CallBase &CB,
CoroSubFnInst::ResumeKind Index) {
- Value *ResumeAddr =
- makeSubFnCall(CS.getArgOperand(0), Index, CS.getInstruction());
- CS.setCalledFunction(ResumeAddr);
- CS.setCallingConv(CallingConv::Fast);
+ Value *ResumeAddr = makeSubFnCall(CB.getArgOperand(0), Index, &CB);
+ CB.setCalledOperand(ResumeAddr);
+ CB.setCallingConv(CallingConv::Fast);
}
// Coroutine promise field is always at the fixed offset from the beginning of
@@ -64,14 +59,14 @@ void Lowerer::lowerResumeOrDestroy(CallSite CS,
// TODO: Handle the case when coroutine promise alloca has align override.
void Lowerer::lowerCoroPromise(CoroPromiseInst *Intrin) {
Value *Operand = Intrin->getArgOperand(0);
- unsigned Alignement = Intrin->getAlignment();
+ Align Alignment = Intrin->getAlignment();
Type *Int8Ty = Builder.getInt8Ty();
auto *SampleStruct =
StructType::get(Context, {AnyResumeFnPtrTy, AnyResumeFnPtrTy, Int8Ty});
const DataLayout &DL = TheModule.getDataLayout();
int64_t Offset = alignTo(
- DL.getStructLayout(SampleStruct)->getElementOffset(2), Alignement);
+ DL.getStructLayout(SampleStruct)->getElementOffset(2), Alignment);
if (Intrin->isFromPromise())
Offset = -Offset;
@@ -98,7 +93,7 @@ void Lowerer::lowerCoroDone(IntrinsicInst *II) {
Builder.SetInsertPoint(II);
auto *BCI = Builder.CreateBitCast(Operand, FramePtrTy);
- auto *Load = Builder.CreateLoad(BCI);
+ auto *Load = Builder.CreateLoad(FrameTy, BCI);
auto *Cond = Builder.CreateICmpEQ(Load, NullPtr);
II->replaceAllUsesWith(Cond);
@@ -156,8 +151,8 @@ bool Lowerer::lowerEarlyIntrinsics(Function &F) {
SmallVector<CoroFreeInst *, 4> CoroFrees;
for (auto IB = inst_begin(F), IE = inst_end(F); IB != IE;) {
Instruction &I = *IB++;
- if (auto CS = CallSite(&I)) {
- switch (CS.getIntrinsicID()) {
+ if (auto *CB = dyn_cast<CallBase>(&I)) {
+ switch (CB->getIntrinsicID()) {
default:
continue;
case Intrinsic::coro_free:
@@ -167,13 +162,13 @@ bool Lowerer::lowerEarlyIntrinsics(Function &F) {
// Make sure that final suspend point is not duplicated as CoroSplit
// pass expects that there is at most one final suspend point.
if (cast<CoroSuspendInst>(&I)->isFinal())
- CS.setCannotDuplicate();
+ CB->setCannotDuplicate();
break;
case Intrinsic::coro_end:
// Make sure that fallthrough coro.end is not duplicated as CoroSplit
// pass expects that there is at most one fallthrough coro.end.
if (cast<CoroEndInst>(&I)->isFallthrough())
- CS.setCannotDuplicate();
+ CB->setCannotDuplicate();
break;
case Intrinsic::coro_noop:
lowerCoroNoop(cast<IntrinsicInst>(&I));
@@ -195,10 +190,10 @@ bool Lowerer::lowerEarlyIntrinsics(Function &F) {
F.addFnAttr(CORO_PRESPLIT_ATTR, PREPARED_FOR_SPLIT);
break;
case Intrinsic::coro_resume:
- lowerResumeOrDestroy(CS, CoroSubFnInst::ResumeIndex);
+ lowerResumeOrDestroy(*CB, CoroSubFnInst::ResumeIndex);
break;
case Intrinsic::coro_destroy:
- lowerResumeOrDestroy(CS, CoroSubFnInst::DestroyIndex);
+ lowerResumeOrDestroy(*CB, CoroSubFnInst::DestroyIndex);
break;
case Intrinsic::coro_promise:
lowerCoroPromise(cast<CoroPromiseInst>(&I));
@@ -219,9 +214,23 @@ bool Lowerer::lowerEarlyIntrinsics(Function &F) {
return Changed;
}
-//===----------------------------------------------------------------------===//
-// Top Level Driver
-//===----------------------------------------------------------------------===//
+static bool declaresCoroEarlyIntrinsics(const Module &M) {
+ return coro::declaresIntrinsics(
+ M, {"llvm.coro.id", "llvm.coro.id.retcon", "llvm.coro.id.retcon.once",
+ "llvm.coro.destroy", "llvm.coro.done", "llvm.coro.end",
+ "llvm.coro.noop", "llvm.coro.free", "llvm.coro.promise",
+ "llvm.coro.resume", "llvm.coro.suspend"});
+}
+
+PreservedAnalyses CoroEarlyPass::run(Function &F, FunctionAnalysisManager &) {
+ Module &M = *F.getParent();
+ if (!declaresCoroEarlyIntrinsics(M) || !Lowerer(M).lowerEarlyIntrinsics(F))
+ return PreservedAnalyses::all();
+
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
+}
namespace {
@@ -236,17 +245,7 @@ struct CoroEarlyLegacy : public FunctionPass {
// This pass has work to do only if we find intrinsics we are going to lower
// in the module.
bool doInitialization(Module &M) override {
- if (coro::declaresIntrinsics(M, {"llvm.coro.id",
- "llvm.coro.id.retcon",
- "llvm.coro.id.retcon.once",
- "llvm.coro.destroy",
- "llvm.coro.done",
- "llvm.coro.end",
- "llvm.coro.noop",
- "llvm.coro.free",
- "llvm.coro.promise",
- "llvm.coro.resume",
- "llvm.coro.suspend"}))
+ if (declaresCoroEarlyIntrinsics(M))
L = std::make_unique<Lowerer>(M);
return false;
}
diff --git a/llvm/lib/Transforms/Coroutines/CoroElide.cpp b/llvm/lib/Transforms/Coroutines/CoroElide.cpp
index 23d22e23861a..9d364b3097c1 100644
--- a/llvm/lib/Transforms/Coroutines/CoroElide.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroElide.cpp
@@ -5,12 +5,10 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-// This pass replaces dynamic allocation of coroutine frame with alloca and
-// replaces calls to llvm.coro.resume and llvm.coro.destroy with direct calls
-// to coroutine sub-functions.
-//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Coroutines/CoroElide.h"
#include "CoroInternal.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/Dominators.h"
@@ -30,14 +28,19 @@ struct Lowerer : coro::LowererBase {
SmallVector<CoroBeginInst *, 1> CoroBegins;
SmallVector<CoroAllocInst *, 1> CoroAllocs;
SmallVector<CoroSubFnInst *, 4> ResumeAddr;
- SmallVector<CoroSubFnInst *, 4> DestroyAddr;
+ DenseMap<CoroBeginInst *, SmallVector<CoroSubFnInst *, 4>> DestroyAddr;
SmallVector<CoroFreeInst *, 1> CoroFrees;
+ SmallPtrSet<const SwitchInst *, 4> CoroSuspendSwitches;
Lowerer(Module &M) : LowererBase(M) {}
- void elideHeapAllocations(Function *F, Type *FrameTy, AAResults &AA);
+ void elideHeapAllocations(Function *F, uint64_t FrameSize, Align FrameAlign,
+ AAResults &AA);
bool shouldElide(Function *F, DominatorTree &DT) const;
+ void collectPostSplitCoroIds(Function *F);
bool processCoroId(CoroIdInst *, AAResults &AA, DominatorTree &DT);
+ bool hasEscapePath(const CoroBeginInst *,
+ const SmallPtrSetImpl<BasicBlock *> &) const;
};
} // end anonymous namespace
@@ -90,10 +93,23 @@ static void removeTailCallAttribute(AllocaInst *Frame, AAResults &AA) {
}
}
-// Given a resume function @f.resume(%f.frame* %frame), returns %f.frame type.
-static Type *getFrameType(Function *Resume) {
- auto *ArgType = Resume->arg_begin()->getType();
- return cast<PointerType>(ArgType)->getElementType();
+// Given a resume function @f.resume(%f.frame* %frame), returns the size
+// and expected alignment of %f.frame type.
+static std::pair<uint64_t, Align> getFrameLayout(Function *Resume) {
+ // Prefer to pull information from the function attributes.
+ auto Size = Resume->getParamDereferenceableBytes(0);
+ auto Align = Resume->getParamAlign(0);
+
+ // If those aren't given, extract them from the type.
+ if (Size == 0 || !Align) {
+ auto *FrameTy = Resume->arg_begin()->getType()->getPointerElementType();
+
+ const DataLayout &DL = Resume->getParent()->getDataLayout();
+ if (!Size) Size = DL.getTypeAllocSize(FrameTy);
+ if (!Align) Align = DL.getABITypeAlign(FrameTy);
+ }
+
+ return std::make_pair(Size, *Align);
}
// Finds first non alloca instruction in the entry block of a function.
@@ -106,8 +122,9 @@ static Instruction *getFirstNonAllocaInTheEntryBlock(Function *F) {
// To elide heap allocations we need to suppress code blocks guarded by
// llvm.coro.alloc and llvm.coro.free instructions.
-void Lowerer::elideHeapAllocations(Function *F, Type *FrameTy, AAResults &AA) {
- LLVMContext &C = FrameTy->getContext();
+void Lowerer::elideHeapAllocations(Function *F, uint64_t FrameSize,
+ Align FrameAlign, AAResults &AA) {
+ LLVMContext &C = F->getContext();
auto *InsertPt =
getFirstNonAllocaInTheEntryBlock(CoroIds.front()->getFunction());
@@ -128,7 +145,9 @@ void Lowerer::elideHeapAllocations(Function *F, Type *FrameTy, AAResults &AA) {
// here. Possibly we will need to do a mini SROA here and break the coroutine
// frame into individual AllocaInst recreating the original alignment.
const DataLayout &DL = F->getParent()->getDataLayout();
+ auto FrameTy = ArrayType::get(Type::getInt8Ty(C), FrameSize);
auto *Frame = new AllocaInst(FrameTy, DL.getAllocaAddrSpace(), "", InsertPt);
+ Frame->setAlignment(FrameAlign);
auto *FrameVoidPtr =
new BitCastInst(Frame, Type::getInt8PtrTy(C), "vFrame", InsertPt);
@@ -142,44 +161,92 @@ void Lowerer::elideHeapAllocations(Function *F, Type *FrameTy, AAResults &AA) {
removeTailCallAttribute(Frame, AA);
}
+bool Lowerer::hasEscapePath(const CoroBeginInst *CB,
+ const SmallPtrSetImpl<BasicBlock *> &TIs) const {
+ const auto &It = DestroyAddr.find(CB);
+ assert(It != DestroyAddr.end());
+
+ // Limit the number of blocks we visit.
+ unsigned Limit = 32 * (1 + It->second.size());
+
+ SmallVector<const BasicBlock *, 32> Worklist;
+ Worklist.push_back(CB->getParent());
+
+ SmallPtrSet<const BasicBlock *, 32> Visited;
+  // Consider the basic block of coro.destroy as already visited, so that
+  // we skip any path that passes through coro.destroy.
+ for (auto *DA : It->second)
+ Visited.insert(DA->getParent());
+
+ do {
+ const auto *BB = Worklist.pop_back_val();
+ if (!Visited.insert(BB).second)
+ continue;
+ if (TIs.count(BB))
+ return true;
+
+ // Conservatively say that there is potentially a path.
+ if (!--Limit)
+ return true;
+
+ auto TI = BB->getTerminator();
+    // Although the default dest of coro.suspend switches is the suspend
+    // block, which would be an escape path to a normal terminator, it is
+    // reasonable to skip it since the coroutine frame doesn't change outside
+    // the coroutine body.
+ if (isa<SwitchInst>(TI) &&
+ CoroSuspendSwitches.count(cast<SwitchInst>(TI))) {
+ Worklist.push_back(cast<SwitchInst>(TI)->getSuccessor(1));
+ Worklist.push_back(cast<SwitchInst>(TI)->getSuccessor(2));
+ } else
+ Worklist.append(succ_begin(BB), succ_end(BB));
+
+ } while (!Worklist.empty());
+
+  // We have exhausted all possible paths and are certain that coro.begin
+  // cannot reach any of the terminators.
+ return false;
+}
+
bool Lowerer::shouldElide(Function *F, DominatorTree &DT) const {
// If no CoroAllocs, we cannot suppress allocation, so elision is not
// possible.
if (CoroAllocs.empty())
return false;
- // Check that for every coro.begin there is a coro.destroy directly
- // referencing the SSA value of that coro.begin along a non-exceptional path.
+ // Check that for every coro.begin there is at least one coro.destroy directly
+ // referencing the SSA value of that coro.begin along each
+ // non-exceptional path.
// If the value escaped, then coro.destroy would have been referencing a
// memory location storing that value and not the virtual register.
+ SmallPtrSet<BasicBlock *, 8> Terminators;
// First gather all of the non-exceptional terminators for the function.
- SmallPtrSet<Instruction *, 8> Terminators;
- for (BasicBlock &B : *F) {
- auto *TI = B.getTerminator();
- if (TI->getNumSuccessors() == 0 && !TI->isExceptionalTerminator() &&
- !isa<UnreachableInst>(TI))
- Terminators.insert(TI);
- }
+ // Consider the final coro.suspend as the real terminator when the current
+ // function is a coroutine.
+ for (BasicBlock &B : *F) {
+ auto *TI = B.getTerminator();
+ if (TI->getNumSuccessors() == 0 && !TI->isExceptionalTerminator() &&
+ !isa<UnreachableInst>(TI))
+ Terminators.insert(&B);
+ }
// Filter out the coro.destroy that lie along exceptional paths.
- SmallPtrSet<CoroSubFnInst *, 4> DAs;
- for (CoroSubFnInst *DA : DestroyAddr) {
- for (Instruction *TI : Terminators) {
- if (DT.dominates(DA, TI)) {
- DAs.insert(DA);
- break;
+ SmallPtrSet<CoroBeginInst *, 8> ReferencedCoroBegins;
+ for (auto &It : DestroyAddr) {
+ for (Instruction *DA : It.second) {
+ for (BasicBlock *TI : Terminators) {
+ if (DT.dominates(DA, TI->getTerminator())) {
+ ReferencedCoroBegins.insert(It.first);
+ break;
+ }
}
}
- }
- // Find all the coro.begin referenced by coro.destroy along happy paths.
- SmallPtrSet<CoroBeginInst *, 8> ReferencedCoroBegins;
- for (CoroSubFnInst *DA : DAs) {
- if (auto *CB = dyn_cast<CoroBeginInst>(DA->getFrame()))
- ReferencedCoroBegins.insert(CB);
- else
- return false;
+    // Check whether there is any path from coro.begin to Terminators that
+    // does not pass through any of the coro.destroys.
+ if (!ReferencedCoroBegins.count(It.first) &&
+ !hasEscapePath(It.first, Terminators))
+ ReferencedCoroBegins.insert(It.first);
}
// If size of the set is the same as total number of coro.begin, that means we
@@ -188,6 +255,30 @@ bool Lowerer::shouldElide(Function *F, DominatorTree &DT) const {
return ReferencedCoroBegins.size() == CoroBegins.size();
}
+void Lowerer::collectPostSplitCoroIds(Function *F) {
+ CoroIds.clear();
+ CoroSuspendSwitches.clear();
+ for (auto &I : instructions(F)) {
+ if (auto *CII = dyn_cast<CoroIdInst>(&I))
+ if (CII->getInfo().isPostSplit())
+ // If it is the coroutine itself, don't touch it.
+ if (CII->getCoroutine() != CII->getFunction())
+ CoroIds.push_back(CII);
+
+    // Consider a case like:
+ // %0 = call i8 @llvm.coro.suspend(...)
+ // switch i8 %0, label %suspend [i8 0, label %resume
+ // i8 1, label %cleanup]
+ // and collect the SwitchInsts which are used by escape analysis later.
+ if (auto *CSI = dyn_cast<CoroSuspendInst>(&I))
+ if (CSI->hasOneUse() && isa<SwitchInst>(CSI->use_begin()->getUser())) {
+ SwitchInst *SWI = cast<SwitchInst>(CSI->use_begin()->getUser());
+ if (SWI->getNumCases() == 2)
+ CoroSuspendSwitches.insert(SWI);
+ }
+ }
+}
+
bool Lowerer::processCoroId(CoroIdInst *CoroId, AAResults &AA,
DominatorTree &DT) {
CoroBegins.clear();
@@ -218,7 +309,7 @@ bool Lowerer::processCoroId(CoroIdInst *CoroId, AAResults &AA,
ResumeAddr.push_back(II);
break;
case CoroSubFnInst::DestroyIndex:
- DestroyAddr.push_back(II);
+ DestroyAddr[CB].push_back(II);
break;
default:
llvm_unreachable("unexpected coro.subfn.addr constant");
@@ -241,11 +332,13 @@ bool Lowerer::processCoroId(CoroIdInst *CoroId, AAResults &AA,
Resumers,
ShouldElide ? CoroSubFnInst::CleanupIndex : CoroSubFnInst::DestroyIndex);
- replaceWithConstant(DestroyAddrConstant, DestroyAddr);
+ for (auto &It : DestroyAddr)
+ replaceWithConstant(DestroyAddrConstant, It.second);
if (ShouldElide) {
- auto *FrameTy = getFrameType(cast<Function>(ResumeAddrConstant));
- elideHeapAllocations(CoroId->getFunction(), FrameTy, AA);
+ auto FrameSizeAndAlign = getFrameLayout(cast<Function>(ResumeAddrConstant));
+ elideHeapAllocations(CoroId->getFunction(), FrameSizeAndAlign.first,
+ FrameSizeAndAlign.second, AA);
coro::replaceCoroFree(CoroId, /*Elide=*/true);
}
@@ -272,9 +365,31 @@ static bool replaceDevirtTrigger(Function &F) {
return true;
}
-//===----------------------------------------------------------------------===//
-// Top Level Driver
-//===----------------------------------------------------------------------===//
+static bool declaresCoroElideIntrinsics(Module &M) {
+ return coro::declaresIntrinsics(M, {"llvm.coro.id"});
+}
+
+PreservedAnalyses CoroElidePass::run(Function &F, FunctionAnalysisManager &AM) {
+ auto &M = *F.getParent();
+ if (!declaresCoroElideIntrinsics(M))
+ return PreservedAnalyses::all();
+
+ Lowerer L(M);
+ L.CoroIds.clear();
+ L.collectPostSplitCoroIds(&F);
+ // If we did not find any coro.id, there is nothing to do.
+ if (L.CoroIds.empty())
+ return PreservedAnalyses::all();
+
+ AAResults &AA = AM.getResult<AAManager>(F);
+ DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
+
+ bool Changed = false;
+ for (auto *CII : L.CoroIds)
+ Changed |= L.processCoroId(CII, AA, DT);
+
+ return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
+}
namespace {
struct CoroElideLegacy : FunctionPass {
@@ -286,7 +401,7 @@ struct CoroElideLegacy : FunctionPass {
std::unique_ptr<Lowerer> L;
bool doInitialization(Module &M) override {
- if (coro::declaresIntrinsics(M, {"llvm.coro.id"}))
+ if (declaresCoroElideIntrinsics(M))
L = std::make_unique<Lowerer>(M);
return false;
}
@@ -301,15 +416,7 @@ struct CoroElideLegacy : FunctionPass {
Changed = replaceDevirtTrigger(F);
L->CoroIds.clear();
-
- // Collect all PostSplit coro.ids.
- for (auto &I : instructions(F))
- if (auto *CII = dyn_cast<CoroIdInst>(&I))
- if (CII->getInfo().isPostSplit())
- // If it is the coroutine itself, don't touch it.
- if (CII->getCoroutine() != CII->getFunction())
- L->CoroIds.push_back(CII);
-
+ L->collectPostSplitCoroIds(&F);
// If we did not find any coro.id, there is nothing to do.
if (L->CoroIds.empty())
return Changed;
diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
index 2c42cf8a6d25..f55501a05d85 100644
--- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
@@ -18,18 +18,22 @@
#include "CoroInternal.h"
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/Analysis/PtrUseVisitor.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/CFG.h"
+#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/circular_raw_ostream.h"
+#include "llvm/Support/OptimizedStructLayout.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+#include <algorithm>
using namespace llvm;
@@ -105,7 +109,6 @@ struct SuspendCrossingInfo {
size_t const DefIndex = Mapping.blockToIndex(DefBB);
size_t const UseIndex = Mapping.blockToIndex(UseBB);
- assert(Block[UseIndex].Consumes[DefIndex] && "use must consume def");
bool const Result = Block[UseIndex].Kills[DefIndex];
LLVM_DEBUG(dbgs() << UseBB->getName() << " => " << DefBB->getName()
<< " answer is " << Result << "\n");
@@ -338,52 +341,182 @@ namespace {
// coroutine frame and if the alignment specified on the Alloca instruction
// differs from the natural alignment of the alloca type we will need to insert
// padding.
-struct PaddingCalculator {
+class FrameTypeBuilder {
+ struct Field {
+ uint64_t Size;
+ uint64_t Offset;
+ Spill *ForSpill;
+ Type *Ty;
+ unsigned FieldIndex;
+ Align Alignment;
+ Align TyAlignment;
+ };
+
const DataLayout &DL;
LLVMContext &Context;
- unsigned StructSize = 0;
+ uint64_t StructSize = 0;
+ Align StructAlign;
+ bool IsFinished = false;
- PaddingCalculator(LLVMContext &Context, DataLayout const &DL)
- : DL(DL), Context(Context) {}
+ SmallVector<Field, 8> Fields;
+ DenseMap<Value*, unsigned> FieldIndexByKey;
- // Replicate the logic from IR/DataLayout.cpp to match field offset
- // computation for LLVM structs.
- void addType(Type *Ty) {
- unsigned TyAlign = DL.getABITypeAlignment(Ty);
- if ((StructSize & (TyAlign - 1)) != 0)
- StructSize = alignTo(StructSize, TyAlign);
+public:
+ FrameTypeBuilder(LLVMContext &Context, DataLayout const &DL)
+ : DL(DL), Context(Context) {}
- StructSize += DL.getTypeAllocSize(Ty); // Consume space for this data item.
- }
+ class FieldId {
+ size_t Value;
+ explicit FieldId(size_t Value) : Value(Value) {}
+
+ friend class FrameTypeBuilder;
+ };
+
+ /// Add a field to this structure for the storage of an `alloca`
+ /// instruction.
+ FieldId addFieldForAlloca(AllocaInst *AI, Spill *ForSpill = nullptr,
+ bool IsHeader = false) {
+ Type *Ty = AI->getAllocatedType();
+
+ // Make an array type if this is a static array allocation.
+ if (AI->isArrayAllocation()) {
+ if (auto *CI = dyn_cast<ConstantInt>(AI->getArraySize()))
+ Ty = ArrayType::get(Ty, CI->getValue().getZExtValue());
+ else
+ report_fatal_error("Coroutines cannot handle non static allocas yet");
+ }
- void addTypes(SmallVectorImpl<Type *> const &Types) {
- for (auto *Ty : Types)
- addType(Ty);
+ return addField(Ty, AI->getAlign(), ForSpill, IsHeader);
}
- unsigned computePadding(Type *Ty, unsigned ForcedAlignment) {
- unsigned TyAlign = DL.getABITypeAlignment(Ty);
- auto Natural = alignTo(StructSize, TyAlign);
- auto Forced = alignTo(StructSize, ForcedAlignment);
+ /// Add a field to this structure.
+ FieldId addField(Type *Ty, MaybeAlign FieldAlignment,
+ Spill *ForSpill = nullptr,
+ bool IsHeader = false) {
+ assert(!IsFinished && "adding fields to a finished builder");
+ assert(Ty && "must provide a type for a field");
- // Return how many bytes of padding we need to insert.
- if (Natural != Forced)
- return std::max(Natural, Forced) - StructSize;
+ // The field size is always the alloc size of the type.
+ uint64_t FieldSize = DL.getTypeAllocSize(Ty);
- // Rely on natural alignment.
- return 0;
+ // The field alignment might not be the type alignment, but we need
+ // to remember the type alignment anyway to build the type.
+ Align TyAlignment = DL.getABITypeAlign(Ty);
+ if (!FieldAlignment) FieldAlignment = TyAlignment;
+
+ // Lay out header fields immediately.
+ uint64_t Offset;
+ if (IsHeader) {
+ Offset = alignTo(StructSize, FieldAlignment);
+ StructSize = Offset + FieldSize;
+
+ // Everything else has a flexible offset.
+ } else {
+ Offset = OptimizedStructLayoutField::FlexibleOffset;
+ }
+
+ Fields.push_back({FieldSize, Offset, ForSpill, Ty, 0,
+ *FieldAlignment, TyAlignment});
+ return FieldId(Fields.size() - 1);
+ }
+
+ /// Finish the layout and set the body on the given type.
+ void finish(StructType *Ty);
+
+ uint64_t getStructSize() const {
+ assert(IsFinished && "not yet finished!");
+ return StructSize;
}
- // If padding required, return the padding field type to insert.
- ArrayType *getPaddingType(Type *Ty, unsigned ForcedAlignment) {
- if (auto Padding = computePadding(Ty, ForcedAlignment))
- return ArrayType::get(Type::getInt8Ty(Context), Padding);
+ Align getStructAlign() const {
+ assert(IsFinished && "not yet finished!");
+ return StructAlign;
+ }
- return nullptr;
+ unsigned getFieldIndex(FieldId Id) const {
+ assert(IsFinished && "not yet finished!");
+ return Fields[Id.Value].FieldIndex;
}
};
} // namespace
+void FrameTypeBuilder::finish(StructType *Ty) {
+ assert(!IsFinished && "already finished!");
+
+ // Prepare the optimal-layout field array.
+ // The Id in the layout field is a pointer to our Field for it.
+ SmallVector<OptimizedStructLayoutField, 8> LayoutFields;
+ LayoutFields.reserve(Fields.size());
+ for (auto &Field : Fields) {
+ LayoutFields.emplace_back(&Field, Field.Size, Field.Alignment,
+ Field.Offset);
+ }
+
+ // Perform layout.
+ auto SizeAndAlign = performOptimizedStructLayout(LayoutFields);
+ StructSize = SizeAndAlign.first;
+ StructAlign = SizeAndAlign.second;
+
+ auto getField = [](const OptimizedStructLayoutField &LayoutField) -> Field & {
+ return *static_cast<Field *>(const_cast<void*>(LayoutField.Id));
+ };
+
+ // We need to produce a packed struct type if there's a field whose
+ // assigned offset isn't a multiple of its natural type alignment.
+ bool Packed = [&] {
+ for (auto &LayoutField : LayoutFields) {
+ auto &F = getField(LayoutField);
+ if (!isAligned(F.TyAlignment, LayoutField.Offset))
+ return true;
+ }
+ return false;
+ }();
+
+ // Build the struct body.
+ SmallVector<Type*, 16> FieldTypes;
+ FieldTypes.reserve(LayoutFields.size() * 3 / 2);
+ uint64_t LastOffset = 0;
+ for (auto &LayoutField : LayoutFields) {
+ auto &F = getField(LayoutField);
+
+ auto Offset = LayoutField.Offset;
+
+ // Add a padding field if there's a padding gap and we're either
+ // building a packed struct or the padding gap is more than we'd
+ // get from aligning to the field type's natural alignment.
+ assert(Offset >= LastOffset);
+ if (Offset != LastOffset) {
+ if (Packed || alignTo(LastOffset, F.TyAlignment) != Offset)
+ FieldTypes.push_back(ArrayType::get(Type::getInt8Ty(Context),
+ Offset - LastOffset));
+ }
+
+ // Record the layout information into both the Field and the
+ // original Spill, if there is one.
+ F.Offset = Offset;
+ F.FieldIndex = FieldTypes.size();
+ if (F.ForSpill) {
+ F.ForSpill->setFieldIndex(F.FieldIndex);
+ }
+
+ FieldTypes.push_back(F.Ty);
+ LastOffset = Offset + F.Size;
+ }
+
+ Ty->setBody(FieldTypes, Packed);
+
+#ifndef NDEBUG
+ // Check that the IR layout matches the offsets we expect.
+ auto Layout = DL.getStructLayout(Ty);
+ for (auto &F : Fields) {
+ assert(Ty->getElementType(F.FieldIndex) == F.Ty);
+ assert(Layout->getElementOffset(F.FieldIndex) == F.Offset);
+ }
+#endif
+
+ IsFinished = true;
+}
+
// Build a struct that will keep state for an active coroutine.
// struct f.frame {
// ResumeFnTy ResumeFnAddr;
@@ -396,13 +529,17 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape,
SpillInfo &Spills) {
LLVMContext &C = F.getContext();
const DataLayout &DL = F.getParent()->getDataLayout();
- PaddingCalculator Padder(C, DL);
- SmallString<32> Name(F.getName());
- Name.append(".Frame");
- StructType *FrameTy = StructType::create(C, Name);
- SmallVector<Type *, 8> Types;
+ StructType *FrameTy = [&] {
+ SmallString<32> Name(F.getName());
+ Name.append(".Frame");
+ return StructType::create(C, Name);
+ }();
+
+ FrameTypeBuilder B(C, DL);
AllocaInst *PromiseAlloca = Shape.getPromiseAlloca();
+ Optional<FrameTypeBuilder::FieldId> PromiseFieldId;
+ Optional<FrameTypeBuilder::FieldId> SwitchIndexFieldId;
if (Shape.ABI == coro::ABI::Switch) {
auto *FramePtrTy = FrameTy->getPointerTo();
@@ -410,74 +547,74 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape,
/*IsVarArg=*/false);
auto *FnPtrTy = FnTy->getPointerTo();
- // Figure out how wide should be an integer type storing the suspend index.
+ // Add header fields for the resume and destroy functions.
+ // We can rely on these being perfectly packed.
+ B.addField(FnPtrTy, None, nullptr, /*header*/ true);
+ B.addField(FnPtrTy, None, nullptr, /*header*/ true);
+
+ // Add a header field for the promise if there is one.
+ if (PromiseAlloca) {
+ PromiseFieldId =
+ B.addFieldForAlloca(PromiseAlloca, nullptr, /*header*/ true);
+ }
+
+ // Add a field to store the suspend index. This doesn't need to
+ // be in the header.
unsigned IndexBits = std::max(1U, Log2_64_Ceil(Shape.CoroSuspends.size()));
- Type *PromiseType = PromiseAlloca
- ? PromiseAlloca->getType()->getElementType()
- : Type::getInt1Ty(C);
Type *IndexType = Type::getIntNTy(C, IndexBits);
- Types.push_back(FnPtrTy);
- Types.push_back(FnPtrTy);
- Types.push_back(PromiseType);
- Types.push_back(IndexType);
+
+ SwitchIndexFieldId = B.addField(IndexType, None);
} else {
assert(PromiseAlloca == nullptr && "lowering doesn't support promises");
}
Value *CurrentDef = nullptr;
- Padder.addTypes(Types);
-
// Create an entry for every spilled value.
for (auto &S : Spills) {
+ // We can have multiple entries in Spills for a single value, but
+ // they should form a contiguous run. Ignore all but the first.
if (CurrentDef == S.def())
continue;
CurrentDef = S.def();
- // PromiseAlloca was already added to Types array earlier.
- if (CurrentDef == PromiseAlloca)
- continue;
- uint64_t Count = 1;
- Type *Ty = nullptr;
+ assert(CurrentDef != PromiseAlloca &&
+ "recorded spill use of promise alloca?");
+
if (auto *AI = dyn_cast<AllocaInst>(CurrentDef)) {
- Ty = AI->getAllocatedType();
- if (unsigned AllocaAlignment = AI->getAlignment()) {
- // If alignment is specified in alloca, see if we need to insert extra
- // padding.
- if (auto PaddingTy = Padder.getPaddingType(Ty, AllocaAlignment)) {
- Types.push_back(PaddingTy);
- Padder.addType(PaddingTy);
- }
- }
- if (auto *CI = dyn_cast<ConstantInt>(AI->getArraySize()))
- Count = CI->getValue().getZExtValue();
- else
- report_fatal_error("Coroutines cannot handle non static allocas yet");
+ B.addFieldForAlloca(AI, &S);
} else {
- Ty = CurrentDef->getType();
+ Type *Ty = CurrentDef->getType();
+ B.addField(Ty, None, &S);
}
- S.setFieldIndex(Types.size());
- if (Count == 1)
- Types.push_back(Ty);
- else
- Types.push_back(ArrayType::get(Ty, Count));
- Padder.addType(Ty);
}
- FrameTy->setBody(Types);
+
+ B.finish(FrameTy);
+ Shape.FrameAlign = B.getStructAlign();
+ Shape.FrameSize = B.getStructSize();
switch (Shape.ABI) {
+ // In the switch ABI, remember the field indices for the promise and
+ // switch-index fields.
case coro::ABI::Switch:
+ Shape.SwitchLowering.IndexField =
+ B.getFieldIndex(*SwitchIndexFieldId);
+ Shape.SwitchLowering.PromiseField =
+ (PromiseAlloca ? B.getFieldIndex(*PromiseFieldId) : 0);
+
+ // Also round the frame size up to a multiple of its alignment, as is
+ // generally expected in C/C++.
+ Shape.FrameSize = alignTo(Shape.FrameSize, Shape.FrameAlign);
break;
- // Remember whether the frame is inline in the storage.
+ // In the retcon ABI, remember whether the frame is inline in the storage.
case coro::ABI::Retcon:
case coro::ABI::RetconOnce: {
- auto &Layout = F.getParent()->getDataLayout();
auto Id = Shape.getRetconCoroId();
Shape.RetconLowering.IsFrameInlineInStorage
- = (Layout.getTypeAllocSize(FrameTy) <= Id->getStorageSize() &&
- Layout.getABITypeAlignment(FrameTy) <= Id->getStorageAlignment());
+ = (B.getStructSize() <= Id->getStorageSize() &&
+ B.getStructAlign() <= Id->getStorageAlignment());
break;
}
}
@@ -606,10 +743,12 @@ static Instruction *insertSpills(const SpillInfo &Spills, coro::Shape &Shape) {
// we remember allocas and their indices to be handled once we processed
// all the spills.
SmallVector<std::pair<AllocaInst *, unsigned>, 4> Allocas;
- // Promise alloca (if present) has a fixed field number.
+
+ // Promise alloca (if present) doesn't show in the spills and has a
+ // special field number.
if (auto *PromiseAlloca = Shape.getPromiseAlloca()) {
assert(Shape.ABI == coro::ABI::Switch);
- Allocas.emplace_back(PromiseAlloca, coro::Shape::SwitchFieldIndex::Promise);
+ Allocas.emplace_back(PromiseAlloca, Shape.getPromiseField());
}
// Create a GEP with the given index into the coroutine frame for the original
@@ -636,12 +775,12 @@ static Instruction *insertSpills(const SpillInfo &Spills, coro::Shape &Shape) {
};
// Create a load instruction to reload the spilled value from the coroutine
- // frame.
- auto CreateReload = [&](Instruction *InsertBefore) {
+ // frame. Populates the Value pointer reference provided with the frame GEP.
+ auto CreateReload = [&](Instruction *InsertBefore, Value *&G) {
assert(Index != InvalidFieldIndex && "accessing unassigned field number");
Builder.SetInsertPoint(InsertBefore);
- auto *G = GetFramePointer(Index, CurrentValue);
+ G = GetFramePointer(Index, CurrentValue);
G->setName(CurrentValue->getName() + Twine(".reload.addr"));
return isa<AllocaInst>(CurrentValue)
@@ -650,6 +789,7 @@ static Instruction *insertSpills(const SpillInfo &Spills, coro::Shape &Shape) {
CurrentValue->getName() + Twine(".reload"));
};
+ Value *GEP = nullptr, *CurrentGEP = nullptr;
for (auto const &E : Spills) {
// If we have not seen the value, generate a spill.
if (CurrentValue != E.def()) {
@@ -722,7 +862,7 @@ static Instruction *insertSpills(const SpillInfo &Spills, coro::Shape &Shape) {
// If we have not seen the use block, generate a reload in it.
if (CurrentBlock != E.userBlock()) {
CurrentBlock = E.userBlock();
- CurrentReload = CreateReload(&*CurrentBlock->getFirstInsertionPt());
+ CurrentReload = CreateReload(&*CurrentBlock->getFirstInsertionPt(), GEP);
}
// If we have a single edge PHINode, remove it and replace it with a reload
@@ -736,6 +876,19 @@ static Instruction *insertSpills(const SpillInfo &Spills, coro::Shape &Shape) {
continue;
}
+ // If we have not seen this GEP instruction, migrate any dbg.declare from
+ // the alloca to it.
+ if (CurrentGEP != GEP) {
+ CurrentGEP = GEP;
+ TinyPtrVector<DbgDeclareInst *> DIs = FindDbgDeclareUses(CurrentValue);
+ if (!DIs.empty())
+ DIBuilder(*CurrentBlock->getParent()->getParent(),
+ /*AllowUnresolved*/ false)
+ .insertDeclare(CurrentGEP, DIs.front()->getVariable(),
+ DIs.front()->getExpression(),
+ DIs.front()->getDebugLoc(), DIs.front());
+ }
+
// Replace all uses of CurrentValue in the current instruction with reload.
E.user()->replaceUsesOfWith(CurrentValue, CurrentReload);
}
@@ -746,14 +899,38 @@ static Instruction *insertSpills(const SpillInfo &Spills, coro::Shape &Shape) {
FramePtrBB->splitBasicBlock(FramePtr->getNextNode(), "AllocaSpillBB");
SpillBlock->splitBasicBlock(&SpillBlock->front(), "PostSpill");
Shape.AllocaSpillBlock = SpillBlock;
- // If we found any allocas, replace all of their remaining uses with Geps.
- // Note: we cannot do it indiscriminately as some of the uses may not be
- // dominated by CoroBegin.
+
+ // retcon and retcon.once lowering assumes all uses have been sunk.
+ if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce) {
+ // If we found any allocas, replace all of their remaining uses with Geps.
+ Builder.SetInsertPoint(&SpillBlock->front());
+ for (auto &P : Allocas) {
+ auto *G = GetFramePointer(P.second, P.first);
+
+ // We are not using ReplaceInstWithInst(P.first, cast<Instruction>(G))
+ // here, as we are changing location of the instruction.
+ G->takeName(P.first);
+ P.first->replaceAllUsesWith(G);
+ P.first->eraseFromParent();
+ }
+ return FramePtr;
+ }
+
+  // If we found any allocas, replace all of their remaining uses with GEP
+  // instructions. Because new dbg.declares have been created for these allocas,
+  // we also delete the original dbg.declares and replace other uses with undef.
+ // Note: We cannot replace the alloca with GEP instructions indiscriminately,
+ // as some of the uses may not be dominated by CoroBegin.
bool MightNeedToCopy = false;
Builder.SetInsertPoint(&Shape.AllocaSpillBlock->front());
SmallVector<Instruction *, 4> UsersToUpdate;
for (auto &P : Allocas) {
AllocaInst *const A = P.first;
+
+ for (auto *DI : FindDbgDeclareUses(A))
+ DI->eraseFromParent();
+ replaceDbgUsesWithUndef(A);
+
UsersToUpdate.clear();
for (User *U : A->users()) {
auto *I = cast<Instruction>(U);
@@ -784,7 +961,7 @@ static Instruction *insertSpills(const SpillInfo &Spills, coro::Shape &Shape) {
"Coroutines cannot handle copying of array allocas yet");
auto *G = GetFramePointer(P.second, A);
- auto *Value = Builder.CreateLoad(A);
+ auto *Value = Builder.CreateLoad(A->getAllocatedType(), A);
Builder.CreateStore(Value, G);
}
}
@@ -1106,7 +1283,7 @@ static void lowerLocalAllocas(ArrayRef<CoroAllocaAllocInst*> LocalAllocas,
// Allocate memory.
auto Alloca = Builder.CreateAlloca(Builder.getInt8Ty(), AI->getSize());
- Alloca->setAlignment(MaybeAlign(AI->getAlignment()));
+ Alloca->setAlignment(Align(AI->getAlignment()));
for (auto U : AI->users()) {
// Replace gets with the allocation.
@@ -1166,7 +1343,7 @@ static Value *emitGetSwiftErrorValue(IRBuilder<> &Builder, Type *ValueTy,
auto FnTy = FunctionType::get(ValueTy, {}, false);
auto Fn = ConstantPointerNull::get(FnTy->getPointerTo());
- auto Call = Builder.CreateCall(Fn, {});
+ auto Call = Builder.CreateCall(FnTy, Fn, {});
Shape.SwiftErrorOps.push_back(Call);
return Call;
@@ -1182,7 +1359,7 @@ static Value *emitSetSwiftErrorValue(IRBuilder<> &Builder, Value *V,
{V->getType()}, false);
auto Fn = ConstantPointerNull::get(FnTy->getPointerTo());
- auto Call = Builder.CreateCall(Fn, { V });
+ auto Call = Builder.CreateCall(FnTy, Fn, { V });
Shape.SwiftErrorOps.push_back(Call);
return Call;
@@ -1322,11 +1499,125 @@ static void eliminateSwiftError(Function &F, coro::Shape &Shape) {
}
}
-void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
- // Lower coro.dbg.declare to coro.dbg.value, since we are going to rewrite
- // access to local variables.
- LowerDbgDeclare(F);
+/// retcon and retcon.once conventions assume that all spill uses can be sunk
+/// after the coro.begin intrinsic.
+static void sinkSpillUsesAfterCoroBegin(Function &F, const SpillInfo &Spills,
+ CoroBeginInst *CoroBegin) {
+ DominatorTree Dom(F);
+
+ SmallSetVector<Instruction *, 32> ToMove;
+ SmallVector<Instruction *, 32> Worklist;
+
+ // Collect all users that precede coro.begin.
+ for (auto const &Entry : Spills) {
+ auto *SpillDef = Entry.def();
+ for (User *U : SpillDef->users()) {
+ auto Inst = cast<Instruction>(U);
+ if (Inst->getParent() != CoroBegin->getParent() ||
+ Dom.dominates(CoroBegin, Inst))
+ continue;
+ if (ToMove.insert(Inst))
+ Worklist.push_back(Inst);
+ }
+ }
+ // Recursively collect users before coro.begin.
+ while (!Worklist.empty()) {
+ auto *Def = Worklist.back();
+ Worklist.pop_back();
+ for (User *U : Def->users()) {
+ auto Inst = cast<Instruction>(U);
+ if (Dom.dominates(CoroBegin, Inst))
+ continue;
+ if (ToMove.insert(Inst))
+ Worklist.push_back(Inst);
+ }
+ }
+
+ // Sort by dominance.
+ SmallVector<Instruction *, 64> InsertionList(ToMove.begin(), ToMove.end());
+ std::sort(InsertionList.begin(), InsertionList.end(),
+ [&Dom](Instruction *A, Instruction *B) -> bool {
+              // If a dominates b, it should precede (<) b.
+ return Dom.dominates(A, B);
+ });
+ Instruction *InsertPt = CoroBegin->getNextNode();
+ for (Instruction *Inst : InsertionList)
+ Inst->moveBefore(InsertPt);
+
+ return;
+}
+
+/// For each local variable whose users are all inside a single suspended
+/// region, sink its lifetime.start markers to just after the suspend block.
+/// Doing so minimizes the lifetime of each variable, hence minimizing the
+/// amount of data we end up putting on the frame.
+static void sinkLifetimeStartMarkers(Function &F, coro::Shape &Shape,
+ SuspendCrossingInfo &Checker) {
+ DominatorTree DT(F);
+
+ // Collect all possible basic blocks which may dominate all uses of allocas.
+ SmallPtrSet<BasicBlock *, 4> DomSet;
+ DomSet.insert(&F.getEntryBlock());
+ for (auto *CSI : Shape.CoroSuspends) {
+ BasicBlock *SuspendBlock = CSI->getParent();
+ assert(isSuspendBlock(SuspendBlock) && SuspendBlock->getSingleSuccessor() &&
+ "should have split coro.suspend into its own block");
+ DomSet.insert(SuspendBlock->getSingleSuccessor());
+ }
+
+ for (Instruction &I : instructions(F)) {
+ if (!isa<AllocaInst>(&I))
+ continue;
+
+ for (BasicBlock *DomBB : DomSet) {
+ bool Valid = true;
+ SmallVector<Instruction *, 1> BCInsts;
+
+ auto isUsedByLifetimeStart = [&](Instruction *I) {
+ if (isa<BitCastInst>(I) && I->hasOneUse())
+ if (auto *IT = dyn_cast<IntrinsicInst>(I->user_back()))
+ return IT->getIntrinsicID() == Intrinsic::lifetime_start;
+ return false;
+ };
+
+ for (User *U : I.users()) {
+ Instruction *UI = cast<Instruction>(U);
+        // For all users except lifetime.start markers: if they are all
+        // dominated by one of the candidate basic blocks and do not cross
+        // suspend points either, there is no need to spill the instruction.
+ if (!DT.dominates(DomBB, UI->getParent()) ||
+ Checker.isDefinitionAcrossSuspend(DomBB, U)) {
+ // Skip bitcast used by lifetime.start markers.
+ if (isUsedByLifetimeStart(UI)) {
+ BCInsts.push_back(UI);
+ continue;
+ }
+ Valid = false;
+ break;
+ }
+ }
+      // Sink lifetime.start markers to the dominating block when they are the
+      // only users outside the region.
+ if (Valid && BCInsts.size() != 0) {
+ auto *NewBitcast = BCInsts[0]->clone();
+ auto *NewLifetime = cast<Instruction>(BCInsts[0]->user_back())->clone();
+ NewLifetime->replaceUsesOfWith(BCInsts[0], NewBitcast);
+ NewBitcast->insertBefore(DomBB->getTerminator());
+ NewLifetime->insertBefore(DomBB->getTerminator());
+
+        // The lifetime.start markers outside the region are no longer
+        // necessary.
+ for (Instruction *S : BCInsts) {
+ S->user_back()->eraseFromParent();
+ }
+ break;
+ }
+ }
+ }
+}
+
+void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
eliminateSwiftError(F, Shape);
if (Shape.ABI == coro::ABI::Switch &&
@@ -1376,6 +1667,25 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
Spills.clear();
}
+ sinkLifetimeStartMarkers(F, Shape, Checker);
+ // Collect lifetime.start info for each alloca.
+ using LifetimeStart = SmallPtrSet<Instruction *, 2>;
+ llvm::DenseMap<Instruction *, std::unique_ptr<LifetimeStart>> LifetimeMap;
+ for (Instruction &I : instructions(F)) {
+ auto *II = dyn_cast<IntrinsicInst>(&I);
+ if (!II || II->getIntrinsicID() != Intrinsic::lifetime_start)
+ continue;
+
+ if (auto *OpInst = dyn_cast<BitCastInst>(I.getOperand(1)))
+ if (auto *AI = dyn_cast<AllocaInst>(OpInst->getOperand(0))) {
+
+ if (LifetimeMap.find(AI) == LifetimeMap.end())
+ LifetimeMap[AI] = std::make_unique<LifetimeStart>();
+
+ LifetimeMap[AI]->insert(OpInst);
+ }
+ }
+
// Collect the spills for arguments and other not-materializable values.
for (Argument &A : F.args())
for (User *U : A.users())
@@ -1421,16 +1731,31 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
continue;
}
- for (User *U : I.users())
- if (Checker.isDefinitionAcrossSuspend(I, U)) {
+ auto Iter = LifetimeMap.find(&I);
+ for (User *U : I.users()) {
+ bool NeedSpill = false;
+
+ // Check against lifetime.start if the instruction has the info.
+ if (Iter != LifetimeMap.end())
+ for (auto *S : *Iter->second) {
+ if ((NeedSpill = Checker.isDefinitionAcrossSuspend(*S, U)))
+ break;
+ }
+ else
+ NeedSpill = Checker.isDefinitionAcrossSuspend(I, U);
+
+ if (NeedSpill) {
// We cannot spill a token.
if (I.getType()->isTokenTy())
report_fatal_error(
"token definition is separated from the use by a suspend point");
Spills.emplace_back(&I, U);
}
+ }
}
LLVM_DEBUG(dump("Spills", Spills));
+ if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce)
+ sinkSpillUsesAfterCoroBegin(F, Spills, Shape.CoroBegin);
Shape.FrameTy = buildFrameType(F, Shape, Spills);
Shape.FramePtr = insertSpills(Spills, Shape);
lowerLocalAllocas(LocalAllocas, DeadInstructions);
diff --git a/llvm/lib/Transforms/Coroutines/CoroInstr.h b/llvm/lib/Transforms/Coroutines/CoroInstr.h
index de2d2920cb15..320137526db8 100644
--- a/llvm/lib/Transforms/Coroutines/CoroInstr.h
+++ b/llvm/lib/Transforms/Coroutines/CoroInstr.h
@@ -211,8 +211,8 @@ public:
return cast<ConstantInt>(getArgOperand(SizeArg))->getZExtValue();
}
- uint64_t getStorageAlignment() const {
- return cast<ConstantInt>(getArgOperand(AlignArg))->getZExtValue();
+ Align getStorageAlignment() const {
+ return cast<ConstantInt>(getArgOperand(AlignArg))->getAlignValue();
}
Value *getStorage() const {
@@ -338,11 +338,16 @@ class LLVM_LIBRARY_VISIBILITY CoroPromiseInst : public IntrinsicInst {
enum { FrameArg, AlignArg, FromArg };
public:
+ /// Are we translating from the frame to the promise (false) or from
+ /// the promise to the frame (true)?
bool isFromPromise() const {
return cast<Constant>(getArgOperand(FromArg))->isOneValue();
}
- unsigned getAlignment() const {
- return cast<ConstantInt>(getArgOperand(AlignArg))->getZExtValue();
+
+ /// The required alignment of the promise. This must match the
+ /// alignment of the promise alloca in the coroutine.
+ Align getAlignment() const {
+ return cast<ConstantInt>(getArgOperand(AlignArg))->getAlignValue();
}
// Methods to support type inquiry through isa, cast, and dyn_cast:
@@ -463,8 +468,8 @@ public:
Value *getSize() const {
return getArgOperand(SizeArg);
}
- unsigned getAlignment() const {
- return cast<ConstantInt>(getArgOperand(AlignArg))->getZExtValue();
+ Align getAlignment() const {
+ return cast<ConstantInt>(getArgOperand(AlignArg))->getAlignValue();
}
// Methods to support type inquiry through isa, cast, and dyn_cast:
diff --git a/llvm/lib/Transforms/Coroutines/CoroInternal.h b/llvm/lib/Transforms/Coroutines/CoroInternal.h
index 7eb35400c0d5..bd76e93c9124 100644
--- a/llvm/lib/Transforms/Coroutines/CoroInternal.h
+++ b/llvm/lib/Transforms/Coroutines/CoroInternal.h
@@ -96,17 +96,22 @@ struct LLVM_LIBRARY_VISIBILITY Shape {
struct SwitchFieldIndex {
enum {
Resume,
- Destroy,
- Promise,
- Index,
- /// The index of the first spill field.
- FirstSpill
+ Destroy
+
+      // The promise field is always at a fixed offset from the start of the
+      // frame given its type, but the index isn't a constant for all
+      // possible frames.
+
+ // The switch-index field isn't at a fixed offset or index, either;
+ // we just work it in where it fits best.
};
};
coro::ABI ABI;
StructType *FrameTy;
+ Align FrameAlign;
+ uint64_t FrameSize;
Instruction *FramePtr;
BasicBlock *AllocaSpillBlock;
@@ -114,6 +119,8 @@ struct LLVM_LIBRARY_VISIBILITY Shape {
SwitchInst *ResumeSwitch;
AllocaInst *PromiseAlloca;
BasicBlock *ResumeEntryBlock;
+ unsigned IndexField;
+ unsigned PromiseField;
bool HasFinalSuspend;
};
@@ -141,10 +148,15 @@ struct LLVM_LIBRARY_VISIBILITY Shape {
return cast<AnyCoroIdRetconInst>(CoroBegin->getId());
}
+ unsigned getSwitchIndexField() const {
+ assert(ABI == coro::ABI::Switch);
+ assert(FrameTy && "frame type not assigned");
+ return SwitchLowering.IndexField;
+ }
IntegerType *getIndexType() const {
assert(ABI == coro::ABI::Switch);
assert(FrameTy && "frame type not assigned");
- return cast<IntegerType>(FrameTy->getElementType(SwitchFieldIndex::Index));
+ return cast<IntegerType>(FrameTy->getElementType(getSwitchIndexField()));
}
ConstantInt *getIndex(uint64_t Value) const {
return ConstantInt::get(getIndexType(), Value);
@@ -203,23 +215,17 @@ struct LLVM_LIBRARY_VISIBILITY Shape {
llvm_unreachable("Unknown coro::ABI enum");
}
- unsigned getFirstSpillFieldIndex() const {
- switch (ABI) {
- case coro::ABI::Switch:
- return SwitchFieldIndex::FirstSpill;
-
- case coro::ABI::Retcon:
- case coro::ABI::RetconOnce:
- return 0;
- }
- llvm_unreachable("Unknown coro::ABI enum");
- }
-
AllocaInst *getPromiseAlloca() const {
if (ABI == coro::ABI::Switch)
return SwitchLowering.PromiseAlloca;
return nullptr;
}
+ unsigned getPromiseField() const {
+ assert(ABI == coro::ABI::Switch);
+ assert(FrameTy && "frame type not assigned");
+ assert(SwitchLowering.PromiseAlloca && "no promise alloca");
+ return SwitchLowering.PromiseField;
+ }
/// Allocate memory according to the rules of the active lowering.
///
diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
index 66cb3e74e53e..9c4392e7999b 100644
--- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -18,6 +18,7 @@
// coroutine.
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Coroutines/CoroSplit.h"
#include "CoroInstr.h"
#include "CoroInternal.h"
#include "llvm/ADT/DenseMap.h"
@@ -31,7 +32,6 @@
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -59,6 +59,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/CallGraphUpdater.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
@@ -74,7 +75,7 @@ using namespace llvm;
namespace {
-/// A little helper class for building
+/// A little helper class for building
class CoroCloner {
public:
enum class Kind {
@@ -283,7 +284,7 @@ static void createResumeEntryBlock(Function &F, coro::Shape &Shape) {
auto *FramePtr = Shape.FramePtr;
auto *FrameTy = Shape.FrameTy;
auto *GepIndex = Builder.CreateStructGEP(
- FrameTy, FramePtr, coro::Shape::SwitchFieldIndex::Index, "index.addr");
+ FrameTy, FramePtr, Shape.getSwitchIndexField(), "index.addr");
auto *Index = Builder.CreateLoad(Shape.getIndexType(), GepIndex, "index");
auto *Switch =
Builder.CreateSwitch(Index, UnreachBB, Shape.CoroSuspends.size());
@@ -309,7 +310,7 @@ static void createResumeEntryBlock(Function &F, coro::Shape &Shape) {
Builder.CreateStore(NullPtr, GepIndex);
} else {
auto *GepIndex = Builder.CreateStructGEP(
- FrameTy, FramePtr, coro::Shape::SwitchFieldIndex::Index, "index.addr");
+ FrameTy, FramePtr, Shape.getSwitchIndexField(), "index.addr");
Builder.CreateStore(IndexVal, GepIndex);
}
Save->replaceAllUsesWith(ConstantTokenNone::get(C));
@@ -562,11 +563,12 @@ void CoroCloner::replaceEntryBlock() {
// In the original function, the AllocaSpillBlock is a block immediately
// following the allocation of the frame object which defines GEPs for
// all the allocas that have been moved into the frame, and it ends by
- // branching to the original beginning of the coroutine. Make this
+ // branching to the original beginning of the coroutine. Make this
// the entry block of the cloned function.
auto *Entry = cast<BasicBlock>(VMap[Shape.AllocaSpillBlock]);
+ auto *OldEntry = &NewF->getEntryBlock();
Entry->setName("entry" + Suffix);
- Entry->moveBefore(&NewF->getEntryBlock());
+ Entry->moveBefore(OldEntry);
Entry->getTerminator()->eraseFromParent();
// Clear all predecessors of the new entry block. There should be
@@ -579,8 +581,14 @@ void CoroCloner::replaceEntryBlock() {
Builder.CreateUnreachable();
BranchToEntry->eraseFromParent();
- // TODO: move any allocas into Entry that weren't moved into the frame.
- // (Currently we move all allocas into the frame.)
+ // Move any allocas into Entry that weren't moved into the frame.
+ for (auto IT = OldEntry->begin(), End = OldEntry->end(); IT != End;) {
+ Instruction &I = *IT++;
+ if (!isa<AllocaInst>(&I) || I.use_empty())
+ continue;
+
+ I.moveBefore(*Entry, Entry->getFirstInsertionPt());
+ }
// Branch from the entry to the appropriate place.
Builder.SetInsertPoint(Entry);
@@ -630,12 +638,23 @@ Value *CoroCloner::deriveNewFramePointer() {
// Otherwise, load the real frame from the opaque storage.
auto FramePtrPtr =
Builder.CreateBitCast(NewStorage, FramePtrTy->getPointerTo());
- return Builder.CreateLoad(FramePtrPtr);
+ return Builder.CreateLoad(FramePtrTy, FramePtrPtr);
}
}
llvm_unreachable("bad ABI");
}
+static void addFramePointerAttrs(AttributeList &Attrs, LLVMContext &Context,
+ unsigned ParamIndex,
+ uint64_t Size, Align Alignment) {
+ AttrBuilder ParamAttrs;
+ ParamAttrs.addAttribute(Attribute::NonNull);
+ ParamAttrs.addAttribute(Attribute::NoAlias);
+ ParamAttrs.addAlignmentAttr(Alignment);
+ ParamAttrs.addDereferenceableAttr(Size);
+ Attrs = Attrs.addParamAttributes(Context, ParamIndex, ParamAttrs);
+}
+
/// Clone the body of the original function into a resume function of
/// some sort.
void CoroCloner::create() {
@@ -684,6 +703,9 @@ void CoroCloner::create() {
// original function. This should include optimization settings and so on.
NewAttrs = NewAttrs.addAttributes(Context, AttributeList::FunctionIndex,
OrigAttrs.getFnAttributes());
+
+ addFramePointerAttrs(NewAttrs, Context, 0,
+ Shape.FrameSize, Shape.FrameAlign);
break;
case coro::ABI::Retcon:
@@ -691,13 +713,13 @@ void CoroCloner::create() {
// If we have a continuation prototype, just use its attributes,
// full-stop.
NewAttrs = Shape.RetconLowering.ResumePrototype->getAttributes();
+
+ addFramePointerAttrs(NewAttrs, Context, 0,
+ Shape.getRetconCoroId()->getStorageSize(),
+ Shape.getRetconCoroId()->getStorageAlignment());
break;
}
- // Make the frame parameter nonnull and noalias.
- NewAttrs = NewAttrs.addParamAttribute(Context, 0, Attribute::NonNull);
- NewAttrs = NewAttrs.addParamAttribute(Context, 0, Attribute::NoAlias);
-
switch (Shape.ABI) {
// In these ABIs, the cloned functions always return 'void', and the
// existing return sites are meaningless. Note that for unique
@@ -872,7 +894,8 @@ static void postSplitCleanup(Function &F) {
// For now, we do a mandatory verification step because we don't
// entirely trust this pass. Note that we don't want to add a verifier
// pass to FPM below because it will also verify all the global data.
- verifyFunction(F);
+ if (verifyFunction(F, &errs()))
+ report_fatal_error("Broken function");
legacy::FunctionPassManager FPM(F.getParent());
@@ -911,17 +934,14 @@ static bool simplifyTerminatorLeadingToRet(Instruction *InitialInst) {
BasicBlock *UnconditionalSucc = nullptr;
Instruction *I = InitialInst;
- while (I->isTerminator()) {
+ while (I->isTerminator() ||
+ (isa<CmpInst>(I) && I->getNextNode()->isTerminator())) {
if (isa<ReturnInst>(I)) {
if (I != InitialInst) {
// If InitialInst is an unconditional branch,
// remove PHI values that come from basic block of InitialInst
if (UnconditionalSucc)
- for (PHINode &PN : UnconditionalSucc->phis()) {
- int idx = PN.getBasicBlockIndex(InitialInst->getParent());
- if (idx != -1)
- PN.removeIncomingValue(idx);
- }
+ UnconditionalSucc->removePredecessor(InitialInst->getParent(), true);
ReplaceInstWithInst(InitialInst, I->clone());
}
return true;
@@ -935,6 +955,29 @@ static bool simplifyTerminatorLeadingToRet(Instruction *InitialInst) {
I = BB->getFirstNonPHIOrDbgOrLifetime();
continue;
}
+ } else if (auto *CondCmp = dyn_cast<CmpInst>(I)) {
+ auto *BR = dyn_cast<BranchInst>(I->getNextNode());
+ if (BR && BR->isConditional() && CondCmp == BR->getCondition()) {
+ // If the suspended switch instruction is left with only one case, it is
+ // simplified to a CmpInst by llvm::ConstantFoldTerminator, and the
+ // comparison looks like: %cond = icmp eq i8 %V, constant.
+ ConstantInt *CondConst = dyn_cast<ConstantInt>(CondCmp->getOperand(1));
+ if (CondConst && CondCmp->getPredicate() == CmpInst::ICMP_EQ) {
+ Value *V = CondCmp->getOperand(0);
+ auto it = ResolvedValues.find(V);
+ if (it != ResolvedValues.end())
+ V = it->second;
+
+ if (ConstantInt *Cond0 = dyn_cast<ConstantInt>(V)) {
+ BasicBlock *BB = Cond0->equalsInt(CondConst->getZExtValue())
+ ? BR->getSuccessor(0)
+ : BR->getSuccessor(1);
+ scanPHIsAndUpdateValueMap(I, BB, ResolvedValues);
+ I = BB->getFirstNonPHIOrDbgOrLifetime();
+ continue;
+ }
+ }
+ }
} else if (auto *SI = dyn_cast<SwitchInst>(I)) {
Value *V = SI->getCondition();
auto it = ResolvedValues.find(V);
@@ -952,6 +995,37 @@ static bool simplifyTerminatorLeadingToRet(Instruction *InitialInst) {
return false;
}
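
As a hedged illustration of the new CmpInst case above: when llvm::ConstantFoldTerminator reduces a one-case suspend switch, the block ends in an icmp feeding a conditional branch. The IRBuilder sketch below constructs that shape; the helper name, the constant 0, and the block names are illustrative only:

  #include "llvm/IR/IRBuilder.h"
  using namespace llvm;

  // Emits the pattern simplifyTerminatorLeadingToRet now follows:
  //   %cond = icmp eq i8 %index, 0
  //   br i1 %cond, label %resume, label %cleanup
  static void emitFoldedSuspendSwitch(IRBuilder<> &Builder, Value *SuspendIndex,
                                      BasicBlock *ResumeBB,
                                      BasicBlock *CleanupBB) {
    Value *Cond = Builder.CreateICmpEQ(SuspendIndex, Builder.getInt8(0), "cond");
    Builder.CreateCondBr(Cond, ResumeBB, CleanupBB);
  }
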
+// Check whether CI obeys the rules of the musttail attribute.
+static bool shouldBeMustTail(const CallInst &CI, const Function &F) {
+ if (CI.isInlineAsm())
+ return false;
+
+ // Match the prototype and calling convention of the resume function.
+ FunctionType *CalleeTy = CI.getFunctionType();
+ if (!CalleeTy->getReturnType()->isVoidTy() || (CalleeTy->getNumParams() != 1))
+ return false;
+
+ Type *CalleeParmTy = CalleeTy->getParamType(0);
+ if (!CalleeParmTy->isPointerTy() ||
+ (CalleeParmTy->getPointerAddressSpace() != 0))
+ return false;
+
+ if (CI.getCallingConv() != F.getCallingConv())
+ return false;
+
+ // CI should not have any ABI-impacting function attributes.
+ static const Attribute::AttrKind ABIAttrs[] = {
+ Attribute::StructRet, Attribute::ByVal, Attribute::InAlloca,
+ Attribute::Preallocated, Attribute::InReg, Attribute::Returned,
+ Attribute::SwiftSelf, Attribute::SwiftError};
+ AttributeList Attrs = CI.getAttributes();
+ for (auto AK : ABIAttrs)
+ if (Attrs.hasParamAttribute(0, AK))
+ return false;
+
+ return true;
+}
+
// Add musttail to any resume instruction that is immediately followed by a
// suspend (i.e. ret). We do this even in -O0 to support guaranteed tail call
// for symmetrical coroutine control transfer (C++ Coroutines TS extension).
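
The guarantee matters for symmetric transfer: below is a hedged, self-contained C++20 sketch (not taken from LLVM's tests) of an awaiter whose await_suspend returns the next coroutine's handle, producing the resume-call-then-ret pattern this function marks musttail so that chained transfers do not grow the stack:

  #include <coroutine>

  // Minimal task type whose final_suspend transfers control symmetrically to a
  // stored continuation; all names here are illustrative.
  struct task {
    struct promise_type {
      std::coroutine_handle<> continuation = std::noop_coroutine();
      task get_return_object() {
        return {std::coroutine_handle<promise_type>::from_promise(*this)};
      }
      std::suspend_always initial_suspend() noexcept { return {}; }
      struct final_awaiter {
        bool await_ready() noexcept { return false; }
        std::coroutine_handle<>
        await_suspend(std::coroutine_handle<promise_type> h) noexcept {
          return h.promise().continuation; // symmetric transfer
        }
        void await_resume() noexcept {}
      };
      final_awaiter final_suspend() noexcept { return {}; }
      void return_void() {}
      void unhandled_exception() {}
    };
    std::coroutine_handle<promise_type> handle;
  };
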
@@ -964,11 +1038,8 @@ static void addMustTailToCoroResumes(Function &F) {
SmallVector<CallInst *, 4> Resumes;
for (auto &I : instructions(F))
if (auto *Call = dyn_cast<CallInst>(&I))
- if (auto *CalledValue = Call->getCalledValue())
- // CoroEarly pass replaced coro resumes with indirect calls to an
- // address return by CoroSubFnInst intrinsic. See if it is one of those.
- if (isa<CoroSubFnInst>(CalledValue->stripPointerCasts()))
- Resumes.push_back(Call);
+ if (shouldBeMustTail(*Call, F))
+ Resumes.push_back(Call);
// Set musttail on those that are followed by a ret instruction.
for (CallInst *Call : Resumes)
@@ -993,8 +1064,8 @@ static void handleNoSuspendCoroutine(coro::Shape &Shape) {
coro::replaceCoroFree(SwitchId, /*Elide=*/AllocInst != nullptr);
if (AllocInst) {
IRBuilder<> Builder(AllocInst);
- // FIXME: Need to handle overaligned members.
auto *Frame = Builder.CreateAlloca(Shape.FrameTy);
+ Frame->setAlignment(Shape.FrameAlign);
auto *VFrame = Builder.CreateBitCast(Frame, Builder.getInt8PtrTy());
AllocInst->replaceAllUsesWith(Builder.getFalse());
AllocInst->eraseFromParent();
@@ -1023,7 +1094,7 @@ static bool hasCallsInBlockBetween(Instruction *From, Instruction *To) {
if (isa<IntrinsicInst>(I))
continue;
- if (CallSite(I))
+ if (isa<CallBase>(I))
return true;
}
return false;
@@ -1093,13 +1164,11 @@ static bool simplifySuspendPoint(CoroSuspendInst *Suspend,
Prev = Pred->getTerminator();
}
- CallSite CS{Prev};
- if (!CS)
+ CallBase *CB = dyn_cast<CallBase>(Prev);
+ if (!CB)
return false;
- auto *CallInstr = CS.getInstruction();
-
- auto *Callee = CS.getCalledValue()->stripPointerCasts();
+ auto *Callee = CB->getCalledOperand()->stripPointerCasts();
// See if the callsite is for resumption or destruction of the coroutine.
auto *SubFn = dyn_cast<CoroSubFnInst>(Callee);
@@ -1114,7 +1183,7 @@ static bool simplifySuspendPoint(CoroSuspendInst *Suspend,
// calls in between Save and CallInstr. They can potentially resume the
// coroutine, rendering this optimization unsafe.
auto *Save = Suspend->getCoroSave();
- if (hasCallsBetween(Save, CallInstr))
+ if (hasCallsBetween(Save, CB))
return false;
// Replace llvm.coro.suspend with the value that results in resumption over
@@ -1124,13 +1193,13 @@ static bool simplifySuspendPoint(CoroSuspendInst *Suspend,
Save->eraseFromParent();
// No longer need a call to coro.resume or coro.destroy.
- if (auto *Invoke = dyn_cast<InvokeInst>(CallInstr)) {
+ if (auto *Invoke = dyn_cast<InvokeInst>(CB)) {
BranchInst::Create(Invoke->getNormalDest(), Invoke);
}
- // Grab the CalledValue from CS before erasing the CallInstr.
- auto *CalledValue = CS.getCalledValue();
- CallInstr->eraseFromParent();
+ // Grab the called operand from CB before erasing it.
+ auto *CalledValue = CB->getCalledOperand();
+ CB->eraseFromParent();
// If no more users remove it. Usually it is a bitcast of SubFn.
if (CalledValue != SubFn && CalledValue->user_empty())
@@ -1155,7 +1224,10 @@ static void simplifySuspendPoints(coro::Shape &Shape) {
if (N == 0)
return;
while (true) {
- if (simplifySuspendPoint(cast<CoroSuspendInst>(S[I]), Shape.CoroBegin)) {
+ auto SI = cast<CoroSuspendInst>(S[I]);
+ // Leave final.suspend to handleFinalSuspend since it is undefined behavior
+ // to resume a coroutine suspended at the final suspend point.
+ if (!SI->isFinal() && simplifySuspendPoint(SI, Shape.CoroBegin)) {
if (--N == I)
break;
std::swap(S[I], S[N]);
@@ -1225,6 +1297,7 @@ static void splitRetconCoroutine(Function &F, coro::Shape &Shape,
// Allocate. We don't need to update the call graph node because we're
// going to recompute it from scratch after splitting.
+ // FIXME: pass the required alignment
RawFramePtr = Shape.emitAlloc(Builder, Builder.getInt64(Size), nullptr);
RawFramePtr =
Builder.CreateBitCast(RawFramePtr, Shape.CoroBegin->getType());
@@ -1342,19 +1415,8 @@ namespace {
};
}
-static void splitCoroutine(Function &F, coro::Shape &Shape,
- SmallVectorImpl<Function *> &Clones) {
- switch (Shape.ABI) {
- case coro::ABI::Switch:
- return splitSwitchCoroutine(F, Shape, Clones);
- case coro::ABI::Retcon:
- case coro::ABI::RetconOnce:
- return splitRetconCoroutine(F, Shape, Clones);
- }
- llvm_unreachable("bad ABI kind");
-}
-
-static void splitCoroutine(Function &F, CallGraph &CG, CallGraphSCC &SCC) {
+static coro::Shape splitCoroutine(Function &F,
+ SmallVectorImpl<Function *> &Clones) {
PrettyStackTraceFunction prettyStackTrace(F);
// The suspend-crossing algorithm in buildCoroutineFrame get tripped
@@ -1363,26 +1425,42 @@ static void splitCoroutine(Function &F, CallGraph &CG, CallGraphSCC &SCC) {
coro::Shape Shape(F);
if (!Shape.CoroBegin)
- return;
+ return Shape;
simplifySuspendPoints(Shape);
buildCoroutineFrame(F, Shape);
replaceFrameSize(Shape);
- SmallVector<Function*, 4> Clones;
-
// If there are no suspend points, no split required, just remove
// the allocation and deallocation blocks, they are not needed.
if (Shape.CoroSuspends.empty()) {
handleNoSuspendCoroutine(Shape);
} else {
- splitCoroutine(F, Shape, Clones);
+ switch (Shape.ABI) {
+ case coro::ABI::Switch:
+ splitSwitchCoroutine(F, Shape, Clones);
+ break;
+ case coro::ABI::Retcon:
+ case coro::ABI::RetconOnce:
+ splitRetconCoroutine(F, Shape, Clones);
+ break;
+ }
}
// Replace all the swifterror operations in the original function.
// This invalidates SwiftErrorOps in the Shape.
replaceSwiftErrorOps(F, Shape, nullptr);
+ return Shape;
+}
+
+static void
+updateCallGraphAfterCoroutineSplit(Function &F, const coro::Shape &Shape,
+ const SmallVectorImpl<Function *> &Clones,
+ CallGraph &CG, CallGraphSCC &SCC) {
+ if (!Shape.CoroBegin)
+ return;
+
removeCoroEnds(Shape, &CG);
postSplitCleanup(F);
@@ -1390,6 +1468,44 @@ static void splitCoroutine(Function &F, CallGraph &CG, CallGraphSCC &SCC) {
coro::updateCallGraph(F, Clones, CG, SCC);
}
+static void updateCallGraphAfterCoroutineSplit(
+ LazyCallGraph::Node &N, const coro::Shape &Shape,
+ const SmallVectorImpl<Function *> &Clones, LazyCallGraph::SCC &C,
+ LazyCallGraph &CG, CGSCCAnalysisManager &AM, CGSCCUpdateResult &UR,
+ FunctionAnalysisManager &FAM) {
+ if (!Shape.CoroBegin)
+ return;
+
+ for (llvm::CoroEndInst *End : Shape.CoroEnds) {
+ auto &Context = End->getContext();
+ End->replaceAllUsesWith(ConstantInt::getFalse(Context));
+ End->eraseFromParent();
+ }
+
+ postSplitCleanup(N.getFunction());
+
+ // To insert the newly created coroutine funclets 'f.resume', 'f.destroy', and
+ // 'f.cleanup' into the same SCC as the coroutine 'f' they were outlined from,
+ // we register them with the LazyCallGraph itself via addNewFunctionIntoRefSCC,
+ // which updates its internal state accordingly.
+ for (Function *Clone : Clones)
+ CG.addNewFunctionIntoRefSCC(*Clone, C.getOuterRefSCC());
+
+ // We've inserted instructions into coroutine 'f' that reference the three new
+ // coroutine funclets. We must now update the call graph so that reference
+ // edges between 'f' and its funclets are added to it. LazyCallGraph only
+ // allows CGSCC passes to insert "trivial" reference edges. We've ensured
+ // above, by inserting the funclets into the same SCC as the coroutine, that
+ // the edges are trivial.
+ //
+ // N.B.: If we didn't update the call graph here, a CGSCCToFunctionPassAdaptor
+ // later in this CGSCC pass pipeline may be run, triggering a call graph
+ // update of its own. Function passes run by the adaptor are not permitted to
+ // add new edges of any kind to the graph, and the new edges inserted by this
+ // pass would be misattributed to that unrelated function pass.
+ updateCGAndAnalysisManagerForCGSCCPass(CG, C, N, AM, UR, FAM);
+}
+
// When we see the coroutine the first time, we insert an indirect call to a
// devirt trigger function and mark the coroutine that it is now ready for
// split.
@@ -1521,12 +1637,89 @@ static bool replaceAllPrepares(Function *PrepareFn, CallGraph &CG) {
return Changed;
}
-//===----------------------------------------------------------------------===//
-// Top Level Driver
-//===----------------------------------------------------------------------===//
+static bool declaresCoroSplitIntrinsics(const Module &M) {
+ return coro::declaresIntrinsics(
+ M, {"llvm.coro.begin", "llvm.coro.prepare.retcon"});
+}
+
+PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C,
+ CGSCCAnalysisManager &AM,
+ LazyCallGraph &CG, CGSCCUpdateResult &UR) {
+ // NB: One invariant of a valid LazyCallGraph::SCC is that it must contain a
+ // non-zero number of nodes, so we assume that here and grab the first
+ // node's function's module.
+ Module &M = *C.begin()->getFunction().getParent();
+ auto &FAM =
+ AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
+
+ if (!declaresCoroSplitIntrinsics(M))
+ return PreservedAnalyses::all();
+
+ // Check for uses of llvm.coro.prepare.retcon.
+ const auto *PrepareFn = M.getFunction("llvm.coro.prepare.retcon");
+ if (PrepareFn && PrepareFn->use_empty())
+ PrepareFn = nullptr;
+
+ // Find coroutines for processing.
+ SmallVector<LazyCallGraph::Node *, 4> Coroutines;
+ for (LazyCallGraph::Node &N : C)
+ if (N.getFunction().hasFnAttribute(CORO_PRESPLIT_ATTR))
+ Coroutines.push_back(&N);
+
+ if (Coroutines.empty() && !PrepareFn)
+ return PreservedAnalyses::all();
+
+ if (Coroutines.empty())
+ llvm_unreachable("new pass manager cannot yet handle "
+ "'llvm.coro.prepare.retcon'");
+
+ // Split all the coroutines.
+ for (LazyCallGraph::Node *N : Coroutines) {
+ Function &F = N->getFunction();
+ Attribute Attr = F.getFnAttribute(CORO_PRESPLIT_ATTR);
+ StringRef Value = Attr.getValueAsString();
+ LLVM_DEBUG(dbgs() << "CoroSplit: Processing coroutine '" << F.getName()
+ << "' state: " << Value << "\n");
+ if (Value == UNPREPARED_FOR_SPLIT) {
+ // Enqueue a second iteration of the CGSCC pipeline.
+ // N.B.:
+ // The CoroSplitLegacy pass "triggers" a restart of the CGSCC pass
+ // pipeline by inserting an indirect function call that the
+ // CoroElideLegacy pass then replaces with a direct function call. The
+ // legacy CGSCC pipeline implicitly behaved as if it were wrapped in the
+ // new pass manager's DevirtSCCRepeatedPass abstraction.
+ //
+ // This pass does not need to "trigger" another run of the pipeline.
+ // Instead, it simply enqueues the same RefSCC onto the pipeline's
+ // worklist.
+ UR.CWorklist.insert(&C);
+ F.addFnAttr(CORO_PRESPLIT_ATTR, PREPARED_FOR_SPLIT);
+ continue;
+ }
+ F.removeFnAttr(CORO_PRESPLIT_ATTR);
+
+ SmallVector<Function *, 4> Clones;
+ const coro::Shape Shape = splitCoroutine(F, Clones);
+ updateCallGraphAfterCoroutineSplit(*N, Shape, Clones, C, CG, AM, UR, FAM);
+ }
+
+ if (PrepareFn)
+ llvm_unreachable("new pass manager cannot yet handle "
+ "'llvm.coro.prepare.retcon'");
+
+ return PreservedAnalyses::none();
+}
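
For orientation, a hedged sketch of wiring this pass into a new-pass-manager pipeline from a standalone tool; it assumes the CoroSplit.h header added alongside this change and omits the analysis-manager registration needed to actually run the pipeline:

  #include "llvm/Analysis/CGSCCPassManager.h"
  #include "llvm/Transforms/Coroutines/CoroSplit.h"
  using namespace llvm;

  // Run CoroSplitPass over every SCC of the module in post order.
  static ModulePassManager buildCoroSplitPipeline() {
    CGSCCPassManager CGPM;
    CGPM.addPass(CoroSplitPass());
    ModulePassManager MPM;
    MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
    return MPM;
  }
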
namespace {
+// We present a coroutine to LLVM as an ordinary function with suspension
+// points marked up with intrinsics. We let the optimizer party on the coroutine
+// as a single function for as long as possible. Shortly before the coroutine is
+// eligible to be inlined into its callers, we split up the coroutine into parts
+// corresponding to initial, resume and destroy invocations of the coroutine,
+// add them to the current SCC and restart the IPO pipeline to optimize the
+// coroutine subfunctions we extracted before proceeding to the caller of the
+// coroutine.
struct CoroSplitLegacy : public CallGraphSCCPass {
static char ID; // Pass identification, replacement for typeid
@@ -1539,9 +1732,7 @@ struct CoroSplitLegacy : public CallGraphSCCPass {
// A coroutine is identified by the presence of coro.begin intrinsic, if
// we don't have any, this pass has nothing to do.
bool doInitialization(CallGraph &CG) override {
- Run = coro::declaresIntrinsics(CG.getModule(),
- {"llvm.coro.begin",
- "llvm.coro.prepare.retcon"});
+ Run = declaresCoroSplitIntrinsics(CG.getModule());
return CallGraphSCCPass::doInitialization(CG);
}
@@ -1583,7 +1774,10 @@ struct CoroSplitLegacy : public CallGraphSCCPass {
continue;
}
F->removeFnAttr(CORO_PRESPLIT_ATTR);
- splitCoroutine(*F, CG, SCC);
+
+ SmallVector<Function *, 4> Clones;
+ const coro::Shape Shape = splitCoroutine(*F, Clones);
+ updateCallGraphAfterCoroutineSplit(*F, Shape, Clones, CG, SCC);
}
if (PrepareFn)
diff --git a/llvm/lib/Transforms/Coroutines/Coroutines.cpp b/llvm/lib/Transforms/Coroutines/Coroutines.cpp
index 02d11af3303f..87c3a8b0d0cf 100644
--- a/llvm/lib/Transforms/Coroutines/Coroutines.cpp
+++ b/llvm/lib/Transforms/Coroutines/Coroutines.cpp
@@ -19,7 +19,6 @@
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/IR/Attributes.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
@@ -649,3 +648,9 @@ void LLVMAddCoroElidePass(LLVMPassManagerRef PM) {
void LLVMAddCoroCleanupPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createCoroCleanupLegacyPass());
}
+
+void
+LLVMPassManagerBuilderAddCoroutinePassesToExtensionPoints(LLVMPassManagerBuilderRef PMB) {
+ PassManagerBuilder *Builder = unwrap(PMB);
+ addCoroutinePassesToExtensionPoints(*Builder);
+}
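
A hedged usage sketch of the new C-API entry point, written as a C++ client; the llvm-c/Transforms/Coroutines.h header path is an assumption about where the declaration lives:

  #include "llvm-c/Transforms/Coroutines.h"
  #include "llvm-c/Transforms/PassManagerBuilder.h"

  // Populate a legacy pass manager so the coroutine passes run at the
  // PassManagerBuilder extension points, mirroring the C++-side
  // addCoroutinePassesToExtensionPoints().
  static void populateWithCoroutinePasses(LLVMPassManagerRef PM) {
    LLVMPassManagerBuilderRef PMB = LLVMPassManagerBuilderCreate();
    LLVMPassManagerBuilderAddCoroutinePassesToExtensionPoints(PMB);
    LLVMPassManagerBuilderPopulateModulePassManager(PMB, PM);
    LLVMPassManagerBuilderDispose(PMB);
  }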