| field | value | detail |
|---|---|---|
| author | Dimitry Andric <dim@FreeBSD.org> | 2021-08-22 19:00:43 +0000 |
| committer | Dimitry Andric <dim@FreeBSD.org> | 2021-11-13 20:39:49 +0000 |
| commit | fe6060f10f634930ff71b7c50291ddc610da2475 | |
| tree | 1483580c790bd4d27b6500a7542b5ee00534d3cc | /contrib/llvm-project/llvm/lib/Transforms/Coroutines |
| parent | b61bce17f346d79cecfd8f195a64b10f77be43b1 | |
| parent | 344a3780b2e33f6ca763666c380202b18aab72a3 | |
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Transforms/Coroutines')
7 files changed, 823 insertions, 287 deletions
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp
index 298149f8b546..5b09cdb35791 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp
@@ -80,6 +80,23 @@ bool Lowerer::lowerRemainingCoroIntrinsics(Function &F) {
       case Intrinsic::coro_subfn_addr:
         lowerSubFn(Builder, cast<CoroSubFnInst>(II));
         break;
+      case Intrinsic::coro_async_size_replace:
+        auto *Target = cast<ConstantStruct>(
+            cast<GlobalVariable>(II->getArgOperand(0)->stripPointerCasts())
+                ->getInitializer());
+        auto *Source = cast<ConstantStruct>(
+            cast<GlobalVariable>(II->getArgOperand(1)->stripPointerCasts())
+                ->getInitializer());
+        auto *TargetSize = Target->getOperand(1);
+        auto *SourceSize = Source->getOperand(1);
+        if (TargetSize->isElementWiseEqual(SourceSize)) {
+          break;
+        }
+        auto *TargetRelativeFunOffset = Target->getOperand(0);
+        auto *NewFuncPtrStruct = ConstantStruct::get(
+            Target->getType(), TargetRelativeFunOffset, SourceSize);
+        Target->replaceAllUsesWith(NewFuncPtrStruct);
+        break;
       }
       II->eraseFromParent();
       Changed = true;
@@ -95,10 +112,10 @@ bool Lowerer::lowerRemainingCoroIntrinsics(Function &F) {
 }

 static bool declaresCoroCleanupIntrinsics(const Module &M) {
-  return coro::declaresIntrinsics(M, {"llvm.coro.alloc", "llvm.coro.begin",
-                                      "llvm.coro.subfn.addr", "llvm.coro.free",
-                                      "llvm.coro.id", "llvm.coro.id.retcon",
-                                      "llvm.coro.id.retcon.once"});
+  return coro::declaresIntrinsics(
+      M, {"llvm.coro.alloc", "llvm.coro.begin", "llvm.coro.subfn.addr",
+          "llvm.coro.free", "llvm.coro.id", "llvm.coro.id.retcon",
+          "llvm.coro.id.retcon.once", "llvm.coro.async.size.replace"});
 }

 PreservedAnalyses CoroCleanupPass::run(Function &F,
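The new `llvm.coro.async.size.replace` lowering above rewrites a constant `{relative function pointer, context size}` pair in place, keeping the target's function offset but adopting the source's size. A minimal standalone C++ analogy of that swap — the struct and names here are illustrative, not LLVM's data structures:

```cpp
#include <cstdint>

// An async function pointer constant: a relative pointer to the function
// plus the size of its async context ("frame").
struct AsyncFuncPointer {
  int32_t RelativeFunOffset; // kept from the target
  int32_t ContextSize;       // overwritten from the source
};

// Mirror of the lowering above: keep Target's function offset, take
// Source's context size. No-op when the sizes already match.
constexpr AsyncFuncPointer replaceSize(AsyncFuncPointer Target,
                                       AsyncFuncPointer Source) {
  if (Target.ContextSize == Source.ContextSize)
    return Target;
  return {Target.RelativeFunOffset, Source.ContextSize};
}

static_assert(replaceSize({0x10, 64}, {0x20, 128}).ContextSize == 128,
              "size is copied from the source");
static_assert(replaceSize({0x10, 64}, {0x20, 128}).RelativeFunOffset == 0x10,
              "function offset stays with the target");
```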
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroElide.cpp b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroElide.cpp
index 07a183cfc66b..84bebb7bf42d 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroElide.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroElide.cpp
@@ -9,6 +9,7 @@
 #include "llvm/Transforms/Coroutines/CoroElide.h"
 #include "CoroInternal.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/IR/Dominators.h"
@@ -16,11 +17,20 @@
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FileSystem.h"

 using namespace llvm;

 #define DEBUG_TYPE "coro-elide"

+STATISTIC(NumOfCoroElided, "The # of coroutines elided.");
+
+#ifndef NDEBUG
+static cl::opt<std::string> CoroElideInfoOutputFilename(
+    "coro-elide-info-output-file", cl::value_desc("filename"),
+    cl::desc("File to record the coroutines that got elided"), cl::Hidden);
+#endif
+
 namespace {
 // Created on demand if the coro-elide pass has work to do.
 struct Lowerer : coro::LowererBase {
@@ -29,7 +39,6 @@ struct Lowerer : coro::LowererBase {
   SmallVector<CoroAllocInst *, 1> CoroAllocs;
   SmallVector<CoroSubFnInst *, 4> ResumeAddr;
   DenseMap<CoroBeginInst *, SmallVector<CoroSubFnInst *, 4>> DestroyAddr;
-  SmallVector<CoroFreeInst *, 1> CoroFrees;
   SmallPtrSet<const SwitchInst *, 4> CoroSuspendSwitches;

   Lowerer(Module &M) : LowererBase(M) {}
@@ -71,7 +80,7 @@ static void replaceWithConstant(Constant *Value,
 // See if any operand of the call instruction references the coroutine frame.
 static bool operandReferences(CallInst *CI, AllocaInst *Frame, AAResults &AA) {
   for (Value *Op : CI->operand_values())
-    if (AA.alias(Op, Frame) != NoAlias)
+    if (!AA.isNoAlias(Op, Frame))
       return true;
   return false;
 }
@@ -79,11 +88,16 @@ static bool operandReferences(CallInst *CI, AllocaInst *Frame, AAResults &AA) {
 // Look for any tail calls referencing the coroutine frame and remove the tail
 // attribute from them, since now the coroutine frame resides on the stack and
 // a tail call implies that the function does not reference anything on the
 // stack.
+// However, if it's a musttail call, we cannot remove the tailcall attribute.
+// It's safe to keep it there, as the musttail call is for symmetric transfer,
+// and by that point the frame should have been destroyed and hence is not
+// interfering with operands.
 static void removeTailCallAttribute(AllocaInst *Frame, AAResults &AA) {
   Function &F = *Frame->getFunction();
   for (Instruction &I : instructions(F))
     if (auto *Call = dyn_cast<CallInst>(&I))
-      if (Call->isTailCall() && operandReferences(Call, Frame, AA))
+      if (Call->isTailCall() && operandReferences(Call, Frame, AA) &&
+          !Call->isMustTailCall())
         Call->setTailCall(false);
 }

@@ -114,6 +128,21 @@ static Instruction *getFirstNonAllocaInTheEntryBlock(Function *F) {
   llvm_unreachable("no terminator in the entry block");
 }

+#ifndef NDEBUG
+static std::unique_ptr<raw_fd_ostream> getOrCreateLogFile() {
+  assert(!CoroElideInfoOutputFilename.empty() &&
+         "coro-elide-info-output-file shouldn't be empty");
+  std::error_code EC;
+  auto Result = std::make_unique<raw_fd_ostream>(CoroElideInfoOutputFilename,
+                                                 EC, sys::fs::OF_Append);
+  if (!EC)
+    return Result;
+  llvm::errs() << "Error opening coro-elide-info-output-file '"
+               << CoroElideInfoOutputFilename << " for appending!\n";
+  return std::make_unique<raw_fd_ostream>(2, false); // stderr.
+}
+#endif
+
 // To elide heap allocations we need to suppress code blocks guarded by
 // llvm.coro.alloc and llvm.coro.free instructions.
 void Lowerer::elideHeapAllocations(Function *F, uint64_t FrameSize,
@@ -227,17 +256,22 @@ bool Lowerer::shouldElide(Function *F, DominatorTree &DT) const {
   // Filter out the coro.destroy that lie along exceptional paths.
   SmallPtrSet<CoroBeginInst *, 8> ReferencedCoroBegins;
   for (auto &It : DestroyAddr) {
+    // If any coro.destroy dominates all of the terminators for the
+    // coro.begin, we know the corresponding coro.begin doesn't escape.
     for (Instruction *DA : It.second) {
-      for (BasicBlock *TI : Terminators) {
-        if (DT.dominates(DA, TI->getTerminator())) {
-          ReferencedCoroBegins.insert(It.first);
-          break;
-        }
+      if (llvm::all_of(Terminators, [&](auto *TI) {
+            return DT.dominates(DA, TI->getTerminator());
+          })) {
+        ReferencedCoroBegins.insert(It.first);
+        break;
       }
     }

     // Whether there are any paths from coro.begin to Terminators which do not
     // pass through any of the coro.destroys.
+    //
+    // hasEscapePath is relatively slow, so we avoid running it as much as
+    // possible.
     if (!ReferencedCoroBegins.count(It.first) &&
         !hasEscapePath(It.first, Terminators))
       ReferencedCoroBegins.insert(It.first);
@@ -246,20 +280,7 @@ bool Lowerer::shouldElide(Function *F, DominatorTree &DT) const {
   // If the size of the set is the same as the total number of coro.begins,
   // that means we found a coro.free or coro.destroy referencing each
   // coro.begin, so we can perform heap elision.
-  if (ReferencedCoroBegins.size() != CoroBegins.size())
-    return false;
-
-  // If any call in the function is a musttail call, it usually won't work
-  // because we cannot drop the tailcall attribute, and a tail call will reuse
-  // the entire stack where we are going to put the new frame. In theory a more
-  // precise analysis can be done to check whether the new frame aliases with
-  // the call, however it's challenging to do so before the elision actually
-  // happened.
-  for (BasicBlock &BB : *F)
-    if (BB.getTerminatingMustTailCall())
-      return false;
-
-  return true;
+  return ReferencedCoroBegins.size() == CoroBegins.size();
 }

 void Lowerer::collectPostSplitCoroIds(Function *F) {
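`shouldElide` now reduces to: every `coro.begin` must be paired with a `coro.destroy`/`coro.free` that provably runs on all paths. At the source level this is the classic heap-allocation-elision (HALO) situation; a hedged C++20 sketch of a coroutine whose frame allocation an optimizing build may elide (the `Task` type here is a minimal illustrative promise, not a real library type):

```cpp
#include <coroutine>

// A trivial eager task: runs to completion when called.
struct Task {
  struct promise_type {
    Task get_return_object() { return {}; }
    std::suspend_never initial_suspend() { return {}; }
    std::suspend_never final_suspend() noexcept { return {}; }
    void return_void() {}
    void unhandled_exception() {}
  };
};

Task child() { co_return; }

// The coroutine's lifetime is strictly enclosed in the caller: it is
// created, run, and destroyed before `caller` returns. CoroElide can then
// prove every coro.begin is paired with a dominating destroy/free and
// replace the heap-allocated frame with a slot on the caller's stack.
void caller() { child(); }
```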
@@ -290,7 +311,6 @@ bool Lowerer::processCoroId(CoroIdInst *CoroId, AAResults &AA,
                             DominatorTree &DT) {
   CoroBegins.clear();
   CoroAllocs.clear();
-  CoroFrees.clear();
   ResumeAddr.clear();
   DestroyAddr.clear();

@@ -300,8 +320,6 @@ bool Lowerer::processCoroId(CoroIdInst *CoroId, AAResults &AA,
       CoroBegins.push_back(CB);
     else if (auto *CA = dyn_cast<CoroAllocInst>(U))
       CoroAllocs.push_back(CA);
-    else if (auto *CF = dyn_cast<CoroFreeInst>(U))
-      CoroFrees.push_back(CF);
   }

   // Collect all coro.subfn.addrs associated with coro.begin.
@@ -347,6 +365,13 @@ bool Lowerer::processCoroId(CoroIdInst *CoroId, AAResults &AA,
     elideHeapAllocations(CoroId->getFunction(), FrameSizeAndAlign.first,
                          FrameSizeAndAlign.second, AA);
     coro::replaceCoroFree(CoroId, /*Elide=*/true);
+    NumOfCoroElided++;
+#ifndef NDEBUG
+    if (!CoroElideInfoOutputFilename.empty())
+      *getOrCreateLogFile()
+          << "Elide " << CoroId->getCoroutine()->getName() << " in "
+          << CoroId->getFunction()->getName() << "\n";
+#endif
   }

   return true;
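The musttail carve-outs in this file exist because symmetric transfer is lowered to a `musttail` call at the final suspend point. A minimal C++20 awaiter showing the source-level shape of symmetric transfer (names illustrative):

```cpp
#include <coroutine>

// An awaiter that performs symmetric transfer: await_suspend returns the
// next coroutine's handle, and the compiler lowers the hand-off into a
// (must)tail call so arbitrarily long resume chains cannot overflow the
// stack. This is why CoroElide must not strip the tailcall attribute
// from those calls even after moving the frame onto the stack.
struct TransferTo {
  std::coroutine_handle<> next;
  bool await_ready() noexcept { return false; }
  std::coroutine_handle<>
  await_suspend(std::coroutine_handle<>) noexcept {
    return next; // resumed via a musttail call after lowering
  }
  void await_resume() noexcept {}
};
```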
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
index e1e0d50979dc..beae5fdac8ab 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
@@ -12,8 +12,6 @@
 // contain those values. All uses of those values are replaced with appropriate
 // GEP + load from the coroutine frame. At the point of the definition we spill
 // the value into the coroutine frame.
-//
-// TODO: pack values tightly using liveness info.
 //===----------------------------------------------------------------------===//

 #include "CoroInternal.h"
@@ -32,6 +30,7 @@
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/OptimizedStructLayout.h"
 #include "llvm/Support/circular_raw_ostream.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/PromoteMemToReg.h"
@@ -163,6 +162,16 @@ struct SuspendCrossingInfo {

     return isDefinitionAcrossSuspend(DefBB, U);
   }
+
+  bool isDefinitionAcrossSuspend(Value &V, User *U) const {
+    if (auto *Arg = dyn_cast<Argument>(&V))
+      return isDefinitionAcrossSuspend(*Arg, U);
+    if (auto *Inst = dyn_cast<Instruction>(&V))
+      return isDefinitionAcrossSuspend(*Inst, U);
+
+    llvm_unreachable(
+        "Coroutine could only collect Argument and Instruction now.");
+  }
 };
 } // end anonymous namespace

@@ -336,6 +345,28 @@ struct FrameDataInfo {
     FieldIndexMap[V] = Index;
   }

+  uint64_t getAlign(Value *V) const {
+    auto Iter = FieldAlignMap.find(V);
+    assert(Iter != FieldAlignMap.end());
+    return Iter->second;
+  }
+
+  void setAlign(Value *V, uint64_t Align) {
+    assert(FieldAlignMap.count(V) == 0);
+    FieldAlignMap.insert({V, Align});
+  }
+
+  uint64_t getOffset(Value *V) const {
+    auto Iter = FieldOffsetMap.find(V);
+    assert(Iter != FieldOffsetMap.end());
+    return Iter->second;
+  }
+
+  void setOffset(Value *V, uint64_t Offset) {
+    assert(FieldOffsetMap.count(V) == 0);
+    FieldOffsetMap.insert({V, Offset});
+  }
+
   // Remap the index of every field in the frame, using the final layout index.
   void updateLayoutIndex(FrameTypeBuilder &B);

@@ -347,6 +378,12 @@ private:
   // with their original insertion field index. After the frame is built, their
   // indexes will be updated into the final layout index.
   DenseMap<Value *, uint32_t> FieldIndexMap;
+  // Map from values to their alignment on the frame. They would be set after
+  // the frame is built.
+  DenseMap<Value *, uint64_t> FieldAlignMap;
+  // Map from values to their offset on the frame. They would be set after
+  // the frame is built.
+  DenseMap<Value *, uint64_t> FieldOffsetMap;
 };
 } // namespace

@@ -392,12 +429,15 @@ private:
   Align StructAlign;
   bool IsFinished = false;

+  Optional<Align> MaxFrameAlignment;
+
   SmallVector<Field, 8> Fields;
   DenseMap<Value*, unsigned> FieldIndexByKey;

 public:
-  FrameTypeBuilder(LLVMContext &Context, DataLayout const &DL)
-      : DL(DL), Context(Context) {}
+  FrameTypeBuilder(LLVMContext &Context, DataLayout const &DL,
+                   Optional<Align> MaxFrameAlignment)
+      : DL(DL), Context(Context), MaxFrameAlignment(MaxFrameAlignment) {}

   /// Add a field to this structure for the storage of an `alloca`
   /// instruction.
@@ -448,17 +488,32 @@ public:

   /// Add a field to this structure.
   LLVM_NODISCARD FieldIDType addField(Type *Ty, MaybeAlign FieldAlignment,
-                                      bool IsHeader = false) {
+                                      bool IsHeader = false,
+                                      bool IsSpillOfValue = false) {
     assert(!IsFinished && "adding fields to a finished builder");
     assert(Ty && "must provide a type for a field");

     // The field size is always the alloc size of the type.
     uint64_t FieldSize = DL.getTypeAllocSize(Ty);

+    // For an alloca with size=0, we don't need to add a field and they
+    // can just point to any index in the frame. Use index 0.
+    if (FieldSize == 0) {
+      return 0;
+    }
+
     // The field alignment might not be the type alignment, but we need
     // to remember the type alignment anyway to build the type.
-    Align TyAlignment = DL.getABITypeAlign(Ty);
-    if (!FieldAlignment) FieldAlignment = TyAlignment;
+    // If we are spilling values we don't need to worry about ABI alignment
+    // concerns.
+    auto ABIAlign = DL.getABITypeAlign(Ty);
+    Align TyAlignment =
+        (IsSpillOfValue && MaxFrameAlignment)
+            ? (*MaxFrameAlignment < ABIAlign ? *MaxFrameAlignment : ABIAlign)
+            : ABIAlign;
+    if (!FieldAlignment) {
+      FieldAlignment = TyAlignment;
+    }

     // Lay out header fields immediately.
     uint64_t Offset;
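The capping logic above chooses `min(ABI alignment, max frame alignment)` for spilled values and then lays fields out at aligned offsets. A small sketch of that arithmetic, mirroring what `llvm::alignTo` computes, under the stated assumption that alignments are powers of two:

```cpp
#include <algorithm>
#include <cstdint>

// Round Offset up to the next multiple of Align (Align a power of two);
// this is the computation used when placing frame fields.
constexpr uint64_t alignTo(uint64_t Offset, uint64_t Align) {
  return (Offset + Align - 1) & ~(Align - 1);
}

// Mirror of the capping above: a spilled value never forces the frame
// beyond the maximum alignment the async context can guarantee.
constexpr uint64_t fieldAlign(uint64_t AbiAlign, uint64_t MaxFrameAlign) {
  return std::min(AbiAlign, MaxFrameAlign);
}

static_assert(alignTo(20, 8) == 24, "next 8-byte boundary after 20");
static_assert(fieldAlign(32, 16) == 16, "capped at the context alignment");
```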
@@ -492,12 +547,20 @@ public:
     assert(IsFinished && "not yet finished!");
     return Fields[Id].LayoutFieldIndex;
   }
+
+  Field getLayoutField(FieldIDType Id) const {
+    assert(IsFinished && "not yet finished!");
+    return Fields[Id];
+  }
 };
 } // namespace

 void FrameDataInfo::updateLayoutIndex(FrameTypeBuilder &B) {
   auto Updater = [&](Value *I) {
-    setFieldIndex(I, B.getLayoutFieldIndex(getFieldIndex(I)));
+    auto Field = B.getLayoutField(getFieldIndex(I));
+    setFieldIndex(I, Field.LayoutFieldIndex);
+    setAlign(I, Field.Alignment.value());
+    setOffset(I, Field.Offset);
   };
   LayoutIndexUpdateStarted = true;
   for (auto &S : Spills)
@@ -510,7 +573,6 @@ void FrameDataInfo::updateLayoutIndex(FrameTypeBuilder &B) {
 void FrameTypeBuilder::addFieldForAllocas(const Function &F,
                                           FrameDataInfo &FrameData,
                                           coro::Shape &Shape) {
-  DenseMap<AllocaInst *, unsigned int> AllocaIndex;
   using AllocaSetType = SmallVector<AllocaInst *, 4>;
   SmallVector<AllocaSetType, 4> NonOverlapedAllocas;

@@ -532,7 +594,6 @@ void FrameTypeBuilder::addFieldForAllocas(const Function &F,
   if (!Shape.ReuseFrameSlot && !EnableReuseStorageInFrame) {
     for (const auto &A : FrameData.Allocas) {
       AllocaInst *Alloca = A.Alloca;
-      AllocaIndex[Alloca] = NonOverlapedAllocas.size();
       NonOverlapedAllocas.emplace_back(AllocaSetType(1, Alloca));
     }
     return;
@@ -613,13 +674,11 @@ void FrameTypeBuilder::addFieldForAllocas(const Function &F,
       bool CouldMerge = NoInference && Alignable;
       if (!CouldMerge)
         continue;
-      AllocaIndex[Alloca] = AllocaIndex[*AllocaSet.begin()];
       AllocaSet.push_back(Alloca);
       Merged = true;
       break;
     }
     if (!Merged) {
-      AllocaIndex[Alloca] = NonOverlapedAllocas.size();
       NonOverlapedAllocas.emplace_back(AllocaSetType(1, Alloca));
     }
   }
@@ -716,6 +775,314 @@ void FrameTypeBuilder::finish(StructType *Ty) {
   IsFinished = true;
 }

+static void cacheDIVar(FrameDataInfo &FrameData,
+                       DenseMap<Value *, DILocalVariable *> &DIVarCache) {
+  for (auto *V : FrameData.getAllDefs()) {
+    if (DIVarCache.find(V) != DIVarCache.end())
+      continue;
+
+    auto DDIs = FindDbgDeclareUses(V);
+    auto *I = llvm::find_if(DDIs, [](DbgDeclareInst *DDI) {
+      return DDI->getExpression()->getNumElements() == 0;
+    });
+    if (I != DDIs.end())
+      DIVarCache.insert({V, (*I)->getVariable()});
+  }
+}
+
+/// Create a name for Type. It uses MDString to store the newly created
+/// string, to avoid memory leaks.
+static StringRef solveTypeName(Type *Ty) {
+  if (Ty->isIntegerTy()) {
+    // The longest name in common may be '__int_128', which has 9 characters.
+    SmallString<16> Buffer;
+    raw_svector_ostream OS(Buffer);
+    OS << "__int_" << cast<IntegerType>(Ty)->getBitWidth();
+    auto *MDName = MDString::get(Ty->getContext(), OS.str());
+    return MDName->getString();
+  }
+
+  if (Ty->isFloatingPointTy()) {
+    if (Ty->isFloatTy())
+      return "__float_";
+    if (Ty->isDoubleTy())
+      return "__double_";
+    return "__floating_type_";
+  }
+
+  if (Ty->isPointerTy()) {
+    auto *PtrTy = cast<PointerType>(Ty);
+    Type *PointeeTy = PtrTy->getElementType();
+    auto Name = solveTypeName(PointeeTy);
+    if (Name == "UnknownType")
+      return "PointerType";
+    SmallString<16> Buffer;
+    Twine(Name + "_Ptr").toStringRef(Buffer);
+    auto *MDName = MDString::get(Ty->getContext(), Buffer.str());
+    return MDName->getString();
+  }
+
+  if (Ty->isStructTy()) {
+    if (!cast<StructType>(Ty)->hasName())
+      return "__LiteralStructType_";
+
+    auto Name = Ty->getStructName();
+
+    SmallString<16> Buffer(Name);
+    for_each(Buffer, [](auto &Iter) {
+      if (Iter == '.' || Iter == ':')
+        Iter = '_';
+    });
+    auto *MDName = MDString::get(Ty->getContext(), Buffer.str());
+    return MDName->getString();
+  }
+
+  return "UnknownType";
+}
+
+static DIType *solveDIType(DIBuilder &Builder, Type *Ty, DataLayout &Layout,
+                           DIScope *Scope, unsigned LineNum,
+                           DenseMap<Type *, DIType *> &DITypeCache) {
+  if (DIType *DT = DITypeCache.lookup(Ty))
+    return DT;
+
+  StringRef Name = solveTypeName(Ty);
+
+  DIType *RetType = nullptr;
+
+  if (Ty->isIntegerTy()) {
+    auto BitWidth = cast<IntegerType>(Ty)->getBitWidth();
+    RetType = Builder.createBasicType(Name, BitWidth, dwarf::DW_ATE_signed,
+                                      llvm::DINode::FlagArtificial);
+  } else if (Ty->isFloatingPointTy()) {
+    RetType = Builder.createBasicType(Name, Layout.getTypeSizeInBits(Ty),
+                                      dwarf::DW_ATE_float,
+                                      llvm::DINode::FlagArtificial);
+  } else if (Ty->isPointerTy()) {
+    // Construct a BasicType instead of a PointerType to avoid an infinite
+    // search problem.
+    // For example, we would be in trouble if we traverse recursively:
+    //
+    //  struct Node {
+    //      Node* ptr;
+    //  };
+    RetType = Builder.createBasicType(Name, Layout.getTypeSizeInBits(Ty),
+                                      dwarf::DW_ATE_address,
+                                      llvm::DINode::FlagArtificial);
+  } else if (Ty->isStructTy()) {
+    auto *DIStruct = Builder.createStructType(
+        Scope, Name, Scope->getFile(), LineNum, Layout.getTypeSizeInBits(Ty),
+        Layout.getPrefTypeAlignment(Ty), llvm::DINode::FlagArtificial, nullptr,
+        llvm::DINodeArray());
+
+    auto *StructTy = cast<StructType>(Ty);
+    SmallVector<Metadata *, 16> Elements;
+    for (unsigned I = 0; I < StructTy->getNumElements(); I++) {
+      DIType *DITy = solveDIType(Builder, StructTy->getElementType(I), Layout,
+                                 Scope, LineNum, DITypeCache);
+      assert(DITy);
+      Elements.push_back(Builder.createMemberType(
+          Scope, DITy->getName(), Scope->getFile(), LineNum,
+          DITy->getSizeInBits(), DITy->getAlignInBits(),
+          Layout.getStructLayout(StructTy)->getElementOffsetInBits(I),
+          llvm::DINode::FlagArtificial, DITy));
+    }
+
+    Builder.replaceArrays(DIStruct, Builder.getOrCreateArray(Elements));
+
+    RetType = DIStruct;
+  } else {
+    LLVM_DEBUG(dbgs() << "Unresolved Type: " << *Ty << "\n";);
+    SmallString<32> Buffer;
+    raw_svector_ostream OS(Buffer);
+    OS << Name.str() << "_" << Layout.getTypeSizeInBits(Ty);
+    RetType = Builder.createBasicType(OS.str(), Layout.getTypeSizeInBits(Ty),
+                                      dwarf::DW_ATE_address,
+                                      llvm::DINode::FlagArtificial);
+  }
+
+  DITypeCache.insert({Ty, RetType});
+  return RetType;
+}
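The `struct Node` case in the comment above is why `solveDIType` flattens pointers into address-sized basic types rather than recursing into the pointee: a self-referential type never bottoms out. A standalone illustration of such a type:

```cpp
#include <cstdio>

// Describing Node field-by-field would require describing Node*, which
// would require describing Node again, without end. Flattening the
// pointer to a plain address-sized type breaks the cycle.
struct Node {
  Node *ptr;
};

int main() {
  Node n{&n}; // even a one-node cycle would never terminate the descent
  std::printf("%p\n", static_cast<void *>(n.ptr));
}
```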
+
+/// Build artificial debug info for C++ coroutine frames to allow users to
+/// inspect the contents of the frame directly.
+///
+/// Create debug information for the coroutine frame with the debug name
+/// "__coro_frame". The debug information for the fields of the coroutine
+/// frame is constructed in the following way:
+/// 1. For every value in the frame, we search the uses of dbg.declare to
+///    find the corresponding debug variable for the value. If we can find
+///    the debug variable, we can get full and accurate debug information.
+/// 2. If we can't get the debug information in step 1, we can only try to
+///    build the DIType from the Type. We do this in solveDIType. We only
+///    handle integer, float, double, and struct types for now.
+static void buildFrameDebugInfo(Function &F, coro::Shape &Shape,
+                                FrameDataInfo &FrameData) {
+  DISubprogram *DIS = F.getSubprogram();
+  // If there is no DISubprogram for F, it implies the function was not
+  // compiled with debug info, so we don't need to generate debug info for
+  // the frame either.
+  if (!DIS || !DIS->getUnit() ||
+      !dwarf::isCPlusPlus(
+          (dwarf::SourceLanguage)DIS->getUnit()->getSourceLanguage()))
+    return;
+
+  assert(Shape.ABI == coro::ABI::Switch &&
+         "We could only build debug information for C++ coroutines now.\n");
+
+  DIBuilder DBuilder(*F.getParent(), /*AllowUnresolved*/ false);
+
+  AllocaInst *PromiseAlloca = Shape.getPromiseAlloca();
+  assert(PromiseAlloca &&
+         "Coroutine with switch ABI should own Promise alloca");
+
+  TinyPtrVector<DbgDeclareInst *> DIs = FindDbgDeclareUses(PromiseAlloca);
+  if (DIs.empty())
+    return;
+
+  DbgDeclareInst *PromiseDDI = DIs.front();
+  DILocalVariable *PromiseDIVariable = PromiseDDI->getVariable();
+  DILocalScope *PromiseDIScope = PromiseDIVariable->getScope();
+  DIFile *DFile = PromiseDIScope->getFile();
+  DILocation *DILoc = PromiseDDI->getDebugLoc().get();
+  unsigned LineNum = PromiseDIVariable->getLine();
+
+  DICompositeType *FrameDITy = DBuilder.createStructType(
+      DIS, "__coro_frame_ty", DFile, LineNum, Shape.FrameSize * 8,
+      Shape.FrameAlign.value() * 8, llvm::DINode::FlagArtificial, nullptr,
+      llvm::DINodeArray());
+  StructType *FrameTy = Shape.FrameTy;
+  SmallVector<Metadata *, 16> Elements;
+  DataLayout Layout = F.getParent()->getDataLayout();
+
+  DenseMap<Value *, DILocalVariable *> DIVarCache;
+  cacheDIVar(FrameData, DIVarCache);
+
+  unsigned ResumeIndex = coro::Shape::SwitchFieldIndex::Resume;
+  unsigned DestroyIndex = coro::Shape::SwitchFieldIndex::Destroy;
+  unsigned IndexIndex = Shape.SwitchLowering.IndexField;
+
+  DenseMap<unsigned, StringRef> NameCache;
+  NameCache.insert({ResumeIndex, "__resume_fn"});
+  NameCache.insert({DestroyIndex, "__destroy_fn"});
+  NameCache.insert({IndexIndex, "__coro_index"});
+
+  Type *ResumeFnTy = FrameTy->getElementType(ResumeIndex),
+       *DestroyFnTy = FrameTy->getElementType(DestroyIndex),
+       *IndexTy = FrameTy->getElementType(IndexIndex);
+
+  DenseMap<unsigned, DIType *> TyCache;
+  TyCache.insert(
+      {ResumeIndex, DBuilder.createBasicType(
+                        "__resume_fn", Layout.getTypeSizeInBits(ResumeFnTy),
+                        dwarf::DW_ATE_address)});
+  TyCache.insert(
+      {DestroyIndex, DBuilder.createBasicType(
+                         "__destroy_fn", Layout.getTypeSizeInBits(DestroyFnTy),
+                         dwarf::DW_ATE_address)});
+
+  /// FIXME: If we fill the field `SizeInBits` with the actual size of
+  /// __coro_index in bits, then __coro_index wouldn't show in the debugger.
+  TyCache.insert({IndexIndex, DBuilder.createBasicType(
+                                  "__coro_index",
+                                  (Layout.getTypeSizeInBits(IndexTy) < 8)
+                                      ? 8
+                                      : Layout.getTypeSizeInBits(IndexTy),
+                                  dwarf::DW_ATE_unsigned_char)});
+
+  for (auto *V : FrameData.getAllDefs()) {
+    if (DIVarCache.find(V) == DIVarCache.end())
+      continue;
+
+    auto Index = FrameData.getFieldIndex(V);
+
+    NameCache.insert({Index, DIVarCache[V]->getName()});
+    TyCache.insert({Index, DIVarCache[V]->getType()});
+  }
+
+  // Cache from index to (Align, Offset) pair.
+  DenseMap<unsigned, std::pair<unsigned, unsigned>> OffsetCache;
+  // The Align and Offset of the Resume function and Destroy function are
+  // fixed.
+  OffsetCache.insert({ResumeIndex, {8, 0}});
+  OffsetCache.insert({DestroyIndex, {8, 8}});
+  OffsetCache.insert(
+      {IndexIndex,
+       {Shape.SwitchLowering.IndexAlign, Shape.SwitchLowering.IndexOffset}});
+
+  for (auto *V : FrameData.getAllDefs()) {
+    auto Index = FrameData.getFieldIndex(V);
+
+    OffsetCache.insert(
+        {Index, {FrameData.getAlign(V), FrameData.getOffset(V)}});
+  }
+
+  DenseMap<Type *, DIType *> DITypeCache;
+  // This counter is used to avoid repeated type names; e.g., there would be
+  // many i32 and i64 types in one coroutine, and we would use i32_0 and
+  // i32_1 to tell them apart, since it makes no sense for the names of the
+  // fields to conflict with each other.
+  unsigned UnknownTypeNum = 0;
+  for (unsigned Index = 0; Index < FrameTy->getNumElements(); Index++) {
+    if (OffsetCache.find(Index) == OffsetCache.end())
+      continue;
+
+    std::string Name;
+    uint64_t SizeInBits;
+    uint32_t AlignInBits;
+    uint64_t OffsetInBits;
+    DIType *DITy = nullptr;
+
+    Type *Ty = FrameTy->getElementType(Index);
+    assert(Ty->isSized() && "We can't handle a type which is not sized.\n");
+    SizeInBits = Layout.getTypeSizeInBits(Ty).getFixedSize();
+    AlignInBits = OffsetCache[Index].first * 8;
+    OffsetInBits = OffsetCache[Index].second * 8;
+
+    if (NameCache.find(Index) != NameCache.end()) {
+      Name = NameCache[Index].str();
+      DITy = TyCache[Index];
+    } else {
+      DITy = solveDIType(DBuilder, Ty, Layout, FrameDITy, LineNum, DITypeCache);
+      assert(DITy && "SolveDIType shouldn't return nullptr.\n");
+      Name = DITy->getName().str();
+      Name += "_" + std::to_string(UnknownTypeNum);
+      UnknownTypeNum++;
+    }
+
+    Elements.push_back(DBuilder.createMemberType(
+        FrameDITy, Name, DFile, LineNum, SizeInBits, AlignInBits, OffsetInBits,
+        llvm::DINode::FlagArtificial, DITy));
+  }
+
+  DBuilder.replaceArrays(FrameDITy, DBuilder.getOrCreateArray(Elements));
+
+  auto *FrameDIVar = DBuilder.createAutoVariable(PromiseDIScope, "__coro_frame",
+                                                 DFile, LineNum, FrameDITy,
+                                                 true, DINode::FlagArtificial);
+  assert(FrameDIVar->isValidLocationForIntrinsic(PromiseDDI->getDebugLoc()));
+
+  // The Subprogram has a RetainedNodes field which records the debug
+  // variables it contains, so we need to add __coro_frame to its
+  // RetainedNodes.
+  //
+  // If we don't add __coro_frame to the RetainedNodes, the user may get
+  // `no symbol __coro_frame in context` rather than `__coro_frame is
+  // optimized out`, and the latter is more precise.
+  if (auto *SubProgram = dyn_cast<DISubprogram>(PromiseDIScope)) {
+    auto RetainedNodes = SubProgram->getRetainedNodes();
+    SmallVector<Metadata *, 32> RetainedNodesVec(RetainedNodes.begin(),
+                                                 RetainedNodes.end());
+    RetainedNodesVec.push_back(FrameDIVar);
+    SubProgram->replaceOperandWith(
+        7, (MDTuple::get(F.getContext(), RetainedNodesVec)));
+  }
+
+  DBuilder.insertDeclare(Shape.FramePtr, FrameDIVar,
+                         DBuilder.createExpression(), DILoc,
+                         Shape.FramePtr->getNextNode());
+}
+
 // Build a struct that will keep state for an active coroutine.
 //    struct f.frame {
 //      ResumeFnTy ResumeFnAddr;
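With this synthesized debug info, optimized coroutine frames become inspectable in a debugger as a `__coro_frame` variable of type `__coro_frame_ty`. Roughly what that type corresponds to for the switch ABI — the spilled members and index width vary per coroutine, so this struct is illustrative only:

```cpp
// Sketch of the layout "__coro_frame_ty" describes for the switch ABI
// (field names come from the pass above; member set is per-coroutine).
struct __coro_frame_ty {
  void (*__resume_fn)();  // resumption entry point, offset 0
  void (*__destroy_fn)(); // destruction entry point, offset 8
  // ... the promise object and spilled values are laid out here ...
  unsigned char __coro_index; // which suspend point is active
};
```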
@@ -734,7 +1101,11 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape,
     return StructType::create(C, Name);
   }();

-  FrameTypeBuilder B(C, DL);
+  // We will use this value to cap the alignment of spilled values.
+  Optional<Align> MaxFrameAlignment;
+  if (Shape.ABI == coro::ABI::Async)
+    MaxFrameAlignment = Shape.AsyncLowering.getContextAlignment();
+  FrameTypeBuilder B(C, DL, MaxFrameAlignment);

   AllocaInst *PromiseAlloca = Shape.getPromiseAlloca();
   Optional<FieldIDType> SwitchIndexFieldId;
@@ -786,8 +1157,9 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape,
     // instead of the pointer itself.
     if (const Argument *A = dyn_cast<Argument>(S.first))
       if (A->hasByValAttr())
-        FieldType = FieldType->getPointerElementType();
-    FieldIDType Id = B.addField(FieldType, None);
+        FieldType = A->getParamByValType();
+    FieldIDType Id =
+        B.addField(FieldType, None, false /*header*/, true /*IsSpillOfValue*/);
     FrameData.setFieldIndex(S.first, Id);
   }

@@ -797,15 +1169,18 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape,
   Shape.FrameSize = B.getStructSize();

   switch (Shape.ABI) {
-  case coro::ABI::Switch:
+  case coro::ABI::Switch: {
     // In the switch ABI, remember the switch-index field.
-    Shape.SwitchLowering.IndexField =
-        B.getLayoutFieldIndex(*SwitchIndexFieldId);
+    auto IndexField = B.getLayoutField(*SwitchIndexFieldId);
+    Shape.SwitchLowering.IndexField = IndexField.LayoutFieldIndex;
+    Shape.SwitchLowering.IndexAlign = IndexField.Alignment.value();
+    Shape.SwitchLowering.IndexOffset = IndexField.Offset;

     // Also round the frame size up to a multiple of its alignment, as is
     // generally expected in C/C++.
     Shape.FrameSize = alignTo(Shape.FrameSize, Shape.FrameAlign);
     break;
+  }

   // In the retcon ABI, remember whether the frame is inline in the storage.
   case coro::ABI::Retcon:
@@ -869,7 +1244,7 @@ struct AllocaUseVisitor : PtrUseVisitor<AllocaUseVisitor> {
       : PtrUseVisitor(DL), DT(DT), CoroBegin(CB), Checker(Checker) {}

   void visit(Instruction &I) {
-    UserBBs.insert(I.getParent());
+    Users.insert(&I);
     Base::visit(I);
     // If the pointer is escaped prior to CoroBegin, we have to assume it would
     // be written into before CoroBegin as well.
@@ -972,6 +1347,12 @@ struct AllocaUseVisitor : PtrUseVisitor<AllocaUseVisitor> {
     handleAlias(GEPI);
   }

+  void visitIntrinsicInst(IntrinsicInst &II) {
+    if (II.getIntrinsicID() != Intrinsic::lifetime_start)
+      return Base::visitIntrinsicInst(II);
+    LifetimeStarts.insert(&II);
+  }
+
   void visitCallBase(CallBase &CB) {
     for (unsigned Op = 0, OpCount = CB.getNumArgOperands(); Op < OpCount; ++Op)
       if (U->get() == CB.getArgOperand(Op) && !CB.doesNotCapture(Op))
@@ -1005,18 +1386,40 @@ private:
   // after CoroBegin. Each entry contains the instruction and the offset in the
   // original Alloca. They need to be recreated after CoroBegin off the frame.
   DenseMap<Instruction *, llvm::Optional<APInt>> AliasOffetMap{};
-  SmallPtrSet<BasicBlock *, 2> UserBBs{};
+  SmallPtrSet<Instruction *, 4> Users{};
+  SmallPtrSet<IntrinsicInst *, 2> LifetimeStarts{};
   bool MayWriteBeforeCoroBegin{false};

   mutable llvm::Optional<bool> ShouldLiveOnFrame{};

   bool computeShouldLiveOnFrame() const {
+    // If lifetime information is available, we check it first since it's
+    // more precise. We look at every pair of a lifetime.start intrinsic and
+    // an instruction that uses the pointer to see if they cross suspension
+    // points. The uses cover both direct uses as well as indirect uses.
+    if (!LifetimeStarts.empty()) {
+      for (auto *I : Users)
+        for (auto *S : LifetimeStarts)
+          if (Checker.isDefinitionAcrossSuspend(*S, I))
+            return true;
+      return false;
+    }
+    // FIXME: Ideally the isEscaped check should come at the beginning.
+    // However there are a few loose ends that need to be fixed first before
+    // we can do that. We need to make sure we are not over-conservative, so
+    // that the data accessed in-between await_suspend and symmetric transfer
+    // is always put on the stack, and also data accessed after coro.end is
+    // always put on the stack (esp. the return object). To fix that, we need
+    // to:
+    // 1) Potentially treat sret as nocapture in calls
+    // 2) Special handle the return object and put it on the stack
+    // 3) Utilize lifetime.end intrinsic
     if (PI.isEscaped())
       return true;

-    for (auto *BB1 : UserBBs)
-      for (auto *BB2 : UserBBs)
-        if (Checker.hasPathCrossingSuspendPoint(BB1, BB2))
+    for (auto *U1 : Users)
+      for (auto *U2 : Users)
+        if (Checker.isDefinitionAcrossSuspend(*U1, U2))
           return true;

     return false;
   }
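The lifetime-based check above means only values whose uses cross a suspension point need slots in the frame. A hedged C++20 illustration, reusing the minimal `Task` promise sketched earlier:

```cpp
#include <coroutine>

struct Task { // minimal eager promise, as in the earlier sketch
  struct promise_type {
    Task get_return_object() { return {}; }
    std::suspend_never initial_suspend() { return {}; }
    std::suspend_never final_suspend() noexcept { return {}; }
    void return_void() {}
    void unhandled_exception() {}
  };
};

inline void consume(int) {}

Task example() {
  {
    int scratch = 42; // lifetime ends before any suspend point: with
    consume(scratch); // lifetime markers it never needs a frame slot
  }
  int kept = 7;
  co_await std::suspend_always{}; // suspension point
  consume(kept); // live across the suspend: spilled into the frame
}
```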
@@ -1078,6 +1481,15 @@ static Instruction *splitBeforeCatchSwitch(CatchSwitchInst *CatchSwitch) {
   return CleanupRet;
 }

+static void createFramePtr(coro::Shape &Shape) {
+  auto *CB = Shape.CoroBegin;
+  IRBuilder<> Builder(CB->getNextNode());
+  StructType *FrameTy = Shape.FrameTy;
+  PointerType *FramePtrTy = FrameTy->getPointerTo();
+  Shape.FramePtr =
+      cast<Instruction>(Builder.CreateBitCast(CB, FramePtrTy, "FramePtr"));
+}
+
 // Replace all alloca and SSA values that are accessed across suspend points
 // with GetElementPointer from coroutine frame + loads and stores. Create an
 // AllocaSpillBB that will become the new entry block for the resume parts of
@@ -1104,11 +1516,9 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,
                                  coro::Shape &Shape) {
   auto *CB = Shape.CoroBegin;
   LLVMContext &C = CB->getContext();
-  IRBuilder<> Builder(CB->getNextNode());
+  IRBuilder<> Builder(C);
   StructType *FrameTy = Shape.FrameTy;
-  PointerType *FramePtrTy = FrameTy->getPointerTo();
-  auto *FramePtr =
-      cast<Instruction>(Builder.CreateBitCast(CB, FramePtrTy, "FramePtr"));
+  Instruction *FramePtr = Shape.FramePtr;
   DominatorTree DT(*CB->getFunction());
   SmallDenseMap<llvm::Value *, llvm::AllocaInst *, 4> DbgPtrAllocaCache;

@@ -1152,6 +1562,7 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,

   for (auto const &E : FrameData.Spills) {
     Value *Def = E.first;
+    auto SpillAlignment = Align(FrameData.getAlign(Def));
     // Create a store instruction storing the value into the
     // coroutine frame.
     Instruction *InsertPt = nullptr;
@@ -1208,9 +1619,9 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,
       // instead of the pointer itself.
       auto *Value =
          Builder.CreateLoad(Def->getType()->getPointerElementType(), Def);
-      Builder.CreateStore(Value, G);
+      Builder.CreateAlignedStore(Value, G, SpillAlignment);
     } else {
-      Builder.CreateStore(Def, G);
+      Builder.CreateAlignedStore(Def, G, SpillAlignment);
     }

     BasicBlock *CurrentBlock = nullptr;
@@ -1228,9 +1639,9 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,
         if (NeedToCopyArgPtrValue)
           CurrentReload = GEP;
         else
-          CurrentReload = Builder.CreateLoad(
+          CurrentReload = Builder.CreateAlignedLoad(
               FrameTy->getElementType(FrameData.getFieldIndex(E.first)), GEP,
-              E.first->getName() + Twine(".reload"));
+              SpillAlignment, E.first->getName() + Twine(".reload"));

         TinyPtrVector<DbgDeclareInst *> DIs = FindDbgDeclareUses(Def);
         for (DbgDeclareInst *DDI : DIs) {
@@ -1244,7 +1655,7 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,
                                &*Builder.GetInsertPoint());
           // This dbg.declare is for the main function entry point.  It
           // will be deleted in all coro-split functions.
-          coro::salvageDebugInfo(DbgPtrAllocaCache, DDI);
+          coro::salvageDebugInfo(DbgPtrAllocaCache, DDI, Shape.ReuseFrameSlot);
         }
       }

@@ -1292,8 +1703,8 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,
   }

   // If we found any alloca, replace all of their remaining uses with GEP
-  // instructions. Because new dbg.declare have been created for these alloca,
-  // we also delete the original dbg.declare and replace other uses with undef.
+  // instructions. To retain debuggability, we replace the uses of allocas
+  // for dbg.declares and dbg.values with the reload from the frame.
   // Note: We cannot replace the alloca with GEP instructions indiscriminately,
   // as some of the uses may not be dominated by CoroBegin.
   Builder.SetInsertPoint(&Shape.AllocaSpillBlock->front());
@@ -1311,17 +1722,10 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,
     auto *G = GetFramePointer(Alloca);
     G->setName(Alloca->getName() + Twine(".reload.addr"));

-    SmallPtrSet<BasicBlock *, 4> SeenDbgBBs;
-    TinyPtrVector<DbgDeclareInst *> DIs = FindDbgDeclareUses(Alloca);
-    if (!DIs.empty())
-      DIBuilder(*Alloca->getModule(),
-                /*AllowUnresolved*/ false)
-          .insertDeclare(G, DIs.front()->getVariable(),
-                         DIs.front()->getExpression(),
-                         DIs.front()->getDebugLoc(), DIs.front());
-    for (auto *DI : FindDbgDeclareUses(Alloca))
-      DI->eraseFromParent();
-    replaceDbgUsesWithUndef(Alloca);
+    SmallVector<DbgVariableIntrinsic *, 4> DIs;
+    findDbgUsers(DIs, Alloca);
+    for (auto *DVI : DIs)
+      DVI->replaceUsesOfWith(Alloca, G);

     for (Instruction *I : UsersToUpdate)
       I->replaceUsesOfWith(Alloca, G);
@@ -1347,7 +1751,7 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,
       auto *FramePtrRaw =
           Builder.CreateBitCast(FramePtr, Type::getInt8PtrTy(C));
       auto *AliasPtr = Builder.CreateGEP(
-          FramePtrRaw,
+          Type::getInt8Ty(C), FramePtrRaw,
          ConstantInt::get(Type::getInt64Ty(C), Alias.second.getValue()));
       auto *AliasPtrTyped =
           Builder.CreateBitCast(AliasPtr, Alias.first->getType());
@@ -1358,77 +1762,6 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,
   return FramePtr;
 }

-// Sets the unwind edge of an instruction to a particular successor.
-static void setUnwindEdgeTo(Instruction *TI, BasicBlock *Succ) {
-  if (auto *II = dyn_cast<InvokeInst>(TI))
-    II->setUnwindDest(Succ);
-  else if (auto *CS = dyn_cast<CatchSwitchInst>(TI))
-    CS->setUnwindDest(Succ);
-  else if (auto *CR = dyn_cast<CleanupReturnInst>(TI))
-    CR->setUnwindDest(Succ);
-  else
-    llvm_unreachable("unexpected terminator instruction");
-}
-
-// Replaces all uses of OldPred with the NewPred block in all PHINodes in a
-// block.
-static void updatePhiNodes(BasicBlock *DestBB, BasicBlock *OldPred,
-                           BasicBlock *NewPred, PHINode *Until = nullptr) {
-  unsigned BBIdx = 0;
-  for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) {
-    PHINode *PN = cast<PHINode>(I);
-
-    // We manually update the LandingPadReplacement PHINode and it is the last
-    // PHI Node. So, if we find it, we are done.
-    if (Until == PN)
-      break;
-
-    // Reuse the previous value of BBIdx if it lines up.  In cases where we
-    // have multiple phi nodes with *lots* of predecessors, this is a speed
-    // win because we don't have to scan the PHI looking for TIBB.  This
-    // happens because the BB list of PHI nodes are usually in the same
-    // order.
-    if (PN->getIncomingBlock(BBIdx) != OldPred)
-      BBIdx = PN->getBasicBlockIndex(OldPred);
-
-    assert(BBIdx != (unsigned)-1 && "Invalid PHI Index!");
-    PN->setIncomingBlock(BBIdx, NewPred);
-  }
-}
-
-// Uses SplitEdge unless the successor block is an EHPad, in which case do EH
-// specific handling.
-static BasicBlock *ehAwareSplitEdge(BasicBlock *BB, BasicBlock *Succ,
-                                    LandingPadInst *OriginalPad,
-                                    PHINode *LandingPadReplacement) {
-  auto *PadInst = Succ->getFirstNonPHI();
-  if (!LandingPadReplacement && !PadInst->isEHPad())
-    return SplitEdge(BB, Succ);
-
-  auto *NewBB = BasicBlock::Create(BB->getContext(), "", BB->getParent(), Succ);
-  setUnwindEdgeTo(BB->getTerminator(), NewBB);
-  updatePhiNodes(Succ, BB, NewBB, LandingPadReplacement);
-
-  if (LandingPadReplacement) {
-    auto *NewLP = OriginalPad->clone();
-    auto *Terminator = BranchInst::Create(Succ, NewBB);
-    NewLP->insertBefore(Terminator);
-    LandingPadReplacement->addIncoming(NewLP, NewBB);
-    return NewBB;
-  }
-  Value *ParentPad = nullptr;
-  if (auto *FuncletPad = dyn_cast<FuncletPadInst>(PadInst))
-    ParentPad = FuncletPad->getParentPad();
-  else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(PadInst))
-    ParentPad = CatchSwitch->getParentPad();
-  else
-    llvm_unreachable("handling for other EHPads not implemented yet");
-
-  auto *NewCleanupPad = CleanupPadInst::Create(ParentPad, {}, "", NewBB);
-  CleanupReturnInst::Create(NewCleanupPad, Succ, NewBB);
-  return NewBB;
-}
-
 // Moves the values in the PHIs in SuccBB that correspond to PredBB into a new
 // PHI in InsertedBB.
 static void movePHIValuesToInsertedBlock(BasicBlock *SuccBB,
@@ -1524,6 +1857,24 @@ static void rewritePHIsForCleanupPad(BasicBlock *CleanupPadBB,
   }
 }

+static void cleanupSinglePredPHIs(Function &F) {
+  SmallVector<PHINode *, 32> Worklist;
+  for (auto &BB : F) {
+    for (auto &Phi : BB.phis()) {
+      if (Phi.getNumIncomingValues() == 1) {
+        Worklist.push_back(&Phi);
+      } else
+        break;
+    }
+  }
+  while (!Worklist.empty()) {
+    auto *Phi = Worklist.back();
+    Worklist.pop_back();
+    auto *OriginalValue = Phi->getIncomingValue(0);
+    Phi->replaceAllUsesWith(OriginalValue);
+  }
+}
+
 static void rewritePHIs(BasicBlock &BB) {
   // For every incoming edge we will create a block holding all
   // incoming values in a single PHI node.
@@ -1631,11 +1982,16 @@ static void rewriteMaterializableInstructions(IRBuilder<> &IRB,
     for (Instruction *U : E.second) {
       // If we have not seen this block, materialize the value.
       if (CurrentBlock != U->getParent()) {
+
+        bool IsInCoroSuspendBlock = isa<AnyCoroSuspendInst>(U);
+        CurrentBlock = IsInCoroSuspendBlock
+                           ? U->getParent()->getSinglePredecessor()
+                           : U->getParent();
-        CurrentBlock = U->getParent();
         CurrentMaterialization = cast<Instruction>(Def)->clone();
         CurrentMaterialization->setName(Def->getName());
         CurrentMaterialization->insertBefore(
-            &*CurrentBlock->getFirstInsertionPt());
+            IsInCoroSuspendBlock ? CurrentBlock->getTerminator()
+                                 : &*CurrentBlock->getFirstInsertionPt());
       }
       if (auto *PN = dyn_cast<PHINode>(U)) {
         assert(PN->getNumIncomingValues() == 1 &&
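`rewriteMaterializableInstructions` re-clones cheap computations at their post-suspend uses instead of spilling their results into the frame. A standalone before/after analogy of that trade-off (not LLVM API; `Frame` stands in for the coroutine frame):

```cpp
// Equivalent resume paths: the first carries the precomputed sum in the
// frame, the second recomputes the cheap add after resume and keeps the
// frame smaller. The pass picks the second shape for "materializable"
// instructions.
struct Frame {
  int x, y;
};

int resumeWithSpill(const Frame &f, int spilledSum) {
  return spilledSum; // frame had to carry x, y, and sum
}

int resumeWithRemat(const Frame &f) {
  return f.x + f.y; // add is re-executed after resume
}
```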
@@ -2122,24 +2478,6 @@ static void sinkLifetimeStartMarkers(Function &F, coro::Shape &Shape,
 static void collectFrameAllocas(Function &F, coro::Shape &Shape,
                                 const SuspendCrossingInfo &Checker,
                                 SmallVectorImpl<AllocaInfo> &Allocas) {
-  // Collect lifetime.start info for each alloca.
-  using LifetimeStart = SmallPtrSet<Instruction *, 2>;
-  llvm::DenseMap<AllocaInst *, std::unique_ptr<LifetimeStart>> LifetimeMap;
-  for (Instruction &I : instructions(F)) {
-    auto *II = dyn_cast<IntrinsicInst>(&I);
-    if (!II || II->getIntrinsicID() != Intrinsic::lifetime_start)
-      continue;
-
-    if (auto *OpInst = dyn_cast<Instruction>(II->getOperand(1))) {
-      if (auto *AI = dyn_cast<AllocaInst>(OpInst->stripPointerCasts())) {
-
-        if (LifetimeMap.find(AI) == LifetimeMap.end())
-          LifetimeMap[AI] = std::make_unique<LifetimeStart>();
-        LifetimeMap[AI]->insert(isa<AllocaInst>(OpInst) ? II : OpInst);
-      }
-    }
-  }
-
   for (Instruction &I : instructions(F)) {
     auto *AI = dyn_cast<AllocaInst>(&I);
     if (!AI)
@@ -2149,23 +2487,6 @@ static void collectFrameAllocas(Function &F, coro::Shape &Shape,
     if (AI == Shape.SwitchLowering.PromiseAlloca) {
       continue;
     }
-    bool ShouldLiveOnFrame = false;
-    auto Iter = LifetimeMap.find(AI);
-    if (Iter != LifetimeMap.end()) {
-      // Check against lifetime.start if the instruction has the info.
-      for (User *U : I.users()) {
-        for (auto *S : *Iter->second)
-          if ((ShouldLiveOnFrame = Checker.isDefinitionAcrossSuspend(*S, U)))
-            break;
-        if (ShouldLiveOnFrame)
-          break;
-      }
-      if (!ShouldLiveOnFrame)
-        continue;
-    }
-    // At this point, either ShouldLiveOnFrame is true or we didn't have
-    // lifetime information. We will need to rely on more precise pointer
-    // tracking.
     DominatorTree DT(F);
     AllocaUseVisitor Visitor{F.getParent()->getDataLayout(), DT,
                              *Shape.CoroBegin, Checker};
@@ -2179,58 +2500,94 @@ static void collectFrameAllocas(Function &F, coro::Shape &Shape,

 void coro::salvageDebugInfo(
     SmallDenseMap<llvm::Value *, llvm::AllocaInst *, 4> &DbgPtrAllocaCache,
-    DbgDeclareInst *DDI, bool LoadFromFramePtr) {
-  Function *F = DDI->getFunction();
+    DbgVariableIntrinsic *DVI, bool ReuseFrameSlot) {
+  Function *F = DVI->getFunction();
   IRBuilder<> Builder(F->getContext());
   auto InsertPt = F->getEntryBlock().getFirstInsertionPt();
   while (isa<IntrinsicInst>(InsertPt))
     ++InsertPt;
   Builder.SetInsertPoint(&F->getEntryBlock(), InsertPt);
-  DIExpression *Expr = DDI->getExpression();
+  DIExpression *Expr = DVI->getExpression();
   // Follow the pointer arithmetic all the way to the incoming
   // function argument and convert into a DIExpression.
-  Value *Storage = DDI->getAddress();
+  bool OutermostLoad = true;
+  Value *Storage = DVI->getVariableLocationOp(0);
+  Value *OriginalStorage = Storage;
   while (Storage) {
     if (auto *LdInst = dyn_cast<LoadInst>(Storage)) {
       Storage = LdInst->getOperand(0);
+      // FIXME: This is a heuristic that works around the fact that
+      // LLVM IR debug intrinsics cannot yet distinguish between
+      // memory and value locations: Because a dbg.declare(alloca) is
+      // implicitly a memory location no DW_OP_deref operation for the
+      // last direct load from an alloca is necessary. This condition
+      // effectively drops the *last* DW_OP_deref in the expression.
+      if (!OutermostLoad)
+        Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore);
+      OutermostLoad = false;
     } else if (auto *StInst = dyn_cast<StoreInst>(Storage)) {
       Storage = StInst->getOperand(0);
     } else if (auto *GEPInst = dyn_cast<GetElementPtrInst>(Storage)) {
-      Expr = llvm::salvageDebugInfoImpl(*GEPInst, Expr,
-                                        /*WithStackValue=*/false);
+      SmallVector<Value *> AdditionalValues;
+      DIExpression *SalvagedExpr = llvm::salvageDebugInfoImpl(
+          *GEPInst, Expr,
+          /*WithStackValue=*/false, 0, AdditionalValues);
+      // Debug declares cannot currently handle additional location
+      // operands.
+      if (!SalvagedExpr || !AdditionalValues.empty())
+        break;
+      Expr = SalvagedExpr;
       Storage = GEPInst->getOperand(0);
     } else if (auto *BCInst = dyn_cast<llvm::BitCastInst>(Storage))
       Storage = BCInst->getOperand(0);
     else
       break;
   }
+  if (!Storage)
+    return;
+
   // Store a pointer to the coroutine frame object in an alloca so it
   // is available throughout the function when producing unoptimized
   // code. Extending the lifetime this way is correct because the
   // variable has been declared by a dbg.declare intrinsic.
-  if (auto Arg = dyn_cast_or_null<llvm::Argument>(Storage)) {
-    auto &Cached = DbgPtrAllocaCache[Storage];
-    if (!Cached) {
-      Cached = Builder.CreateAlloca(Storage->getType(), 0, nullptr,
-                                    Arg->getName() + ".debug");
-      Builder.CreateStore(Storage, Cached);
+  //
+  // Avoid creating the alloca when it would be eliminated by optimization
+  // passes, which would leave the corresponding dbg.declares invalid.
+  if (!ReuseFrameSlot && !EnableReuseStorageInFrame)
+    if (auto *Arg = dyn_cast<llvm::Argument>(Storage)) {
+      auto &Cached = DbgPtrAllocaCache[Storage];
+      if (!Cached) {
+        Cached = Builder.CreateAlloca(Storage->getType(), 0, nullptr,
+                                      Arg->getName() + ".debug");
+        Builder.CreateStore(Storage, Cached);
+      }
+      Storage = Cached;
+      // FIXME: LLVM lacks nuanced semantics to differentiate between
+      // memory and direct locations at the IR level. The backend will
+      // turn a dbg.declare(alloca, ..., DIExpression()) into a memory
+      // location. Thus, if there are deref and offset operations in the
+      // expression, we need to add a DW_OP_deref at the *start* of the
+      // expression to first load the contents of the alloca before
+      // adjusting it with the expression.
+      if (Expr && Expr->isComplex())
+        Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore);
     }
-    Storage = Cached;
-  }
-  // The FramePtr object adds one extra layer of indirection that
-  // needs to be unwrapped.
-  if (LoadFromFramePtr)
-    Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore);
-  auto &VMContext = DDI->getFunction()->getContext();
-  DDI->setOperand(
-      0, MetadataAsValue::get(VMContext, ValueAsMetadata::get(Storage)));
-  DDI->setOperand(2, MetadataAsValue::get(VMContext, Expr));
-  if (auto *InsertPt = dyn_cast_or_null<Instruction>(Storage))
-    DDI->moveAfter(InsertPt);
+
+  DVI->replaceVariableLocationOp(OriginalStorage, Storage);
+  DVI->setExpression(Expr);
+  // It makes no sense to move the dbg.value intrinsic.
+  if (!isa<DbgValueInst>(DVI)) {
+    if (auto *InsertPt = dyn_cast<Instruction>(Storage))
+      DVI->moveAfter(InsertPt);
+    else if (isa<Argument>(Storage))
+      DVI->moveAfter(F->getEntryBlock().getFirstNonPHI());
+  }
 }

 void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
-  eliminateSwiftError(F, Shape);
+  // Don't eliminate swifterror in async functions that won't be split.
+  if (Shape.ABI != coro::ABI::Async || !Shape.CoroSuspends.empty())
+    eliminateSwiftError(F, Shape);

   if (Shape.ABI == coro::ABI::Switch &&
       Shape.SwitchLowering.PromiseAlloca) {
@@ -2267,6 +2624,10 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
     }
   }

+  // Later code makes structural assumptions about single-predecessor PHIs,
+  // e.g. that they are not live across a suspend point.
+  cleanupSinglePredPHIs(F);
+
   // Transforms multi-edge PHI Nodes, so that any value feeding into a PHI will
   // never have its definition separated from the PHI by the suspend point.
   rewritePHIs(F);
@@ -2284,11 +2645,19 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
   for (int Repeat = 0; Repeat < 4; ++Repeat) {
     // See if there are materializable instructions across suspend points.
     for (Instruction &I : instructions(F))
-      if (materializable(I))
+      if (materializable(I)) {
         for (User *U : I.users())
           if (Checker.isDefinitionAcrossSuspend(I, U))
             Spills[&I].push_back(cast<Instruction>(U));

+        // Manually add dbg.value metadata uses of I.
+        SmallVector<DbgValueInst *, 16> DVIs;
+        findDbgValues(DVIs, &I);
+        for (auto *DVI : DVIs)
+          if (Checker.isDefinitionAcrossSuspend(I, DVI))
+            Spills[&I].push_back(DVI);
+      }
+
     if (Spills.empty())
       break;

@@ -2301,7 +2670,8 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
   }

   sinkLifetimeStartMarkers(F, Shape, Checker);
-  collectFrameAllocas(F, Shape, Checker, FrameData.Allocas);
+  if (Shape.ABI != coro::ABI::Async || !Shape.CoroSuspends.empty())
+    collectFrameAllocas(F, Shape, Checker, FrameData.Allocas);
   LLVM_DEBUG(dumpAllocas(FrameData.Allocas));

   // Collect the spills for arguments and other not-materializable values.
@@ -2360,12 +2730,30 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
           FrameData.Spills[&I].push_back(cast<Instruction>(U));
       }
   }
+
+  // We don't want the layout of the coroutine frame to be affected by debug
+  // information, so we only choose to salvage DbgValueInsts whose value is
+  // already in the frame. The dbg.values for allocas are handled specially.
+  for (auto &Iter : FrameData.Spills) {
+    auto *V = Iter.first;
+    SmallVector<DbgValueInst *, 16> DVIs;
+    findDbgValues(DVIs, V);
+    llvm::for_each(DVIs, [&](DbgValueInst *DVI) {
+      if (Checker.isDefinitionAcrossSuspend(*V, DVI))
+        FrameData.Spills[V].push_back(DVI);
+    });
+  }
+
   LLVM_DEBUG(dumpSpills("Spills", FrameData.Spills));

   if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce ||
       Shape.ABI == coro::ABI::Async)
     sinkSpillUsesAfterCoroBegin(F, FrameData, Shape.CoroBegin);
   Shape.FrameTy = buildFrameType(F, Shape, FrameData);
-  Shape.FramePtr = insertSpills(FrameData, Shape);
+  createFramePtr(Shape);
+  // For now, this works for C++ programs only.
+  buildFrameDebugInfo(F, Shape, FrameData);
+  insertSpills(FrameData, Shape);
   lowerLocalAllocas(LocalAllocas, DeadInstructions);

   for (auto I : DeadInstructions)
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroInstr.h b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroInstr.h
index 9fa2fd12f80b..5ed800d67fe9 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroInstr.h
+++ b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroInstr.h
@@ -376,6 +376,18 @@ public:
   }
 };

+/// This represents the llvm.coro.async.size.replace instruction.
+class LLVM_LIBRARY_VISIBILITY CoroAsyncSizeReplace : public IntrinsicInst {
+public:
+  // Methods to support type inquiry through isa, cast, and dyn_cast:
+  static bool classof(const IntrinsicInst *I) {
+    return I->getIntrinsicID() == Intrinsic::coro_async_size_replace;
+  }
+  static bool classof(const Value *V) {
+    return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+  }
+};
+
 /// This represents the llvm.coro.frame instruction.
 class LLVM_LIBRARY_VISIBILITY CoroFrameInst : public IntrinsicInst {
 public:
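`CoroAsyncSizeReplace` plugs into LLVM's hand-rolled RTTI via `classof`. A minimal standalone sketch of that idiom — the types here are illustrative, not LLVM's:

```cpp
#include <cassert>

// LLVM-style RTTI: each class exposes classof, and a template isa<T>(v)
// dispatches to it, so no C++ RTTI (dynamic_cast) is needed.
struct Inst {
  enum Kind { Generic, AsyncSizeReplace } kind;
};

struct AsyncSizeReplaceInst : Inst {
  static bool classof(const Inst *I) {
    return I->kind == Inst::AsyncSizeReplace;
  }
};

template <typename T> bool isa(const Inst *I) { return T::classof(I); }

int main() {
  Inst plain{Inst::Generic};
  assert(!isa<AsyncSizeReplaceInst>(&plain));
}
```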
@@ -511,11 +523,21 @@ inline CoroSaveInst *AnyCoroSuspendInst::getCoroSave() const {

 /// This represents the llvm.coro.suspend.async instruction.
 class LLVM_LIBRARY_VISIBILITY CoroSuspendAsyncInst : public AnyCoroSuspendInst {
-  enum { ResumeFunctionArg, AsyncContextProjectionArg, MustTailCallFuncArg };
-
 public:
+  enum {
+    StorageArgNoArg,
+    ResumeFunctionArg,
+    AsyncContextProjectionArg,
+    MustTailCallFuncArg
+  };
+
   void checkWellFormed() const;

+  unsigned getStorageArgumentIndex() const {
+    auto *Arg = cast<ConstantInt>(getArgOperand(StorageArgNoArg));
+    return Arg->getZExtValue();
+  }
+
   Function *getAsyncContextProjectionFunction() const {
     return cast<Function>(
         getArgOperand(AsyncContextProjectionArg)->stripPointerCasts());
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroInternal.h b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroInternal.h
index 6c0e52f24542..27ba8524f975 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroInternal.h
+++ b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroInternal.h
@@ -54,7 +54,7 @@ void updateCallGraph(Function &Caller, ArrayRef<Function *> Funcs,
 /// holding a pointer to the coroutine frame.
 void salvageDebugInfo(
     SmallDenseMap<llvm::Value *, llvm::AllocaInst *, 4> &DbgPtrAllocaCache,
-    DbgDeclareInst *DDI, bool LoadFromCoroFrame = false);
+    DbgVariableIntrinsic *DVI, bool ReuseFrameSlot);

 // Keeps data and helper functions for lowering coroutine intrinsics.
 struct LowererBase {
@@ -125,6 +125,7 @@ struct LLVM_LIBRARY_VISIBILITY Shape {
   Instruction *FramePtr;
   BasicBlock *AllocaSpillBlock;

+  /// This would only be true if optimizations are enabled.
   bool ReuseFrameSlot;

   struct SwitchLoweringStorage {
@@ -132,6 +133,8 @@ struct LLVM_LIBRARY_VISIBILITY Shape {
     AllocaInst *PromiseAlloca;
     BasicBlock *ResumeEntryBlock;
     unsigned IndexField;
+    unsigned IndexAlign;
+    unsigned IndexOffset;
     bool HasFinalSuspend;
   };

@@ -146,6 +149,7 @@ struct LLVM_LIBRARY_VISIBILITY Shape {
   struct AsyncLoweringStorage {
     FunctionType *AsyncFuncTy;
     Value *Context;
+    CallingConv::ID AsyncCC;
     unsigned ContextArgNo;
     uint64_t ContextHeaderSize;
     uint64_t ContextAlignment;
@@ -208,7 +212,8 @@ struct LLVM_LIBRARY_VISIBILITY Shape {
     case coro::ABI::RetconOnce:
       return RetconLowering.ResumePrototype->getFunctionType();
     case coro::ABI::Async:
-      return AsyncLowering.AsyncFuncTy;
+      // Not used. The function type depends on the active suspend.
+      return nullptr;
     }

     llvm_unreachable("Unknown coro::ABI enum");
@@ -245,7 +250,7 @@ struct LLVM_LIBRARY_VISIBILITY Shape {
     case coro::ABI::RetconOnce:
       return RetconLowering.ResumePrototype->getCallingConv();
     case coro::ABI::Async:
-      return CallingConv::Swift;
+      return AsyncLowering.AsyncCC;
     }
     llvm_unreachable("Unknown coro::ABI enum");
   }
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
index c4d7db9153e2..b6932dbbfc3f 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -26,6 +26,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/CFG.h"
 #include "llvm/Analysis/CallGraph.h"
 #include "llvm/Analysis/CallGraphSCCPass.h"
 #include "llvm/Analysis/LazyCallGraph.h"
@@ -37,6 +38,7 @@
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/GlobalVariable.h"
@@ -367,7 +369,7 @@ static void createResumeEntryBlock(Function &F, coro::Shape &Shape) {
         coro::Shape::SwitchFieldIndex::Resume, "ResumeFn.addr");
     auto *NullPtr = ConstantPointerNull::get(cast<PointerType>(
-        cast<PointerType>(GepIndex->getType())->getElementType()));
+        FrameTy->getTypeAtIndex(coro::Shape::SwitchFieldIndex::Resume)));
     Builder.CreateStore(NullPtr, GepIndex);
   } else {
     auto *GepIndex = Builder.CreateStructGEP(
@@ -454,16 +456,29 @@ void CoroCloner::handleFinalSuspend() {
   }
 }

+static FunctionType *
+getFunctionTypeFromAsyncSuspend(AnyCoroSuspendInst *Suspend) {
+  auto *AsyncSuspend = cast<CoroSuspendAsyncInst>(Suspend);
+  auto *StructTy = cast<StructType>(AsyncSuspend->getType());
+  auto &Context = Suspend->getParent()->getParent()->getContext();
+  auto *VoidTy = Type::getVoidTy(Context);
+  return FunctionType::get(VoidTy, StructTy->elements(), false);
+}
+
 static Function *createCloneDeclaration(Function &OrigF, coro::Shape &Shape,
                                         const Twine &Suffix,
-                                        Module::iterator InsertBefore) {
+                                        Module::iterator InsertBefore,
+                                        AnyCoroSuspendInst *ActiveSuspend) {
   Module *M = OrigF.getParent();
-  auto *FnTy = Shape.getResumeFunctionType();
+  auto *FnTy = (Shape.ABI != coro::ABI::Async)
+                   ? Shape.getResumeFunctionType()
+                   : getFunctionTypeFromAsyncSuspend(ActiveSuspend);

   Function *NewF =
       Function::Create(FnTy, GlobalValue::LinkageTypes::InternalLinkage,
                        OrigF.getName() + Suffix);
-  NewF->addParamAttr(0, Attribute::NonNull);
+  if (Shape.ABI != coro::ABI::Async)
+    NewF->addParamAttr(0, Attribute::NonNull);

   // For the async lowering ABI we can't guarantee that the context argument is
   // not accessed via a different pointer not based on the argument.
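`getStorageArgumentIndex` returns a packed word; judging from the `& 0xff` and `>> 8` uses later in this diff, the async context argument index occupies the low byte and an optional `swiftself` argument index the next byte. A hedged sketch of that packing (the helper name is illustrative):

```cpp
#include <cassert>
#include <cstdint>

// Pack two small argument indices into one uint32_t, matching the
// low-byte / second-byte split used by the async lowering. Because
// `swiftasync` must precede `swiftself`, index 0 can never be a valid
// swiftself position and doubles as "no swiftself".
constexpr uint32_t packIndices(uint8_t contextArg, uint8_t swiftSelfArg) {
  return static_cast<uint32_t>(swiftSelfArg) << 8 | contextArg;
}

int main() {
  uint32_t packed = packIndices(/*contextArg=*/1, /*swiftSelfArg=*/2);
  assert((packed & 0xff) == 1); // async context argument index
  assert((packed >> 8) == 2);   // swiftself argument index (0 = none)
}
```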
@@ -572,6 +587,8 @@ void CoroCloner::replaceCoroEnds() { static void replaceSwiftErrorOps(Function &F, coro::Shape &Shape, ValueToValueMapTy *VMap) { + if (Shape.ABI == coro::ABI::Async && Shape.CoroSuspends.empty()) + return; Value *CachedSlot = nullptr; auto getSwiftErrorSlot = [&](Type *ValueTy) -> Value * { if (CachedSlot) { @@ -633,34 +650,34 @@ void CoroCloner::replaceSwiftErrorOps() { } void CoroCloner::salvageDebugInfo() { - SmallVector<DbgDeclareInst *, 8> Worklist; + SmallVector<DbgVariableIntrinsic *, 8> Worklist; SmallDenseMap<llvm::Value *, llvm::AllocaInst *, 4> DbgPtrAllocaCache; for (auto &BB : *NewF) for (auto &I : BB) - if (auto *DDI = dyn_cast<DbgDeclareInst>(&I)) - Worklist.push_back(DDI); - for (DbgDeclareInst *DDI : Worklist) { - // This is a heuristic that detects declares left by CoroFrame. - bool LoadFromFramePtr = !isa<AllocaInst>(DDI->getAddress()); - coro::salvageDebugInfo(DbgPtrAllocaCache, DDI, LoadFromFramePtr); - } + if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I)) + Worklist.push_back(DVI); + for (DbgVariableIntrinsic *DVI : Worklist) + coro::salvageDebugInfo(DbgPtrAllocaCache, DVI, Shape.ReuseFrameSlot); + // Remove all salvaged dbg.declare intrinsics that became // either unreachable or stale due to the CoroSplit transformation. + DominatorTree DomTree(*NewF); auto IsUnreachableBlock = [&](BasicBlock *BB) { - return BB->hasNPredecessors(0) && BB != &NewF->getEntryBlock(); + return !isPotentiallyReachable(&NewF->getEntryBlock(), BB, nullptr, + &DomTree); }; - for (DbgDeclareInst *DDI : Worklist) { - if (IsUnreachableBlock(DDI->getParent())) - DDI->eraseFromParent(); - else if (auto *Alloca = dyn_cast_or_null<AllocaInst>(DDI->getAddress())) { + for (DbgVariableIntrinsic *DVI : Worklist) { + if (IsUnreachableBlock(DVI->getParent())) + DVI->eraseFromParent(); + else if (dyn_cast_or_null<AllocaInst>(DVI->getVariableLocationOp(0))) { // Count all non-debuginfo uses in reachable blocks. unsigned Uses = 0; - for (auto *User : DDI->getAddress()->users()) + for (auto *User : DVI->getVariableLocationOp(0)->users()) if (auto *I = dyn_cast<Instruction>(User)) if (!isa<AllocaInst>(I) && !IsUnreachableBlock(I->getParent())) ++Uses; if (!Uses) - DDI->eraseFromParent(); + DVI->eraseFromParent(); } } } @@ -717,15 +734,17 @@ void CoroCloner::replaceEntryBlock() { } } - // Any alloca that's still being used but not reachable from the new entry - // needs to be moved to the new entry. + // Any static alloca that's still being used but not reachable from the new + // entry needs to be moved to the new entry. Function *F = OldEntry->getParent(); DominatorTree DT{*F}; for (auto IT = inst_begin(F), End = inst_end(F); IT != End;) { Instruction &I = *IT++; - if (!isa<AllocaInst>(&I) || I.use_empty()) + auto *Alloca = dyn_cast<AllocaInst>(&I); + if (!Alloca || I.use_empty()) continue; - if (DT.isReachableFromEntry(I.getParent())) + if (DT.isReachableFromEntry(I.getParent()) || + !isa<ConstantInt>(Alloca->getArraySize())) continue; I.moveBefore(*Entry, Entry->getFirstInsertionPt()); } @@ -745,10 +764,12 @@ Value *CoroCloner::deriveNewFramePointer() { // with the active suspend. The frame is located as a tail to the async // context header. 
@@ -745,10 +764,12 @@ Value *CoroCloner::deriveNewFramePointer() {
   // with the active suspend. The frame is located as a tail to the async
   // context header.
   case coro::ABI::Async: {
-    auto *CalleeContext = NewF->getArg(Shape.AsyncLowering.ContextArgNo);
+    auto *ActiveAsyncSuspend = cast<CoroSuspendAsyncInst>(ActiveSuspend);
+    auto ContextIdx = ActiveAsyncSuspend->getStorageArgumentIndex() & 0xff;
+    auto *CalleeContext = NewF->getArg(ContextIdx);
     auto *FramePtrTy = Shape.FrameTy->getPointerTo();
-    auto *ProjectionFunc = cast<CoroSuspendAsyncInst>(ActiveSuspend)
-                               ->getAsyncContextProjectionFunction();
+    auto *ProjectionFunc =
+        ActiveAsyncSuspend->getAsyncContextProjectionFunction();
     auto DbgLoc =
         cast<CoroSuspendAsyncInst>(VMap[ActiveSuspend])->getDebugLoc();
     // Calling i8* (i8*)
@@ -799,13 +820,27 @@ static void addFramePointerAttrs(AttributeList &Attrs, LLVMContext &Context,
   Attrs = Attrs.addParamAttributes(Context, ParamIndex, ParamAttrs);
 }
 
+static void addAsyncContextAttrs(AttributeList &Attrs, LLVMContext &Context,
+                                 unsigned ParamIndex) {
+  AttrBuilder ParamAttrs;
+  ParamAttrs.addAttribute(Attribute::SwiftAsync);
+  Attrs = Attrs.addParamAttributes(Context, ParamIndex, ParamAttrs);
+}
+
+static void addSwiftSelfAttrs(AttributeList &Attrs, LLVMContext &Context,
+                              unsigned ParamIndex) {
+  AttrBuilder ParamAttrs;
+  ParamAttrs.addAttribute(Attribute::SwiftSelf);
+  Attrs = Attrs.addParamAttributes(Context, ParamIndex, ParamAttrs);
+}
+
 /// Clone the body of the original function into a resume function of
 /// some sort.
 void CoroCloner::create() {
   // Create the new function if we don't already have one.
   if (!NewF) {
     NewF = createCloneDeclaration(OrigF, Shape, Suffix,
-                                  OrigF.getParent()->end());
+                                  OrigF.getParent()->end(), ActiveSuspend);
   }
 
   // Replace all args with undefs. The buildCoroutineFrame algorithm already
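The projection function called in the hunk above implements the "frame as a tail of the async context" layout mentioned in the comment. A rough sketch of the idea in isolation, with a deliberately simplified, hypothetical header layout (the real layout is defined by the Swift runtime ABI, not by this patch):

    #include <cstdint>

    // Illustrative only: a caller-allocated async context starts with a
    // header; the coroutine frame is stored immediately after it.
    struct AsyncContextHeader {
      void *Resume;               // continuation to run after a suspend
      AsyncContextHeader *Parent; // the caller's context
    };

    // i8* (i8*): map the callee's context pointer to its frame pointer,
    // the role played by getAsyncContextProjectionFunction() above.
    static void *projectFrame(AsyncContextHeader *Ctx, unsigned HeaderSize) {
      return reinterpret_cast<uint8_t *>(Ctx) + HeaderSize;
    }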
@@ -828,15 +863,41 @@ void CoroCloner::create() {
   auto savedLinkage = NewF->getLinkage();
   NewF->setLinkage(llvm::GlobalValue::ExternalLinkage);
 
-  CloneFunctionInto(NewF, &OrigF, VMap, /*ModuleLevelChanges=*/true, Returns);
+  CloneFunctionInto(NewF, &OrigF, VMap,
+                    CloneFunctionChangeType::LocalChangesOnly, Returns);
+
+  auto &Context = NewF->getContext();
+
+  // For async functions / continuations, adjust the scope line of the
+  // clone to the line number of the suspend point. However, only
+  // adjust the scope line when the files are the same. This ensures
+  // line number and file name belong together. The scope line is
+  // associated with all pre-prologue instructions. This avoids a jump
+  // in the linetable from the function declaration to the suspend point.
+  if (DISubprogram *SP = NewF->getSubprogram()) {
+    assert(SP != OrigF.getSubprogram() && SP->isDistinct());
+    if (ActiveSuspend)
+      if (auto DL = ActiveSuspend->getDebugLoc())
+        if (SP->getFile() == DL->getFile())
+          SP->setScopeLine(DL->getLine());
+    // Update the linkage name to reflect the modified symbol name. It
+    // is necessary to update the linkage name in Swift, since the
+    // mangling changes for resume functions. It might also be the
+    // right thing to do in C++, but due to a limitation in LLVM's
+    // AsmPrinter we can only do this if the function doesn't have an
+    // abstract specification, since the DWARF backend expects the
+    // abstract specification to contain the linkage name and asserts
+    // that they are identical.
+    if (!SP->getDeclaration() && SP->getUnit() &&
+        SP->getUnit()->getSourceLanguage() == dwarf::DW_LANG_Swift)
+      SP->replaceLinkageName(MDString::get(Context, NewF->getName()));
+  }
 
   NewF->setLinkage(savedLinkage);
   NewF->setVisibility(savedVisibility);
   NewF->setUnnamedAddr(savedUnnamedAddr);
   NewF->setDLLStorageClass(savedDLLStorageClass);
 
-  auto &Context = NewF->getContext();
-
   // Replace the attributes of the new function:
   auto OrigAttrs = NewF->getAttributes();
   auto NewAttrs = AttributeList();
@@ -851,8 +912,28 @@ void CoroCloner::create() {
     addFramePointerAttrs(NewAttrs, Context, 0, Shape.FrameSize,
                          Shape.FrameAlign);
     break;
-  case coro::ABI::Async:
+  case coro::ABI::Async: {
+    auto *ActiveAsyncSuspend = cast<CoroSuspendAsyncInst>(ActiveSuspend);
+    if (OrigF.hasParamAttribute(Shape.AsyncLowering.ContextArgNo,
+                                Attribute::SwiftAsync)) {
+      uint32_t ArgAttributeIndices =
+          ActiveAsyncSuspend->getStorageArgumentIndex();
+      auto ContextArgIndex = ArgAttributeIndices & 0xff;
+      addAsyncContextAttrs(NewAttrs, Context, ContextArgIndex);
+
+      // `swiftasync` must precede `swiftself` so 0 is not a valid index for
+      // `swiftself`.
+      auto SwiftSelfIndex = ArgAttributeIndices >> 8;
+      if (SwiftSelfIndex)
+        addSwiftSelfAttrs(NewAttrs, Context, SwiftSelfIndex);
+    }
+
+    // Transfer the original function's attributes.
+    auto FnAttrs = OrigF.getAttributes().getFnAttributes();
+    NewAttrs =
+        NewAttrs.addAttributes(Context, AttributeList::FunctionIndex, FnAttrs);
     break;
+  }
   case coro::ABI::Retcon:
   case coro::ABI::RetconOnce:
     // If we have a continuation prototype, just use its attributes,
@@ -874,7 +955,7 @@ void CoroCloner::create() {
   case coro::ABI::RetconOnce:
     // Remove old returns.
     for (ReturnInst *Return : Returns)
-      changeToUnreachable(Return, /*UseLLVMTrap=*/false);
+      changeToUnreachable(Return);
     break;
 
   // With multi-suspend continuations, we'll already have eliminated the
@@ -1068,17 +1149,6 @@ static void postSplitCleanup(Function &F) {
   // pass to FPM below because it will also verify all the global data.
   if (verifyFunction(F, &errs()))
     report_fatal_error("Broken function");
-
-  legacy::FunctionPassManager FPM(F.getParent());
-
-  FPM.add(createSCCPPass());
-  FPM.add(createCFGSimplificationPass());
-  FPM.add(createEarlyCSEPass());
-  FPM.add(createCFGSimplificationPass());
-
-  FPM.doInitialization();
-  FPM.run(F);
-  FPM.doFinalization();
 }
 
 // Assuming we arrived at the block NewBlock from Prev instruction, store
@@ -1245,6 +1315,7 @@ static void handleNoSuspendCoroutine(coro::Shape &Shape) {
     } else {
      CoroBegin->replaceAllUsesWith(CoroBegin->getMem());
     }
+    break;
   }
 
   case coro::ABI::Async:
@@ -1453,7 +1524,8 @@ static void replaceAsyncResumeFunction(CoroSuspendAsyncInst *Suspend,
   auto *Val = Builder.CreateBitOrPointerCast(Continuation, Int8PtrTy);
   ResumeIntrinsic->replaceAllUsesWith(Val);
   ResumeIntrinsic->eraseFromParent();
-  Suspend->setOperand(0, UndefValue::get(Int8PtrTy));
+  Suspend->setOperand(CoroSuspendAsyncInst::ResumeFunctionArg,
+                      UndefValue::get(Int8PtrTy));
 }
 
 /// Coerce the arguments in \p FnArgs according to \p FnTy in \p CallArgs.
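The `& 0xff` and `>> 8` operations above decode two argument indices packed into one 32-bit value by `getStorageArgumentIndex()`: the async context argument in the low byte and an optional `swiftself` argument in the next byte. As the comment notes, 0 can double as "no swiftself" only because `swiftasync` must precede `swiftself`, so a real `swiftself` index is never 0. An illustrative encoder/decoder pair (hypothetical helper names, mirroring the masks used above):

    #include <cassert>
    #include <cstdint>

    static uint32_t packStorageIndices(uint32_t ContextIdx,
                                       uint32_t SwiftSelfIdx) {
      assert(ContextIdx < 256 && SwiftSelfIdx < 256);
      return ContextIdx | (SwiftSelfIdx << 8); // low byte: context argument
    }

    static void unpackStorageIndices(uint32_t Bits, uint32_t &ContextIdx,
                                     uint32_t &SwiftSelfIdx) {
      ContextIdx = Bits & 0xff; // matches ArgAttributeIndices & 0xff
      SwiftSelfIdx = Bits >> 8; // matches ArgAttributeIndices >> 8; 0 = none
    }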
@@ -1528,8 +1600,23 @@ static void splitAsyncCoroutine(Function &F, coro::Shape &Shape,
     auto *Suspend = cast<CoroSuspendAsyncInst>(Shape.CoroSuspends[Idx]);
 
     // Create the clone declaration.
-    auto *Continuation =
-        createCloneDeclaration(F, Shape, ".resume." + Twine(Idx), NextF);
+    auto ResumeNameSuffix = ".resume.";
+    auto ProjectionFunctionName =
+        Suspend->getAsyncContextProjectionFunction()->getName();
+    bool UseSwiftMangling = false;
+    if (ProjectionFunctionName.equals("__swift_async_resume_project_context")) {
+      ResumeNameSuffix = "TQ";
+      UseSwiftMangling = true;
+    } else if (ProjectionFunctionName.equals(
+                   "__swift_async_resume_get_context")) {
+      ResumeNameSuffix = "TY";
+      UseSwiftMangling = true;
+    }
+    auto *Continuation = createCloneDeclaration(
+        F, Shape,
+        UseSwiftMangling ? ResumeNameSuffix + Twine(Idx) + "_"
+                         : ResumeNameSuffix + Twine(Idx),
+        NextF, Suspend);
     Clones.push_back(Continuation);
 
     // Insert a branch to a new return block immediately before the suspend
@@ -1548,7 +1635,8 @@ static void splitAsyncCoroutine(Function &F, coro::Shape &Shape,
     // Insert the call to the tail call function and inline it.
     auto *Fn = Suspend->getMustTailCallFunction();
     SmallVector<Value *, 8> Args(Suspend->args());
-    auto FnArgs = ArrayRef<Value *>(Args).drop_front(3);
+    auto FnArgs = ArrayRef<Value *>(Args).drop_front(
+        CoroSuspendAsyncInst::MustTailCallFuncArg + 1);
     auto *TailCall =
         coro::createMustTailCall(Suspend->getDebugLoc(), Fn, FnArgs, Builder);
     Builder.CreateRetVoid();
@@ -1629,7 +1717,7 @@ static void splitRetconCoroutine(Function &F, coro::Shape &Shape,
 
     // Create the clone declaration.
     auto Continuation =
-        createCloneDeclaration(F, Shape, ".resume." + Twine(i), NextF);
+        createCloneDeclaration(F, Shape, ".resume." + Twine(i), NextF, nullptr);
     Clones.push_back(Continuation);
 
     // Insert a branch to the unified return block immediately before
@@ -1798,7 +1886,8 @@ static void updateCallGraphAfterCoroutineSplit(
   case coro::ABI::RetconOnce:
     // Each clone in the Async/Retcon lowering references the other clones.
     // Let the LazyCallGraph know about all of them at once.
-    CG.addSplitRefRecursiveFunctions(N.getFunction(), Clones);
+    if (!Clones.empty())
+      CG.addSplitRefRecursiveFunctions(N.getFunction(), Clones);
     break;
   }
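In the `splitAsyncCoroutine` hunk above, continuations cloned for Swift's known projection helpers get mangling-friendly names: `TQ<n>_` and `TY<n>_` appear to correspond to Swift's await-resume and suspend-resume partial function manglings, while everything else keeps the generic `.resume.<n>` suffix. A condensed restatement of that selection (standalone sketch; `resumeSuffix` is a hypothetical helper):

    #include <string>

    static std::string resumeSuffix(const std::string &ProjectionFn,
                                    unsigned Idx) {
      if (ProjectionFn == "__swift_async_resume_project_context")
        return "TQ" + std::to_string(Idx) + "_"; // Swift-mangled resume
      if (ProjectionFn == "__swift_async_resume_get_context")
        return "TY" + std::to_string(Idx) + "_"; // Swift-mangled resume
      return ".resume." + std::to_string(Idx);   // generic LLVM suffix
    }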
@@ -2049,28 +2138,21 @@ PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C,
   // Split all the coroutines.
   for (LazyCallGraph::Node *N : Coroutines) {
     Function &F = N->getFunction();
-    Attribute Attr = F.getFnAttribute(CORO_PRESPLIT_ATTR);
-    StringRef Value = Attr.getValueAsString();
     LLVM_DEBUG(dbgs() << "CoroSplit: Processing coroutine '" << F.getName()
-                      << "' state: " << Value << "\n");
-    if (Value == UNPREPARED_FOR_SPLIT) {
-      // Enqueue a second iteration of the CGSCC pipeline on this SCC.
-      UR.CWorklist.insert(&C);
-      F.addFnAttr(CORO_PRESPLIT_ATTR, PREPARED_FOR_SPLIT);
-      continue;
-    }
+                      << "' state: "
+                      << F.getFnAttribute(CORO_PRESPLIT_ATTR).getValueAsString()
+                      << "\n");
     F.removeFnAttr(CORO_PRESPLIT_ATTR);
 
     SmallVector<Function *, 4> Clones;
     const coro::Shape Shape = splitCoroutine(F, Clones, ReuseFrameSlot);
     updateCallGraphAfterCoroutineSplit(*N, Shape, Clones, C, CG, AM, UR, FAM);
 
-    if ((Shape.ABI == coro::ABI::Async || Shape.ABI == coro::ABI::Retcon ||
-         Shape.ABI == coro::ABI::RetconOnce) &&
-        !Shape.CoroSuspends.empty()) {
-      // Run the CGSCC pipeline on the newly split functions.
-      // All clones will be in the same RefSCC, so choose a random clone.
-      UR.RCWorklist.insert(CG.lookupRefSCC(CG.get(*Clones[0])));
+    if (!Shape.CoroSuspends.empty()) {
+      // Run the CGSCC pipeline on the original and newly split functions.
+      UR.CWorklist.insert(&C);
+      for (Function *Clone : Clones)
+        UR.CWorklist.insert(CG.lookupSCC(CG.get(*Clone)));
     }
   }
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/Coroutines.cpp b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/Coroutines.cpp
index 6699a5c46313..ae2d9e192c87 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/Coroutines.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/Coroutines.cpp
@@ -126,6 +126,7 @@ static bool isCoroutineIntrinsicName(StringRef Name) {
       "llvm.coro.alloc",
       "llvm.coro.async.context.alloc",
       "llvm.coro.async.context.dealloc",
+      "llvm.coro.async.size.replace",
       "llvm.coro.async.store_resume",
       "llvm.coro.begin",
       "llvm.coro.destroy",
@@ -360,7 +361,7 @@ void coro::Shape::buildFrom(Function &F) {
 
     // Replace all coro.ends with unreachable instruction.
     for (AnyCoroEndInst *CE : CoroEnds)
-      changeToUnreachable(CE, /*UseLLVMTrap=*/false);
+      changeToUnreachable(CE);
 
     return;
   }
@@ -399,11 +400,7 @@ void coro::Shape::buildFrom(Function &F) {
       this->AsyncLowering.ContextAlignment =
          AsyncId->getStorageAlignment().value();
       this->AsyncLowering.AsyncFuncPointer = AsyncId->getAsyncFunctionPointer();
-      auto &Context = F.getContext();
-      auto *Int8PtrTy = Type::getInt8PtrTy(Context);
-      auto *VoidTy = Type::getVoidTy(Context);
-      this->AsyncLowering.AsyncFuncTy =
-          FunctionType::get(VoidTy, {Int8PtrTy, Int8PtrTy, Int8PtrTy}, false);
+      this->AsyncLowering.AsyncCC = F.getCallingConv();
       break;
     };
   case Intrinsic::coro_id_retcon:
@@ -700,7 +697,7 @@ void CoroIdAsyncInst::checkWellFormed() const {
 
 static void checkAsyncContextProjectFunction(const Instruction *I,
                                              Function *F) {
-  auto *FunTy = cast<FunctionType>(F->getType()->getPointerElementType());
+  auto *FunTy = cast<FunctionType>(F->getValueType());
   if (!FunTy->getReturnType()->isPointerTy() ||
       !FunTy->getReturnType()->getPointerElementType()->isIntegerTy(8))
     fail(I,
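The last hunk swaps `F->getType()->getPointerElementType()` for `F->getValueType()`, the idiom that keeps working once pointer types become opaque: a `Function` knows its own function type, so there is no need to peel it off the pointer. A minimal sketch of the check in isolation, assuming LLVM 13-era headers (`returnsI8Ptr` is a hypothetical helper; the remaining `getPointerElementType()` on the return type mirrors the transitional code above):

    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/Function.h"
    using namespace llvm;

    // Verify an async context projection function returns an i8*.
    static bool returnsI8Ptr(const Function *F) {
      auto *FunTy = cast<FunctionType>(F->getValueType());
      Type *RetTy = FunTy->getReturnType();
      return RetTy->isPointerTy() &&
             RetTy->getPointerElementType()->isIntegerTy(8);
    }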
