src - FreeBSD source tree

diff options


context:
space:
mode:

author	Dimitry Andric <dim@FreeBSD.org>	2021-07-29 20:15:26 +0000
committer	Dimitry Andric <dim@FreeBSD.org>	2021-07-29 20:15:26 +0000
commit	344a3780b2e33f6ca763666c380202b18aab72a3 (patch)
tree	f0b203ee6eb71d7fdd792373e3c81eb18d6934dd /llvm/lib/Transforms/Coroutines/CoroFrame.cpp
parent	b60736ec1405bb0a8dd40989f67ef4c93da068ab (diff)

vendor/llvm-project/llvmorg-13-init-16847-g88e66fa60ae5 vendor/llvm-project/llvmorg-12.0.1-rc2-0-ge7dac564cd0e vendor/llvm-project/llvmorg-12.0.1-0-gfed41342a82f

Diffstat (limited to 'llvm/lib/Transforms/Coroutines/CoroFrame.cpp')

-rw-r--r--

llvm/lib/Transforms/Coroutines/CoroFrame.cpp

771

1 files changed, 590 insertions, 181 deletions

diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
index e53e7605b254..beae5fdac8ab 100644
--- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp

@@ -12,8 +12,6 @@

// contain those values. All uses of those values are replaced with appropriate

// GEP + load from the coroutine frame. At the point of the definition we spill

// the value into the coroutine frame.

-//

-// TODO: pack values tightly using liveness info.

//===----------------------------------------------------------------------===//

#include "CoroInternal.h"

@@ -32,6 +30,7 @@

#include "llvm/Support/MathExtras.h"

#include "llvm/Support/OptimizedStructLayout.h"

#include "llvm/Support/circular_raw_ostream.h"

+#include "llvm/Support/raw_ostream.h"

#include "llvm/Transforms/Utils/BasicBlockUtils.h"

#include "llvm/Transforms/Utils/Local.h"

#include "llvm/Transforms/Utils/PromoteMemToReg.h"

@@ -163,6 +162,16 @@ struct SuspendCrossingInfo {

return isDefinitionAcrossSuspend(DefBB, U);

}

+ bool isDefinitionAcrossSuspend(Value &V, User *U) const {

+ if (auto *Arg = dyn_cast<Argument>(&V))

+ return isDefinitionAcrossSuspend(*Arg, U);

+ if (auto *Inst = dyn_cast<Instruction>(&V))

+ return isDefinitionAcrossSuspend(*Inst, U);

+ llvm_unreachable(

+ "Coroutine could only collect Argument and Instruction now.");

+ }

};

} // end anonymous namespace

@@ -336,6 +345,28 @@ struct FrameDataInfo {

FieldIndexMap[V] = Index;

}

+ uint64_t getAlign(Value *V) const {

+ auto Iter = FieldAlignMap.find(V);

+ assert(Iter != FieldAlignMap.end());

+ return Iter->second;

+ }

+ void setAlign(Value *V, uint64_t Align) {

+ assert(FieldAlignMap.count(V) == 0);

+ FieldAlignMap.insert({V, Align});

+ }

+ uint64_t getOffset(Value *V) const {

+ auto Iter = FieldOffsetMap.find(V);

+ assert(Iter != FieldOffsetMap.end());

+ return Iter->second;

+ }

+ void setOffset(Value *V, uint64_t Offset) {

+ assert(FieldOffsetMap.count(V) == 0);

+ FieldOffsetMap.insert({V, Offset});

+ }

// Remap the index of every field in the frame, using the final layout index.

void updateLayoutIndex(FrameTypeBuilder &B);

@@ -347,6 +378,12 @@ private:

// with their original insertion field index. After the frame is built, their

// indexes will be updated into the final layout index.

DenseMap<Value *, uint32_t> FieldIndexMap;

+ // Map from values to their alignment on the frame. They would be set after

+ // the frame is built.

+ DenseMap<Value *, uint64_t> FieldAlignMap;

+ // Map from values to their offset on the frame. They would be set after

+ // the frame is built.

+ DenseMap<Value *, uint64_t> FieldOffsetMap;

};

} // namespace

@@ -392,12 +429,15 @@ private:

Align StructAlign;

bool IsFinished = false;

+ Optional<Align> MaxFrameAlignment;

SmallVector<Field, 8> Fields;

DenseMap<Value*, unsigned> FieldIndexByKey;

public:

- FrameTypeBuilder(LLVMContext &Context, DataLayout const &DL)

- : DL(DL), Context(Context) {}

+ FrameTypeBuilder(LLVMContext &Context, DataLayout const &DL,

+ Optional<Align> MaxFrameAlignment)

+ : DL(DL), Context(Context), MaxFrameAlignment(MaxFrameAlignment) {}

/// Add a field to this structure for the storage of an `alloca`

/// instruction.

@@ -448,17 +488,32 @@ public:

/// Add a field to this structure.

LLVM_NODISCARD FieldIDType addField(Type *Ty, MaybeAlign FieldAlignment,

- bool IsHeader = false) {

+ bool IsHeader = false,

+ bool IsSpillOfValue = false) {

assert(!IsFinished && "adding fields to a finished builder");

assert(Ty && "must provide a type for a field");

// The field size is always the alloc size of the type.

uint64_t FieldSize = DL.getTypeAllocSize(Ty);

+ // For an alloca with size=0, we don't need to add a field and they

+ // can just point to any index in the frame. Use index 0.

+ if (FieldSize == 0) {

+ return 0;

+ }

// The field alignment might not be the type alignment, but we need

// to remember the type alignment anyway to build the type.

- Align TyAlignment = DL.getABITypeAlign(Ty);

- if (!FieldAlignment) FieldAlignment = TyAlignment;

+ // If we are spilling values we don't need to worry about ABI alignment

+ // concerns.

+ auto ABIAlign = DL.getABITypeAlign(Ty);

+ Align TyAlignment =

+ (IsSpillOfValue && MaxFrameAlignment)

+ ? (*MaxFrameAlignment < ABIAlign ? *MaxFrameAlignment : ABIAlign)

+ : ABIAlign;

+ if (!FieldAlignment) {

+ FieldAlignment = TyAlignment;

+ }

// Lay out header fields immediately.

uint64_t Offset;

@@ -492,12 +547,20 @@ public:

assert(IsFinished && "not yet finished!");

return Fields[Id].LayoutFieldIndex;

}

+ Field getLayoutField(FieldIDType Id) const {

+ assert(IsFinished && "not yet finished!");

+ return Fields[Id];

+ }

};

} // namespace

void FrameDataInfo::updateLayoutIndex(FrameTypeBuilder &B) {

auto Updater = [&](Value *I) {

- setFieldIndex(I, B.getLayoutFieldIndex(getFieldIndex(I)));

+ auto Field = B.getLayoutField(getFieldIndex(I));

+ setFieldIndex(I, Field.LayoutFieldIndex);

+ setAlign(I, Field.Alignment.value());

+ setOffset(I, Field.Offset);

};

LayoutIndexUpdateStarted = true;

for (auto &S : Spills)

@@ -510,7 +573,6 @@ void FrameDataInfo::updateLayoutIndex(FrameTypeBuilder &B) {

void FrameTypeBuilder::addFieldForAllocas(const Function &F,

FrameDataInfo &FrameData,

coro::Shape &Shape) {

- DenseMap<AllocaInst *, unsigned int> AllocaIndex;

using AllocaSetType = SmallVector<AllocaInst *, 4>;

SmallVector<AllocaSetType, 4> NonOverlapedAllocas;

@@ -532,7 +594,6 @@ void FrameTypeBuilder::addFieldForAllocas(const Function &F,

if (!Shape.ReuseFrameSlot && !EnableReuseStorageInFrame) {

for (const auto &A : FrameData.Allocas) {

AllocaInst *Alloca = A.Alloca;

- AllocaIndex[Alloca] = NonOverlapedAllocas.size();

NonOverlapedAllocas.emplace_back(AllocaSetType(1, Alloca));

}

return;

@@ -613,13 +674,11 @@ void FrameTypeBuilder::addFieldForAllocas(const Function &F,

bool CouldMerge = NoInference && Alignable;

if (!CouldMerge)

continue;

- AllocaIndex[Alloca] = AllocaIndex[*AllocaSet.begin()];

AllocaSet.push_back(Alloca);

Merged = true;

break;

}

if (!Merged) {

- AllocaIndex[Alloca] = NonOverlapedAllocas.size();

NonOverlapedAllocas.emplace_back(AllocaSetType(1, Alloca));

}

@@ -716,6 +775,314 @@ void FrameTypeBuilder::finish(StructType *Ty) {

IsFinished = true;

}

+static void cacheDIVar(FrameDataInfo &FrameData,

+ DenseMap<Value *, DILocalVariable *> &DIVarCache) {

+ for (auto *V : FrameData.getAllDefs()) {

+ if (DIVarCache.find(V) != DIVarCache.end())

+ continue;

+ auto DDIs = FindDbgDeclareUses(V);

+ auto *I = llvm::find_if(DDIs, [](DbgDeclareInst *DDI) {

+ return DDI->getExpression()->getNumElements() == 0;

+ });

+ if (I != DDIs.end())

+ DIVarCache.insert({V, (*I)->getVariable()});

+ }

+/// Create a name for Ty. It uses MDString to store the newly created string to

+/// avoid a memory leak.

+static StringRef solveTypeName(Type *Ty) {

+ if (Ty->isIntegerTy()) {

+ // The longest name in common may be '__int_128', which has 9 characters.

+ SmallString<16> Buffer;

+ raw_svector_ostream OS(Buffer);

+ OS << "__int_" << cast<IntegerType>(Ty)->getBitWidth();

+ auto *MDName = MDString::get(Ty->getContext(), OS.str());

+ return MDName->getString();

+ }

+ if (Ty->isFloatingPointTy()) {

+ if (Ty->isFloatTy())

+ return "__float_";

+ if (Ty->isDoubleTy())

+ return "__double_";

+ return "__floating_type_";

+ }

+ if (Ty->isPointerTy()) {

+ auto *PtrTy = cast<PointerType>(Ty);

+ Type *PointeeTy = PtrTy->getElementType();

+ auto Name = solveTypeName(PointeeTy);

+ if (Name == "UnknownType")

+ return "PointerType";

+ SmallString<16> Buffer;

+ Twine(Name + "_Ptr").toStringRef(Buffer);

+ auto *MDName = MDString::get(Ty->getContext(), Buffer.str());

+ return MDName->getString();

+ }

+ if (Ty->isStructTy()) {

+ if (!cast<StructType>(Ty)->hasName())

+ return "__LiteralStructType_";

+ auto Name = Ty->getStructName();

+ SmallString<16> Buffer(Name);

+ for_each(Buffer, [](auto &Iter) {

+ if (Iter == '.' || Iter == ':')

+ Iter = '_';

+ });

+ auto *MDName = MDString::get(Ty->getContext(), Buffer.str());

+ return MDName->getString();

+ }

+ return "UnknownType";

+static DIType *solveDIType(DIBuilder &Builder, Type *Ty, DataLayout &Layout,

+ DIScope *Scope, unsigned LineNum,

+ DenseMap<Type *, DIType *> &DITypeCache) {

+ if (DIType *DT = DITypeCache.lookup(Ty))

+ return DT;

+ StringRef Name = solveTypeName(Ty);

+ DIType *RetType = nullptr;

+ if (Ty->isIntegerTy()) {

+ auto BitWidth = cast<IntegerType>(Ty)->getBitWidth();

+ RetType = Builder.createBasicType(Name, BitWidth, dwarf::DW_ATE_signed,

+ llvm::DINode::FlagArtificial);

+ } else if (Ty->isFloatingPointTy()) {

+ RetType = Builder.createBasicType(Name, Layout.getTypeSizeInBits(Ty),

+ dwarf::DW_ATE_float,

+ llvm::DINode::FlagArtificial);

+ } else if (Ty->isPointerTy()) {

+ // Construct BasicType instead of PointerType to avoid infinite

+ // search problem.

+ // For example, we would be in trouble if we traverse recursively:

+ //

+ // struct Node {

+ // Node* ptr;

+ // };

+ RetType = Builder.createBasicType(Name, Layout.getTypeSizeInBits(Ty),

+ dwarf::DW_ATE_address,

+ llvm::DINode::FlagArtificial);

+ } else if (Ty->isStructTy()) {

+ auto *DIStruct = Builder.createStructType(

+ Scope, Name, Scope->getFile(), LineNum, Layout.getTypeSizeInBits(Ty),

+ Layout.getPrefTypeAlignment(Ty), llvm::DINode::FlagArtificial, nullptr,

+ llvm::DINodeArray());

+ auto *StructTy = cast<StructType>(Ty);

+ SmallVector<Metadata *, 16> Elements;

+ for (unsigned I = 0; I < StructTy->getNumElements(); I++) {

+ DIType *DITy = solveDIType(Builder, StructTy->getElementType(I), Layout,

+ Scope, LineNum, DITypeCache);

+ assert(DITy);

+ Elements.push_back(Builder.createMemberType(

+ Scope, DITy->getName(), Scope->getFile(), LineNum,

+ DITy->getSizeInBits(), DITy->getAlignInBits(),

+ Layout.getStructLayout(StructTy)->getElementOffsetInBits(I),

+ llvm::DINode::FlagArtificial, DITy));

+ }

+ Builder.replaceArrays(DIStruct, Builder.getOrCreateArray(Elements));

+ RetType = DIStruct;

+ } else {

+ LLVM_DEBUG(dbgs() << "Unresolved Type: " << *Ty << "\n";);

+ SmallString<32> Buffer;

+ raw_svector_ostream OS(Buffer);

+ OS << Name.str() << "_" << Layout.getTypeSizeInBits(Ty);

+ RetType = Builder.createBasicType(OS.str(), Layout.getTypeSizeInBits(Ty),

+ dwarf::DW_ATE_address,

+ llvm::DINode::FlagArtificial);

+ }

+ DITypeCache.insert({Ty, RetType});

+ return RetType;

+/// Build artificial debug info for C++ coroutine frames to allow users to

+/// inspect the contents of the frame directly

+///

+/// Create Debug information for coroutine frame with debug name "__coro_frame".

+/// The debug information for the fields of coroutine frame is constructed in

+/// the following way:

+/// 1. For every value in the frame, we search the uses of dbg.declare to find

+/// the corresponding debug variable for the value. If we can find the

+/// debug variable, we can get full and accurate debug information.

+/// 2. If we can't get debug information in step 1, we can only try to

+/// build the DIType from the Type. We do this in solveDIType. We only handle

+/// integer, floating-point, pointer and struct types for now.

+static void buildFrameDebugInfo(Function &F, coro::Shape &Shape,

+ FrameDataInfo &FrameData) {

+ DISubprogram *DIS = F.getSubprogram();

+ // If there is no DISubprogram for F, it implies the function is not compiled

+ // with debug info. So we don't need to generate debug info for the frame

+ // either.

+ if (!DIS || !DIS->getUnit() ||

+ !dwarf::isCPlusPlus(

+ (dwarf::SourceLanguage)DIS->getUnit()->getSourceLanguage()))

+ return;

+ assert(Shape.ABI == coro::ABI::Switch &&

+ "We could only build debug infomation for C++ coroutine now.\n");

+ DIBuilder DBuilder(*F.getParent(), /*AllowUnresolved*/ false);

+ AllocaInst *PromiseAlloca = Shape.getPromiseAlloca();

+ assert(PromiseAlloca &&

+ "Coroutine with switch ABI should own Promise alloca");

+ TinyPtrVector<DbgDeclareInst *> DIs = FindDbgDeclareUses(PromiseAlloca);

+ if (DIs.empty())

+ return;

+ DbgDeclareInst *PromiseDDI = DIs.front();

+ DILocalVariable *PromiseDIVariable = PromiseDDI->getVariable();

+ DILocalScope *PromiseDIScope = PromiseDIVariable->getScope();

+ DIFile *DFile = PromiseDIScope->getFile();

+ DILocation *DILoc = PromiseDDI->getDebugLoc().get();

+ unsigned LineNum = PromiseDIVariable->getLine();

+ DICompositeType *FrameDITy = DBuilder.createStructType(

+ DIS, "__coro_frame_ty", DFile, LineNum, Shape.FrameSize * 8,

+ Shape.FrameAlign.value() * 8, llvm::DINode::FlagArtificial, nullptr,

+ llvm::DINodeArray());

+ StructType *FrameTy = Shape.FrameTy;

+ SmallVector<Metadata *, 16> Elements;

+ DataLayout Layout = F.getParent()->getDataLayout();

+ DenseMap<Value *, DILocalVariable *> DIVarCache;

+ cacheDIVar(FrameData, DIVarCache);

+ unsigned ResumeIndex = coro::Shape::SwitchFieldIndex::Resume;

+ unsigned DestroyIndex = coro::Shape::SwitchFieldIndex::Destroy;

+ unsigned IndexIndex = Shape.SwitchLowering.IndexField;

+ DenseMap<unsigned, StringRef> NameCache;

+ NameCache.insert({ResumeIndex, "__resume_fn"});

+ NameCache.insert({DestroyIndex, "__destroy_fn"});

+ NameCache.insert({IndexIndex, "__coro_index"});

+ Type *ResumeFnTy = FrameTy->getElementType(ResumeIndex),

+ *DestroyFnTy = FrameTy->getElementType(DestroyIndex),

+ *IndexTy = FrameTy->getElementType(IndexIndex);

+ DenseMap<unsigned, DIType *> TyCache;

+ TyCache.insert({ResumeIndex,

+ DBuilder.createBasicType("__resume_fn",

+ Layout.getTypeSizeInBits(ResumeFnTy),

+ dwarf::DW_ATE_address)});

+ TyCache.insert(

+ {DestroyIndex, DBuilder.createBasicType(

+ "__destroy_fn", Layout.getTypeSizeInBits(DestroyFnTy),

+ dwarf::DW_ATE_address)});

+ /// FIXME: If we fill the field `SizeInBits` with the actual size of

+ /// __coro_index in bits, then __coro_index wouldn't show in the debugger.

+ TyCache.insert({IndexIndex, DBuilder.createBasicType(

+ "__coro_index",

+ (Layout.getTypeSizeInBits(IndexTy) < 8)

+ ? 8

+ : Layout.getTypeSizeInBits(IndexTy),

+ dwarf::DW_ATE_unsigned_char)});

+ for (auto *V : FrameData.getAllDefs()) {

+ if (DIVarCache.find(V) == DIVarCache.end())

+ continue;

+ auto Index = FrameData.getFieldIndex(V);

+ NameCache.insert({Index, DIVarCache[V]->getName()});

+ TyCache.insert({Index, DIVarCache[V]->getType()});

+ }

+ // Cache from index to (Align, Offset Pair)

+ DenseMap<unsigned, std::pair<unsigned, unsigned>> OffsetCache;

+ // The Align and Offset of Resume function and Destroy function are fixed.

+ OffsetCache.insert({ResumeIndex, {8, 0}});

+ OffsetCache.insert({DestroyIndex, {8, 8}});

+ OffsetCache.insert(

+ {IndexIndex,

+ {Shape.SwitchLowering.IndexAlign, Shape.SwitchLowering.IndexOffset}});

+ for (auto *V : FrameData.getAllDefs()) {

+ auto Index = FrameData.getFieldIndex(V);

+ OffsetCache.insert(

+ {Index, {FrameData.getAlign(V), FrameData.getOffset(V)}});

+ }

+ DenseMap<Type *, DIType *> DITypeCache;

+ // This counter is used to avoid identical type names. E.g., there would be

+ // many i32 and i64 types in one coroutine, and we would use i32_0 and

+ // i32_1 to tell them apart, since it makes no sense for the names of the

+ // fields to conflict with each other.

+ unsigned UnknownTypeNum = 0;

+ for (unsigned Index = 0; Index < FrameTy->getNumElements(); Index++) {

+ if (OffsetCache.find(Index) == OffsetCache.end())

+ continue;

+ std::string Name;

+ uint64_t SizeInBits;

+ uint32_t AlignInBits;

+ uint64_t OffsetInBits;

+ DIType *DITy = nullptr;

+ Type *Ty = FrameTy->getElementType(Index);

+ assert(Ty->isSized() && "We can't handle type which is not sized.\n");

+ SizeInBits = Layout.getTypeSizeInBits(Ty).getFixedSize();

+ AlignInBits = OffsetCache[Index].first * 8;

+ OffsetInBits = OffsetCache[Index].second * 8;

+ if (NameCache.find(Index) != NameCache.end()) {

+ Name = NameCache[Index].str();

+ DITy = TyCache[Index];

+ } else {

+ DITy = solveDIType(DBuilder, Ty, Layout, FrameDITy, LineNum, DITypeCache);

+ assert(DITy && "SolveDIType shouldn't return nullptr.\n");

+ Name = DITy->getName().str();

+ Name += "_" + std::to_string(UnknownTypeNum);

+ UnknownTypeNum++;

+ }

+ Elements.push_back(DBuilder.createMemberType(

+ FrameDITy, Name, DFile, LineNum, SizeInBits, AlignInBits, OffsetInBits,

+ llvm::DINode::FlagArtificial, DITy));

+ }

+ DBuilder.replaceArrays(FrameDITy, DBuilder.getOrCreateArray(Elements));

+ auto *FrameDIVar = DBuilder.createAutoVariable(PromiseDIScope, "__coro_frame",

+ DFile, LineNum, FrameDITy,

+ true, DINode::FlagArtificial);

+ assert(FrameDIVar->isValidLocationForIntrinsic(PromiseDDI->getDebugLoc()));

+ // Subprogram would have RetainedNodes field which records the debug

+ // variables it contained. So we need to add __coro_frame to the

+ // RetainedNodes of it.

+ //

+ // If we don't add __coro_frame to the RetainedNodes, user may get

+ // `no symbol __coro_frame in context` rather than `__coro_frame`

+ // is optimized out, which is more precise.

+ if (auto *SubProgram = dyn_cast<DISubprogram>(PromiseDIScope)) {

+ auto RetainedNodes = SubProgram->getRetainedNodes();

+ SmallVector<Metadata *, 32> RetainedNodesVec(RetainedNodes.begin(),

+ RetainedNodes.end());

+ RetainedNodesVec.push_back(FrameDIVar);

+ SubProgram->replaceOperandWith(

+ 7, (MDTuple::get(F.getContext(), RetainedNodesVec)));

+ }

+ DBuilder.insertDeclare(Shape.FramePtr, FrameDIVar,

+ DBuilder.createExpression(), DILoc,

+ Shape.FramePtr->getNextNode());

// Build a struct that will keep state for an active coroutine.

// struct f.frame {

// ResumeFnTy ResumeFnAddr;

@@ -734,7 +1101,11 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape,

return StructType::create(C, Name);

}();

- FrameTypeBuilder B(C, DL);

+ // We will use this value to cap the alignment of spilled values.

+ Optional<Align> MaxFrameAlignment;

+ if (Shape.ABI == coro::ABI::Async)

+ MaxFrameAlignment = Shape.AsyncLowering.getContextAlignment();

+ FrameTypeBuilder B(C, DL, MaxFrameAlignment);

AllocaInst *PromiseAlloca = Shape.getPromiseAlloca();

Optional<FieldIDType> SwitchIndexFieldId;

@@ -781,7 +1152,14 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape,

PromiseAlloca, DenseMap<Instruction *, llvm::Optional<APInt>>{}, false);

// Create an entry for every spilled value.

for (auto &S : FrameData.Spills) {

- FieldIDType Id = B.addField(S.first->getType(), None);

+ Type *FieldType = S.first->getType();

+ // For byval arguments, we need to store the pointed value in the frame,

+ // instead of the pointer itself.

+ if (const Argument *A = dyn_cast<Argument>(S.first))

+ if (A->hasByValAttr())

+ FieldType = A->getParamByValType();

+ FieldIDType Id =

+ B.addField(FieldType, None, false /*header*/, true /*IsSpillOfValue*/);

FrameData.setFieldIndex(S.first, Id);

}

@@ -791,15 +1169,18 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape,

Shape.FrameSize = B.getStructSize();

switch (Shape.ABI) {

- case coro::ABI::Switch:

+ case coro::ABI::Switch: {

// In the switch ABI, remember the switch-index field.

- Shape.SwitchLowering.IndexField =

- B.getLayoutFieldIndex(*SwitchIndexFieldId);

+ auto IndexField = B.getLayoutField(*SwitchIndexFieldId);

+ Shape.SwitchLowering.IndexField = IndexField.LayoutFieldIndex;

+ Shape.SwitchLowering.IndexAlign = IndexField.Alignment.value();

+ Shape.SwitchLowering.IndexOffset = IndexField.Offset;

// Also round the frame size up to a multiple of its alignment, as is

// generally expected in C/C++.

Shape.FrameSize = alignTo(Shape.FrameSize, Shape.FrameAlign);

break;

+ }

// In the retcon ABI, remember whether the frame is inline in the storage.

case coro::ABI::Retcon:

@@ -863,7 +1244,7 @@ struct AllocaUseVisitor : PtrUseVisitor<AllocaUseVisitor> {

: PtrUseVisitor(DL), DT(DT), CoroBegin(CB), Checker(Checker) {}

void visit(Instruction &I) {

- UserBBs.insert(I.getParent());

+ Users.insert(&I);

Base::visit(I);

// If the pointer is escaped prior to CoroBegin, we have to assume it would

// be written into before CoroBegin as well.

@@ -966,6 +1347,12 @@ struct AllocaUseVisitor : PtrUseVisitor<AllocaUseVisitor> {

handleAlias(GEPI);

}

+ void visitIntrinsicInst(IntrinsicInst &II) {

+ if (II.getIntrinsicID() != Intrinsic::lifetime_start)

+ return Base::visitIntrinsicInst(II);

+ LifetimeStarts.insert(&II);

+ }

void visitCallBase(CallBase &CB) {

for (unsigned Op = 0, OpCount = CB.getNumArgOperands(); Op < OpCount; ++Op)

if (U->get() == CB.getArgOperand(Op) && !CB.doesNotCapture(Op))

@@ -999,18 +1386,40 @@ private:

// after CoroBegin. Each entry contains the instruction and the offset in the

// original Alloca. They need to be recreated after CoroBegin off the frame.

DenseMap<Instruction *, llvm::Optional<APInt>> AliasOffetMap{};

- SmallPtrSet<BasicBlock *, 2> UserBBs{};

+ SmallPtrSet<Instruction *, 4> Users{};

+ SmallPtrSet<IntrinsicInst *, 2> LifetimeStarts{};

bool MayWriteBeforeCoroBegin{false};

mutable llvm::Optional<bool> ShouldLiveOnFrame{};

bool computeShouldLiveOnFrame() const {

+ // If lifetime information is available, we check it first since it's

+ // more precise. We look at every pair of lifetime.start intrinsic and

+ // every instruction that uses the pointer to see if they cross suspension

+ // points. The uses cover both direct uses as well as indirect uses.

+ if (!LifetimeStarts.empty()) {

+ for (auto *I : Users)

+ for (auto *S : LifetimeStarts)

+ if (Checker.isDefinitionAcrossSuspend(*S, I))

+ return true;

+ return false;

+ }

+ // FIXME: Ideally the isEscaped check should come at the beginning.

+ // However there are a few loose ends that need to be fixed first before

+ // we can do that. We need to make sure we are not over-conservative, so

+ // that the data accessed in-between await_suspend and symmetric transfer

+ // is always put on the stack, and also data accessed after coro.end is

+ // always put on the stack (esp the return object). To fix that, we need

+ // to:

+ // 1) Potentially treat sret as nocapture in calls

+ // 2) Special handle the return object and put it on the stack

+ // 3) Utilize lifetime.end intrinsic

if (PI.isEscaped())

return true;

- for (auto *BB1 : UserBBs)

- for (auto *BB2 : UserBBs)

- if (Checker.hasPathCrossingSuspendPoint(BB1, BB2))

+ for (auto *U1 : Users)

+ for (auto *U2 : Users)

+ if (Checker.isDefinitionAcrossSuspend(*U1, U2))

return true;

return false;

@@ -1072,6 +1481,15 @@ static Instruction *splitBeforeCatchSwitch(CatchSwitchInst *CatchSwitch) {

return CleanupRet;

}

+static void createFramePtr(coro::Shape &Shape) {

+ auto *CB = Shape.CoroBegin;

+ IRBuilder<> Builder(CB->getNextNode());

+ StructType *FrameTy = Shape.FrameTy;

+ PointerType *FramePtrTy = FrameTy->getPointerTo();

+ Shape.FramePtr =

+ cast<Instruction>(Builder.CreateBitCast(CB, FramePtrTy, "FramePtr"));

// Replace all alloca and SSA values that are accessed across suspend points

// with GetElementPointer from coroutine frame + loads and stores. Create an

// AllocaSpillBB that will become the new entry block for the resume parts of

@@ -1098,11 +1516,9 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,

coro::Shape &Shape) {

auto *CB = Shape.CoroBegin;

LLVMContext &C = CB->getContext();

- IRBuilder<> Builder(CB->getNextNode());

+ IRBuilder<> Builder(C);

StructType *FrameTy = Shape.FrameTy;

- PointerType *FramePtrTy = FrameTy->getPointerTo();

- auto *FramePtr =

- cast<Instruction>(Builder.CreateBitCast(CB, FramePtrTy, "FramePtr"));

+ Instruction *FramePtr = Shape.FramePtr;

DominatorTree DT(*CB->getFunction());

SmallDenseMap<llvm::Value *, llvm::AllocaInst *, 4> DbgPtrAllocaCache;

@@ -1146,9 +1562,11 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,

for (auto const &E : FrameData.Spills) {

Value *Def = E.first;

+ auto SpillAlignment = Align(FrameData.getAlign(Def));

// Create a store instruction storing the value into the

// coroutine frame.

Instruction *InsertPt = nullptr;

+ bool NeedToCopyArgPtrValue = false;

if (auto *Arg = dyn_cast<Argument>(Def)) {

// For arguments, we will place the store instruction right after

// the coroutine frame pointer instruction, i.e. bitcast of

@@ -1159,6 +1577,9 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,

// from the coroutine function.

Arg->getParent()->removeParamAttr(Arg->getArgNo(), Attribute::NoCapture);

+ if (Arg->hasByValAttr())

+ NeedToCopyArgPtrValue = true;

} else if (auto *CSI = dyn_cast<AnyCoroSuspendInst>(Def)) {

// Don't spill immediately after a suspend; splitting assumes

// that the suspend will be followed by a branch.

@@ -1193,7 +1614,15 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,

Builder.SetInsertPoint(InsertPt);

auto *G = Builder.CreateConstInBoundsGEP2_32(

FrameTy, FramePtr, 0, Index, Def->getName() + Twine(".spill.addr"));

- Builder.CreateStore(Def, G);

+ if (NeedToCopyArgPtrValue) {

+ // For byval arguments, we need to store the pointed value in the frame,

+ // instead of the pointer itself.

+ auto *Value =

+ Builder.CreateLoad(Def->getType()->getPointerElementType(), Def);

+ Builder.CreateAlignedStore(Value, G, SpillAlignment);

+ } else {

+ Builder.CreateAlignedStore(Def, G, SpillAlignment);

+ }

BasicBlock *CurrentBlock = nullptr;

Value *CurrentReload = nullptr;

@@ -1207,9 +1636,12 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,

auto *GEP = GetFramePointer(E.first);

GEP->setName(E.first->getName() + Twine(".reload.addr"));

- CurrentReload = Builder.CreateLoad(

- FrameTy->getElementType(FrameData.getFieldIndex(E.first)), GEP,

- E.first->getName() + Twine(".reload"));

+ if (NeedToCopyArgPtrValue)

+ CurrentReload = GEP;

+ else

+ CurrentReload = Builder.CreateAlignedLoad(

+ FrameTy->getElementType(FrameData.getFieldIndex(E.first)), GEP,

+ SpillAlignment, E.first->getName() + Twine(".reload"));

TinyPtrVector<DbgDeclareInst *> DIs = FindDbgDeclareUses(Def);

for (DbgDeclareInst *DDI : DIs) {

@@ -1223,7 +1655,7 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,

&*Builder.GetInsertPoint());

// This dbg.declare is for the main function entry point. It

// will be deleted in all coro-split functions.

- coro::salvageDebugInfo(DbgPtrAllocaCache, DDI);

+ coro::salvageDebugInfo(DbgPtrAllocaCache, DDI, Shape.ReuseFrameSlot);

}

@@ -1271,8 +1703,8 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,

}

// If we found any alloca, replace all of their remaining uses with GEP

- // instructions. Because new dbg.declare have been created for these alloca,

- // we also delete the original dbg.declare and replace other uses with undef.

+ // instructions. To retain debuggability, we replace the uses of allocas for

+ // dbg.declares and dbg.values with the reload from the frame.

// Note: We cannot replace the alloca with GEP instructions indiscriminately,

// as some of the uses may not be dominated by CoroBegin.

Builder.SetInsertPoint(&Shape.AllocaSpillBlock->front());

@@ -1290,17 +1722,10 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,

auto *G = GetFramePointer(Alloca);

G->setName(Alloca->getName() + Twine(".reload.addr"));

- SmallPtrSet<BasicBlock *, 4> SeenDbgBBs;

- TinyPtrVector<DbgDeclareInst *> DIs = FindDbgDeclareUses(Alloca);

- if (!DIs.empty())

- DIBuilder(*Alloca->getModule(),

- /*AllowUnresolved*/ false)

- .insertDeclare(G, DIs.front()->getVariable(),

- DIs.front()->getExpression(),

- DIs.front()->getDebugLoc(), DIs.front());

- for (auto *DI : FindDbgDeclareUses(Alloca))

- DI->eraseFromParent();

- replaceDbgUsesWithUndef(Alloca);

+ SmallVector<DbgVariableIntrinsic *, 4> DIs;

+ findDbgUsers(DIs, Alloca);

+ for (auto *DVI : DIs)

+ DVI->replaceUsesOfWith(Alloca, G);

for (Instruction *I : UsersToUpdate)

I->replaceUsesOfWith(Alloca, G);

@@ -1326,7 +1751,7 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,

auto *FramePtrRaw =

Builder.CreateBitCast(FramePtr, Type::getInt8PtrTy(C));

auto *AliasPtr = Builder.CreateGEP(

- FramePtrRaw,

+ Type::getInt8Ty(C), FramePtrRaw,

ConstantInt::get(Type::getInt64Ty(C), Alias.second.getValue()));

auto *AliasPtrTyped =

Builder.CreateBitCast(AliasPtr, Alias.first->getType());

@@ -1337,77 +1762,6 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,

return FramePtr;

}

-// Sets the unwind edge of an instruction to a particular successor.

-static void setUnwindEdgeTo(Instruction *TI, BasicBlock *Succ) {

- if (auto *II = dyn_cast<InvokeInst>(TI))

- II->setUnwindDest(Succ);

- else if (auto *CS = dyn_cast<CatchSwitchInst>(TI))

- CS->setUnwindDest(Succ);

- else if (auto *CR = dyn_cast<CleanupReturnInst>(TI))

- CR->setUnwindDest(Succ);

- else

- llvm_unreachable("unexpected terminator instruction");

-// Replaces all uses of OldPred with the NewPred block in all PHINodes in a

-// block.

-static void updatePhiNodes(BasicBlock *DestBB, BasicBlock *OldPred,

- BasicBlock *NewPred, PHINode *Until = nullptr) {

- unsigned BBIdx = 0;

- for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) {

- PHINode *PN = cast<PHINode>(I);

- // We manually update the LandingPadReplacement PHINode and it is the last

- // PHI Node. So, if we find it, we are done.

- if (Until == PN)

- break;

- // Reuse the previous value of BBIdx if it lines up. In cases where we

- // have multiple phi nodes with *lots* of predecessors, this is a speed

- // win because we don't have to scan the PHI looking for TIBB. This

- // happens because the BB list of PHI nodes are usually in the same

- // order.

- if (PN->getIncomingBlock(BBIdx) != OldPred)

- BBIdx = PN->getBasicBlockIndex(OldPred);

- assert(BBIdx != (unsigned)-1 && "Invalid PHI Index!");

- PN->setIncomingBlock(BBIdx, NewPred);

- }

-// Uses SplitEdge unless the successor block is an EHPad, in which case do EH

-// specific handling.

-static BasicBlock *ehAwareSplitEdge(BasicBlock *BB, BasicBlock *Succ,

- LandingPadInst *OriginalPad,

- PHINode *LandingPadReplacement) {

- auto *PadInst = Succ->getFirstNonPHI();

- if (!LandingPadReplacement && !PadInst->isEHPad())

- return SplitEdge(BB, Succ);

- auto *NewBB = BasicBlock::Create(BB->getContext(), "", BB->getParent(), Succ);

- setUnwindEdgeTo(BB->getTerminator(), NewBB);

- updatePhiNodes(Succ, BB, NewBB, LandingPadReplacement);

- if (LandingPadReplacement) {

- auto *NewLP = OriginalPad->clone();

- auto *Terminator = BranchInst::Create(Succ, NewBB);

- NewLP->insertBefore(Terminator);

- LandingPadReplacement->addIncoming(NewLP, NewBB);

- return NewBB;

- }

- Value *ParentPad = nullptr;

- if (auto *FuncletPad = dyn_cast<FuncletPadInst>(PadInst))

- ParentPad = FuncletPad->getParentPad();

- else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(PadInst))

- ParentPad = CatchSwitch->getParentPad();

- else

- llvm_unreachable("handling for other EHPads not implemented yet");

- auto *NewCleanupPad = CleanupPadInst::Create(ParentPad, {}, "", NewBB);

- CleanupReturnInst::Create(NewCleanupPad, Succ, NewBB);

- return NewBB;

// Moves the values in the PHIs in SuccBB that correspond to PredBB into a new

// PHI in InsertedBB.

static void movePHIValuesToInsertedBlock(BasicBlock *SuccBB,

@@ -1503,6 +1857,24 @@ static void rewritePHIsForCleanupPad(BasicBlock *CleanupPadBB,

}

+static void cleanupSinglePredPHIs(Function &F) {

+ SmallVector<PHINode *, 32> Worklist;

+ for (auto &BB : F) {

+ for (auto &Phi : BB.phis()) {

+ if (Phi.getNumIncomingValues() == 1) {

+ Worklist.push_back(&Phi);

+ } else

+ break;

+ }

+ while (!Worklist.empty()) {

+ auto *Phi = Worklist.back();

+ Worklist.pop_back();

+ auto *OriginalValue = Phi->getIncomingValue(0);

+ Phi->replaceAllUsesWith(OriginalValue);

+ }

static void rewritePHIs(BasicBlock &BB) {

// For every incoming edge we will create a block holding all

// incoming values in a single PHI node.

@@ -1610,11 +1982,16 @@ static void rewriteMaterializableInstructions(IRBuilder<> &IRB,

for (Instruction *U : E.second) {

// If we have not seen this block, materialize the value.

if (CurrentBlock != U->getParent()) {

- CurrentBlock = U->getParent();

+ bool IsInCoroSuspendBlock = isa<AnyCoroSuspendInst>(U);

+ CurrentBlock = IsInCoroSuspendBlock

+ ? U->getParent()->getSinglePredecessor()

+ : U->getParent();

CurrentMaterialization = cast<Instruction>(Def)->clone();

CurrentMaterialization->setName(Def->getName());

CurrentMaterialization->insertBefore(

- &*CurrentBlock->getFirstInsertionPt());

+ IsInCoroSuspendBlock ? CurrentBlock->getTerminator()

+ : &*CurrentBlock->getFirstInsertionPt());

}

if (auto *PN = dyn_cast<PHINode>(U)) {

assert(PN->getNumIncomingValues() == 1 &&

@@ -2101,24 +2478,6 @@ static void sinkLifetimeStartMarkers(Function &F, coro::Shape &Shape,

static void collectFrameAllocas(Function &F, coro::Shape &Shape,

const SuspendCrossingInfo &Checker,

SmallVectorImpl<AllocaInfo> &Allocas) {

- // Collect lifetime.start info for each alloca.

- using LifetimeStart = SmallPtrSet<Instruction *, 2>;

- llvm::DenseMap<AllocaInst *, std::unique_ptr<LifetimeStart>> LifetimeMap;

- for (Instruction &I : instructions(F)) {

- auto *II = dyn_cast<IntrinsicInst>(&I);

- if (!II || II->getIntrinsicID() != Intrinsic::lifetime_start)

- continue;

- if (auto *OpInst = dyn_cast<Instruction>(II->getOperand(1))) {

- if (auto *AI = dyn_cast<AllocaInst>(OpInst->stripPointerCasts())) {

- if (LifetimeMap.find(AI) == LifetimeMap.end())

- LifetimeMap[AI] = std::make_unique<LifetimeStart>();

- LifetimeMap[AI]->insert(isa<AllocaInst>(OpInst) ? II : OpInst);

- }

for (Instruction &I : instructions(F)) {

auto *AI = dyn_cast<AllocaInst>(&I);

if (!AI)

@@ -2128,23 +2487,6 @@ static void collectFrameAllocas(Function &F, coro::Shape &Shape,

if (AI == Shape.SwitchLowering.PromiseAlloca) {

continue;

}

- bool ShouldLiveOnFrame = false;

- auto Iter = LifetimeMap.find(AI);

- if (Iter != LifetimeMap.end()) {

- // Check against lifetime.start if the instruction has the info.

- for (User *U : I.users()) {

- for (auto *S : *Iter->second)

- if ((ShouldLiveOnFrame = Checker.isDefinitionAcrossSuspend(*S, U)))

- break;

- if (ShouldLiveOnFrame)

- break;

- }

- if (!ShouldLiveOnFrame)

- continue;

- }

- // At this point, either ShouldLiveOnFrame is true or we didn't have

- // lifetime information. We will need to rely on more precise pointer

- // tracking.

DominatorTree DT(F);

AllocaUseVisitor Visitor{F.getParent()->getDataLayout(), DT,

*Shape.CoroBegin, Checker};

@@ -2158,58 +2500,94 @@ static void collectFrameAllocas(Function &F, coro::Shape &Shape,

void coro::salvageDebugInfo(

SmallDenseMap<llvm::Value *, llvm::AllocaInst *, 4> &DbgPtrAllocaCache,

- DbgDeclareInst *DDI, bool LoadFromFramePtr) {

- Function *F = DDI->getFunction();

+ DbgVariableIntrinsic *DVI, bool ReuseFrameSlot) {

+ Function *F = DVI->getFunction();

IRBuilder<> Builder(F->getContext());

auto InsertPt = F->getEntryBlock().getFirstInsertionPt();

while (isa<IntrinsicInst>(InsertPt))

++InsertPt;

Builder.SetInsertPoint(&F->getEntryBlock(), InsertPt);

- DIExpression *Expr = DDI->getExpression();

+ DIExpression *Expr = DVI->getExpression();

// Follow the pointer arithmetic all the way to the incoming

// function argument and convert into a DIExpression.

- Value *Storage = DDI->getAddress();

+ bool OutermostLoad = true;

+ Value *Storage = DVI->getVariableLocationOp(0);

+ Value *OriginalStorage = Storage;

while (Storage) {

if (auto *LdInst = dyn_cast<LoadInst>(Storage)) {

Storage = LdInst->getOperand(0);

+ // FIXME: This is a heuristic that works around the fact that

+ // LLVM IR debug intrinsics cannot yet distinguish between

+ // memory and value locations: Because a dbg.declare(alloca) is

+ // implicitly a memory location no DW_OP_deref operation for the

+ // last direct load from an alloca is necessary. This condition

+ // effectively drops the *last* DW_OP_deref in the expression.

+ if (!OutermostLoad)

+ Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore);

+ OutermostLoad = false;

} else if (auto *StInst = dyn_cast<StoreInst>(Storage)) {

Storage = StInst->getOperand(0);

} else if (auto *GEPInst = dyn_cast<GetElementPtrInst>(Storage)) {

- Expr = llvm::salvageDebugInfoImpl(*GEPInst, Expr,

- /*WithStackValue=*/false);

+ SmallVector<Value *> AdditionalValues;

+ DIExpression *SalvagedExpr = llvm::salvageDebugInfoImpl(

+ *GEPInst, Expr,

+ /*WithStackValue=*/false, 0, AdditionalValues);

+ // Debug declares cannot currently handle additional location

+ // operands.

+ if (!SalvagedExpr || !AdditionalValues.empty())

+ break;

+ Expr = SalvagedExpr;

Storage = GEPInst->getOperand(0);

} else if (auto *BCInst = dyn_cast<llvm::BitCastInst>(Storage))

Storage = BCInst->getOperand(0);

else

break;

}

+ if (!Storage)

+ return;

// Store a pointer to the coroutine frame object in an alloca so it

// is available throughout the function when producing unoptimized

// code. Extending the lifetime this way is correct because the

// variable has been declared by a dbg.declare intrinsic.

- if (auto Arg = dyn_cast_or_null<llvm::Argument>(Storage)) {

- auto &Cached = DbgPtrAllocaCache[Storage];

- if (!Cached) {

- Cached = Builder.CreateAlloca(Storage->getType(), 0, nullptr,

- Arg->getName() + ".debug");

- Builder.CreateStore(Storage, Cached);

+ //

+ // Avoid creating an alloca that would be eliminated by optimization

+ // passes, which would make the corresponding dbg.declares invalid.

+ if (!ReuseFrameSlot && !EnableReuseStorageInFrame)

+ if (auto *Arg = dyn_cast<llvm::Argument>(Storage)) {

+ auto &Cached = DbgPtrAllocaCache[Storage];

+ if (!Cached) {

+ Cached = Builder.CreateAlloca(Storage->getType(), 0, nullptr,

+ Arg->getName() + ".debug");

+ Builder.CreateStore(Storage, Cached);

+ }

+ Storage = Cached;

+ // FIXME: LLVM lacks nuanced semantics to differentiate between

+ // memory and direct locations at the IR level. The backend will

+ // turn a dbg.declare(alloca, ..., DIExpression()) into a memory

+ // location. Thus, if there are deref and offset operations in the

+ // expression, we need to add a DW_OP_deref at the *start* of the

+ // expression to first load the contents of the alloca before

+ // adjusting it with the expression.

+ if (Expr && Expr->isComplex())

+ Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore);

}

- Storage = Cached;

- }

- // The FramePtr object adds one extra layer of indirection that

- // needs to be unwrapped.

- if (LoadFromFramePtr)

- Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore);

- auto &VMContext = DDI->getFunction()->getContext();

- DDI->setOperand(

- 0, MetadataAsValue::get(VMContext, ValueAsMetadata::get(Storage)));

- DDI->setOperand(2, MetadataAsValue::get(VMContext, Expr));

- if (auto *InsertPt = dyn_cast_or_null<Instruction>(Storage))

- DDI->moveAfter(InsertPt);

+ DVI->replaceVariableLocationOp(OriginalStorage, Storage);

+ DVI->setExpression(Expr);

+ /// It makes no sense to move the dbg.value intrinsic.

+ if (!isa<DbgValueInst>(DVI)) {

+ if (auto *InsertPt = dyn_cast<Instruction>(Storage))

+ DVI->moveAfter(InsertPt);

+ else if (isa<Argument>(Storage))

+ DVI->moveAfter(F->getEntryBlock().getFirstNonPHI());

+ }

}

void coro::buildCoroutineFrame(Function &F, Shape &Shape) {

- eliminateSwiftError(F, Shape);

+ // Don't eliminate swifterror in async functions that won't be split.

+ if (Shape.ABI != coro::ABI::Async || !Shape.CoroSuspends.empty())

+ eliminateSwiftError(F, Shape);

if (Shape.ABI == coro::ABI::Switch &&

Shape.SwitchLowering.PromiseAlloca) {

@@ -2246,6 +2624,10 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) {

}

+ // Later code makes structural assumptions about single-predecessor PHIs, e.g.

+ // that they are not live across a suspend point.

+ cleanupSinglePredPHIs(F);

// Transforms multi-edge PHI Nodes, so that any value feeding into a PHI will

// never have its definition separated from the PHI by the suspend point.

rewritePHIs(F);

@@ -2263,11 +2645,19 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) {

for (int Repeat = 0; Repeat < 4; ++Repeat) {

// See if there are materializable instructions across suspend points.

for (Instruction &I : instructions(F))

- if (materializable(I))

+ if (materializable(I)) {

for (User *U : I.users())

if (Checker.isDefinitionAcrossSuspend(I, U))

Spills[&I].push_back(cast<Instruction>(U));

+ // Manually add dbg.value metadata uses of I.

+ SmallVector<DbgValueInst *, 16> DVIs;

+ findDbgValues(DVIs, &I);

+ for (auto *DVI : DVIs)

+ if (Checker.isDefinitionAcrossSuspend(I, DVI))

+ Spills[&I].push_back(DVI);

+ }

if (Spills.empty())

break;

@@ -2280,7 +2670,8 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) {

}

sinkLifetimeStartMarkers(F, Shape, Checker);

- collectFrameAllocas(F, Shape, Checker, FrameData.Allocas);

+ if (Shape.ABI != coro::ABI::Async || !Shape.CoroSuspends.empty())

+ collectFrameAllocas(F, Shape, Checker, FrameData.Allocas);

LLVM_DEBUG(dumpAllocas(FrameData.Allocas));

// Collect the spills for arguments and other not-materializable values.

@@ -2339,12 +2730,30 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) {

FrameData.Spills[&I].push_back(cast<Instruction>(U));

}

+ // We don't want the layout of coroutine frame to be affected

+ // by debug information. So we only choose to salvage DbgValueInsts

+ // whose value is already in the frame.

+ // We would handle the dbg.values for allocas specially

+ for (auto &Iter : FrameData.Spills) {

+ auto *V = Iter.first;

+ SmallVector<DbgValueInst *, 16> DVIs;

+ findDbgValues(DVIs, V);

+ llvm::for_each(DVIs, [&](DbgValueInst *DVI) {

+ if (Checker.isDefinitionAcrossSuspend(*V, DVI))

+ FrameData.Spills[V].push_back(DVI);

+ });

+ }

LLVM_DEBUG(dumpSpills("Spills", FrameData.Spills));

if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce ||

Shape.ABI == coro::ABI::Async)

sinkSpillUsesAfterCoroBegin(F, FrameData, Shape.CoroBegin);

Shape.FrameTy = buildFrameType(F, Shape, FrameData);

- Shape.FramePtr = insertSpills(FrameData, Shape);

+ createFramePtr(Shape);

+ // For now, this works for C++ programs only.

+ buildFrameDebugInfo(F, Shape, FrameData);

+ insertSpills(FrameData, Shape);

lowerLocalAllocas(LocalAllocas, DeadInstructions);

for (auto I : DeadInstructions)