diff options
| author | Dimitry Andric <dim@FreeBSD.org> | 2021-07-29 20:15:26 +0000 | 
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2021-07-29 20:15:26 +0000 | 
| commit | 344a3780b2e33f6ca763666c380202b18aab72a3 (patch) | |
| tree | f0b203ee6eb71d7fdd792373e3c81eb18d6934dd /llvm/lib/Transforms/Coroutines/CoroFrame.cpp | |
| parent | b60736ec1405bb0a8dd40989f67ef4c93da068ab (diff) | |
vendor/llvm-project/llvmorg-13-init-16847-g88e66fa60ae5vendor/llvm-project/llvmorg-12.0.1-rc2-0-ge7dac564cd0evendor/llvm-project/llvmorg-12.0.1-0-gfed41342a82f
Diffstat (limited to 'llvm/lib/Transforms/Coroutines/CoroFrame.cpp')
| -rw-r--r-- | llvm/lib/Transforms/Coroutines/CoroFrame.cpp | 771 | 
1 files changed, 590 insertions, 181 deletions
| diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp index e53e7605b254..beae5fdac8ab 100644 --- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp @@ -12,8 +12,6 @@  // contain those values. All uses of those values are replaced with appropriate  // GEP + load from the coroutine frame. At the point of the definition we spill  // the value into the coroutine frame. -// -// TODO: pack values tightly using liveness info.  //===----------------------------------------------------------------------===//  #include "CoroInternal.h" @@ -32,6 +30,7 @@  #include "llvm/Support/MathExtras.h"  #include "llvm/Support/OptimizedStructLayout.h"  #include "llvm/Support/circular_raw_ostream.h" +#include "llvm/Support/raw_ostream.h"  #include "llvm/Transforms/Utils/BasicBlockUtils.h"  #include "llvm/Transforms/Utils/Local.h"  #include "llvm/Transforms/Utils/PromoteMemToReg.h" @@ -163,6 +162,16 @@ struct SuspendCrossingInfo {      return isDefinitionAcrossSuspend(DefBB, U);    } + +  bool isDefinitionAcrossSuspend(Value &V, User *U) const { +    if (auto *Arg = dyn_cast<Argument>(&V)) +      return isDefinitionAcrossSuspend(*Arg, U); +    if (auto *Inst = dyn_cast<Instruction>(&V)) +      return isDefinitionAcrossSuspend(*Inst, U); + +    llvm_unreachable( +        "Coroutine could only collect Argument and Instruction now."); +  }  };  } // end anonymous namespace @@ -336,6 +345,28 @@ struct FrameDataInfo {      FieldIndexMap[V] = Index;    } +  uint64_t getAlign(Value *V) const { +    auto Iter = FieldAlignMap.find(V); +    assert(Iter != FieldAlignMap.end()); +    return Iter->second; +  } + +  void setAlign(Value *V, uint64_t Align) { +    assert(FieldAlignMap.count(V) == 0); +    FieldAlignMap.insert({V, Align}); +  } + +  uint64_t getOffset(Value *V) const { +    auto Iter = FieldOffsetMap.find(V); +    assert(Iter != FieldOffsetMap.end()); +    return Iter->second; +  } + +  void setOffset(Value *V, uint64_t Offset) { +    assert(FieldOffsetMap.count(V) == 0); +    FieldOffsetMap.insert({V, Offset}); +  } +    // Remap the index of every field in the frame, using the final layout index.    void updateLayoutIndex(FrameTypeBuilder &B); @@ -347,6 +378,12 @@ private:    // with their original insertion field index. After the frame is built, their    // indexes will be updated into the final layout index.    DenseMap<Value *, uint32_t> FieldIndexMap; +  // Map from values to their alignment on the frame. They would be set after +  // the frame is built. +  DenseMap<Value *, uint64_t> FieldAlignMap; +  // Map from values to their offset on the frame. They would be set after +  // the frame is built. +  DenseMap<Value *, uint64_t> FieldOffsetMap;  };  } // namespace @@ -392,12 +429,15 @@ private:    Align StructAlign;    bool IsFinished = false; +  Optional<Align> MaxFrameAlignment; +    SmallVector<Field, 8> Fields;    DenseMap<Value*, unsigned> FieldIndexByKey;  public: -  FrameTypeBuilder(LLVMContext &Context, DataLayout const &DL) -      : DL(DL), Context(Context) {} +  FrameTypeBuilder(LLVMContext &Context, DataLayout const &DL, +                   Optional<Align> MaxFrameAlignment) +      : DL(DL), Context(Context), MaxFrameAlignment(MaxFrameAlignment) {}    /// Add a field to this structure for the storage of an `alloca`    /// instruction. @@ -448,17 +488,32 @@ public:    /// Add a field to this structure.    LLVM_NODISCARD FieldIDType addField(Type *Ty, MaybeAlign FieldAlignment, -                                      bool IsHeader = false) { +                                      bool IsHeader = false, +                                      bool IsSpillOfValue = false) {      assert(!IsFinished && "adding fields to a finished builder");      assert(Ty && "must provide a type for a field");      // The field size is always the alloc size of the type.      uint64_t FieldSize = DL.getTypeAllocSize(Ty); +    // For an alloca with size=0, we don't need to add a field and they +    // can just point to any index in the frame. Use index 0. +    if (FieldSize == 0) { +      return 0; +    } +      // The field alignment might not be the type alignment, but we need      // to remember the type alignment anyway to build the type. -    Align TyAlignment = DL.getABITypeAlign(Ty); -    if (!FieldAlignment) FieldAlignment = TyAlignment; +    // If we are spilling values we don't need to worry about ABI alignment +    // concerns. +    auto ABIAlign = DL.getABITypeAlign(Ty); +    Align TyAlignment = +        (IsSpillOfValue && MaxFrameAlignment) +            ? (*MaxFrameAlignment < ABIAlign ? *MaxFrameAlignment : ABIAlign) +            : ABIAlign; +    if (!FieldAlignment) { +      FieldAlignment = TyAlignment; +    }      // Lay out header fields immediately.      uint64_t Offset; @@ -492,12 +547,20 @@ public:      assert(IsFinished && "not yet finished!");      return Fields[Id].LayoutFieldIndex;    } + +  Field getLayoutField(FieldIDType Id) const { +    assert(IsFinished && "not yet finished!"); +    return Fields[Id]; +  }  };  } // namespace  void FrameDataInfo::updateLayoutIndex(FrameTypeBuilder &B) {    auto Updater = [&](Value *I) { -    setFieldIndex(I, B.getLayoutFieldIndex(getFieldIndex(I))); +    auto Field = B.getLayoutField(getFieldIndex(I)); +    setFieldIndex(I, Field.LayoutFieldIndex); +    setAlign(I, Field.Alignment.value()); +    setOffset(I, Field.Offset);    };    LayoutIndexUpdateStarted = true;    for (auto &S : Spills) @@ -510,7 +573,6 @@ void FrameDataInfo::updateLayoutIndex(FrameTypeBuilder &B) {  void FrameTypeBuilder::addFieldForAllocas(const Function &F,                                            FrameDataInfo &FrameData,                                            coro::Shape &Shape) { -  DenseMap<AllocaInst *, unsigned int> AllocaIndex;    using AllocaSetType = SmallVector<AllocaInst *, 4>;    SmallVector<AllocaSetType, 4> NonOverlapedAllocas; @@ -532,7 +594,6 @@ void FrameTypeBuilder::addFieldForAllocas(const Function &F,    if (!Shape.ReuseFrameSlot && !EnableReuseStorageInFrame) {      for (const auto &A : FrameData.Allocas) {        AllocaInst *Alloca = A.Alloca; -      AllocaIndex[Alloca] = NonOverlapedAllocas.size();        NonOverlapedAllocas.emplace_back(AllocaSetType(1, Alloca));      }      return; @@ -613,13 +674,11 @@ void FrameTypeBuilder::addFieldForAllocas(const Function &F,        bool CouldMerge = NoInference && Alignable;        if (!CouldMerge)          continue; -      AllocaIndex[Alloca] = AllocaIndex[*AllocaSet.begin()];        AllocaSet.push_back(Alloca);        Merged = true;        break;      }      if (!Merged) { -      AllocaIndex[Alloca] = NonOverlapedAllocas.size();        NonOverlapedAllocas.emplace_back(AllocaSetType(1, Alloca));      }    } @@ -716,6 +775,314 @@ void FrameTypeBuilder::finish(StructType *Ty) {    IsFinished = true;  } +static void cacheDIVar(FrameDataInfo &FrameData, +                       DenseMap<Value *, DILocalVariable *> &DIVarCache) { +  for (auto *V : FrameData.getAllDefs()) { +    if (DIVarCache.find(V) != DIVarCache.end()) +      continue; + +    auto DDIs = FindDbgDeclareUses(V); +    auto *I = llvm::find_if(DDIs, [](DbgDeclareInst *DDI) { +      return DDI->getExpression()->getNumElements() == 0; +    }); +    if (I != DDIs.end()) +      DIVarCache.insert({V, (*I)->getVariable()}); +  } +} + +/// Create name for Type. It uses MDString to store new created string to +/// avoid memory leak. +static StringRef solveTypeName(Type *Ty) { +  if (Ty->isIntegerTy()) { +    // The longest name in common may be '__int_128', which has 9 bits. +    SmallString<16> Buffer; +    raw_svector_ostream OS(Buffer); +    OS << "__int_" << cast<IntegerType>(Ty)->getBitWidth(); +    auto *MDName = MDString::get(Ty->getContext(), OS.str()); +    return MDName->getString(); +  } + +  if (Ty->isFloatingPointTy()) { +    if (Ty->isFloatTy()) +      return "__float_"; +    if (Ty->isDoubleTy()) +      return "__double_"; +    return "__floating_type_"; +  } + +  if (Ty->isPointerTy()) { +    auto *PtrTy = cast<PointerType>(Ty); +    Type *PointeeTy = PtrTy->getElementType(); +    auto Name = solveTypeName(PointeeTy); +    if (Name == "UnknownType") +      return "PointerType"; +    SmallString<16> Buffer; +    Twine(Name + "_Ptr").toStringRef(Buffer); +    auto *MDName = MDString::get(Ty->getContext(), Buffer.str()); +    return MDName->getString(); +  } + +  if (Ty->isStructTy()) { +    if (!cast<StructType>(Ty)->hasName()) +      return "__LiteralStructType_"; + +    auto Name = Ty->getStructName(); + +    SmallString<16> Buffer(Name); +    for_each(Buffer, [](auto &Iter) { +      if (Iter == '.' || Iter == ':') +        Iter = '_'; +    }); +    auto *MDName = MDString::get(Ty->getContext(), Buffer.str()); +    return MDName->getString(); +  } + +  return "UnknownType"; +} + +static DIType *solveDIType(DIBuilder &Builder, Type *Ty, DataLayout &Layout, +                           DIScope *Scope, unsigned LineNum, +                           DenseMap<Type *, DIType *> &DITypeCache) { +  if (DIType *DT = DITypeCache.lookup(Ty)) +    return DT; + +  StringRef Name = solveTypeName(Ty); + +  DIType *RetType = nullptr; + +  if (Ty->isIntegerTy()) { +    auto BitWidth = cast<IntegerType>(Ty)->getBitWidth(); +    RetType = Builder.createBasicType(Name, BitWidth, dwarf::DW_ATE_signed, +                                      llvm::DINode::FlagArtificial); +  } else if (Ty->isFloatingPointTy()) { +    RetType = Builder.createBasicType(Name, Layout.getTypeSizeInBits(Ty), +                                      dwarf::DW_ATE_float, +                                      llvm::DINode::FlagArtificial); +  } else if (Ty->isPointerTy()) { +    // Construct BasicType instead of PointerType to avoid infinite +    // search problem. +    // For example, we would be in trouble if we traverse recursively: +    // +    //  struct Node { +    //      Node* ptr; +    //  }; +    RetType = Builder.createBasicType(Name, Layout.getTypeSizeInBits(Ty), +                                      dwarf::DW_ATE_address, +                                      llvm::DINode::FlagArtificial); +  } else if (Ty->isStructTy()) { +    auto *DIStruct = Builder.createStructType( +        Scope, Name, Scope->getFile(), LineNum, Layout.getTypeSizeInBits(Ty), +        Layout.getPrefTypeAlignment(Ty), llvm::DINode::FlagArtificial, nullptr, +        llvm::DINodeArray()); + +    auto *StructTy = cast<StructType>(Ty); +    SmallVector<Metadata *, 16> Elements; +    for (unsigned I = 0; I < StructTy->getNumElements(); I++) { +      DIType *DITy = solveDIType(Builder, StructTy->getElementType(I), Layout, +                                 Scope, LineNum, DITypeCache); +      assert(DITy); +      Elements.push_back(Builder.createMemberType( +          Scope, DITy->getName(), Scope->getFile(), LineNum, +          DITy->getSizeInBits(), DITy->getAlignInBits(), +          Layout.getStructLayout(StructTy)->getElementOffsetInBits(I), +          llvm::DINode::FlagArtificial, DITy)); +    } + +    Builder.replaceArrays(DIStruct, Builder.getOrCreateArray(Elements)); + +    RetType = DIStruct; +  } else { +    LLVM_DEBUG(dbgs() << "Unresolved Type: " << *Ty << "\n";); +    SmallString<32> Buffer; +    raw_svector_ostream OS(Buffer); +    OS << Name.str() << "_" << Layout.getTypeSizeInBits(Ty); +    RetType = Builder.createBasicType(OS.str(), Layout.getTypeSizeInBits(Ty), +                                      dwarf::DW_ATE_address, +                                      llvm::DINode::FlagArtificial); +  } + +  DITypeCache.insert({Ty, RetType}); +  return RetType; +} + +/// Build artificial debug info for C++ coroutine frames to allow users to +/// inspect the contents of the frame directly +/// +/// Create Debug information for coroutine frame with debug name "__coro_frame". +/// The debug information for the fields of coroutine frame is constructed from +/// the following way: +/// 1. For all the value in the Frame, we search the use of dbg.declare to find +///    the corresponding debug variables for the value. If we can find the +///    debug variable, we can get full and accurate debug information. +/// 2. If we can't get debug information in step 1 and 2, we could only try to +///    build the DIType by Type. We did this in solveDIType. We only handle +///    integer, float, double, integer type and struct type for now. +static void buildFrameDebugInfo(Function &F, coro::Shape &Shape, +                                FrameDataInfo &FrameData) { +  DISubprogram *DIS = F.getSubprogram(); +  // If there is no DISubprogram for F, it implies the Function are not compiled +  // with debug info. So we also don't need to generate debug info for the frame +  // neither. +  if (!DIS || !DIS->getUnit() || +      !dwarf::isCPlusPlus( +          (dwarf::SourceLanguage)DIS->getUnit()->getSourceLanguage())) +    return; + +  assert(Shape.ABI == coro::ABI::Switch && +         "We could only build debug infomation for C++ coroutine now.\n"); + +  DIBuilder DBuilder(*F.getParent(), /*AllowUnresolved*/ false); + +  AllocaInst *PromiseAlloca = Shape.getPromiseAlloca(); +  assert(PromiseAlloca && +         "Coroutine with switch ABI should own Promise alloca"); + +  TinyPtrVector<DbgDeclareInst *> DIs = FindDbgDeclareUses(PromiseAlloca); +  if (DIs.empty()) +    return; + +  DbgDeclareInst *PromiseDDI = DIs.front(); +  DILocalVariable *PromiseDIVariable = PromiseDDI->getVariable(); +  DILocalScope *PromiseDIScope = PromiseDIVariable->getScope(); +  DIFile *DFile = PromiseDIScope->getFile(); +  DILocation *DILoc = PromiseDDI->getDebugLoc().get(); +  unsigned LineNum = PromiseDIVariable->getLine(); + +  DICompositeType *FrameDITy = DBuilder.createStructType( +      DIS, "__coro_frame_ty", DFile, LineNum, Shape.FrameSize * 8, +      Shape.FrameAlign.value() * 8, llvm::DINode::FlagArtificial, nullptr, +      llvm::DINodeArray()); +  StructType *FrameTy = Shape.FrameTy; +  SmallVector<Metadata *, 16> Elements; +  DataLayout Layout = F.getParent()->getDataLayout(); + +  DenseMap<Value *, DILocalVariable *> DIVarCache; +  cacheDIVar(FrameData, DIVarCache); + +  unsigned ResumeIndex = coro::Shape::SwitchFieldIndex::Resume; +  unsigned DestroyIndex = coro::Shape::SwitchFieldIndex::Destroy; +  unsigned IndexIndex = Shape.SwitchLowering.IndexField; + +  DenseMap<unsigned, StringRef> NameCache; +  NameCache.insert({ResumeIndex, "__resume_fn"}); +  NameCache.insert({DestroyIndex, "__destroy_fn"}); +  NameCache.insert({IndexIndex, "__coro_index"}); + +  Type *ResumeFnTy = FrameTy->getElementType(ResumeIndex), +       *DestroyFnTy = FrameTy->getElementType(DestroyIndex), +       *IndexTy = FrameTy->getElementType(IndexIndex); + +  DenseMap<unsigned, DIType *> TyCache; +  TyCache.insert({ResumeIndex, +                  DBuilder.createBasicType("__resume_fn", +                                           Layout.getTypeSizeInBits(ResumeFnTy), +                                           dwarf::DW_ATE_address)}); +  TyCache.insert( +      {DestroyIndex, DBuilder.createBasicType( +                         "__destroy_fn", Layout.getTypeSizeInBits(DestroyFnTy), +                         dwarf::DW_ATE_address)}); + +  /// FIXME: If we fill the field `SizeInBits` with the actual size of +  /// __coro_index in bits, then __coro_index wouldn't show in the debugger. +  TyCache.insert({IndexIndex, DBuilder.createBasicType( +                                  "__coro_index", +                                  (Layout.getTypeSizeInBits(IndexTy) < 8) +                                      ? 8 +                                      : Layout.getTypeSizeInBits(IndexTy), +                                  dwarf::DW_ATE_unsigned_char)}); + +  for (auto *V : FrameData.getAllDefs()) { +    if (DIVarCache.find(V) == DIVarCache.end()) +      continue; + +    auto Index = FrameData.getFieldIndex(V); + +    NameCache.insert({Index, DIVarCache[V]->getName()}); +    TyCache.insert({Index, DIVarCache[V]->getType()}); +  } + +  // Cache from index to (Align, Offset Pair) +  DenseMap<unsigned, std::pair<unsigned, unsigned>> OffsetCache; +  // The Align and Offset of Resume function and Destroy function are fixed. +  OffsetCache.insert({ResumeIndex, {8, 0}}); +  OffsetCache.insert({DestroyIndex, {8, 8}}); +  OffsetCache.insert( +      {IndexIndex, +       {Shape.SwitchLowering.IndexAlign, Shape.SwitchLowering.IndexOffset}}); + +  for (auto *V : FrameData.getAllDefs()) { +    auto Index = FrameData.getFieldIndex(V); + +    OffsetCache.insert( +        {Index, {FrameData.getAlign(V), FrameData.getOffset(V)}}); +  } + +  DenseMap<Type *, DIType *> DITypeCache; +  // This counter is used to avoid same type names. e.g., there would be +  // many i32 and i64 types in one coroutine. And we would use i32_0 and +  // i32_1 to avoid the same type. Since it makes no sense the name of the +  // fields confilicts with each other. +  unsigned UnknownTypeNum = 0; +  for (unsigned Index = 0; Index < FrameTy->getNumElements(); Index++) { +    if (OffsetCache.find(Index) == OffsetCache.end()) +      continue; + +    std::string Name; +    uint64_t SizeInBits; +    uint32_t AlignInBits; +    uint64_t OffsetInBits; +    DIType *DITy = nullptr; + +    Type *Ty = FrameTy->getElementType(Index); +    assert(Ty->isSized() && "We can't handle type which is not sized.\n"); +    SizeInBits = Layout.getTypeSizeInBits(Ty).getFixedSize(); +    AlignInBits = OffsetCache[Index].first * 8; +    OffsetInBits = OffsetCache[Index].second * 8; + +    if (NameCache.find(Index) != NameCache.end()) { +      Name = NameCache[Index].str(); +      DITy = TyCache[Index]; +    } else { +      DITy = solveDIType(DBuilder, Ty, Layout, FrameDITy, LineNum, DITypeCache); +      assert(DITy && "SolveDIType shouldn't return nullptr.\n"); +      Name = DITy->getName().str(); +      Name += "_" + std::to_string(UnknownTypeNum); +      UnknownTypeNum++; +    } + +    Elements.push_back(DBuilder.createMemberType( +        FrameDITy, Name, DFile, LineNum, SizeInBits, AlignInBits, OffsetInBits, +        llvm::DINode::FlagArtificial, DITy)); +  } + +  DBuilder.replaceArrays(FrameDITy, DBuilder.getOrCreateArray(Elements)); + +  auto *FrameDIVar = DBuilder.createAutoVariable(PromiseDIScope, "__coro_frame", +                                                 DFile, LineNum, FrameDITy, +                                                 true, DINode::FlagArtificial); +  assert(FrameDIVar->isValidLocationForIntrinsic(PromiseDDI->getDebugLoc())); + +  // Subprogram would have ContainedNodes field which records the debug +  // variables it contained. So we need to add __coro_frame to the +  // ContainedNodes of it. +  // +  // If we don't add __coro_frame to the RetainedNodes, user may get +  // `no symbol __coro_frame in context` rather than `__coro_frame` +  // is optimized out, which is more precise. +  if (auto *SubProgram = dyn_cast<DISubprogram>(PromiseDIScope)) { +    auto RetainedNodes = SubProgram->getRetainedNodes(); +    SmallVector<Metadata *, 32> RetainedNodesVec(RetainedNodes.begin(), +                                                 RetainedNodes.end()); +    RetainedNodesVec.push_back(FrameDIVar); +    SubProgram->replaceOperandWith( +        7, (MDTuple::get(F.getContext(), RetainedNodesVec))); +  } + +  DBuilder.insertDeclare(Shape.FramePtr, FrameDIVar, +                         DBuilder.createExpression(), DILoc, +                         Shape.FramePtr->getNextNode()); +} +  // Build a struct that will keep state for an active coroutine.  //   struct f.frame {  //     ResumeFnTy ResumeFnAddr; @@ -734,7 +1101,11 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape,      return StructType::create(C, Name);    }(); -  FrameTypeBuilder B(C, DL); +  // We will use this value to cap the alignment of spilled values. +  Optional<Align> MaxFrameAlignment; +  if (Shape.ABI == coro::ABI::Async) +    MaxFrameAlignment = Shape.AsyncLowering.getContextAlignment(); +  FrameTypeBuilder B(C, DL, MaxFrameAlignment);    AllocaInst *PromiseAlloca = Shape.getPromiseAlloca();    Optional<FieldIDType> SwitchIndexFieldId; @@ -781,7 +1152,14 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape,          PromiseAlloca, DenseMap<Instruction *, llvm::Optional<APInt>>{}, false);    // Create an entry for every spilled value.    for (auto &S : FrameData.Spills) { -    FieldIDType Id = B.addField(S.first->getType(), None); +    Type *FieldType = S.first->getType(); +    // For byval arguments, we need to store the pointed value in the frame, +    // instead of the pointer itself. +    if (const Argument *A = dyn_cast<Argument>(S.first)) +      if (A->hasByValAttr()) +        FieldType = A->getParamByValType(); +    FieldIDType Id = +        B.addField(FieldType, None, false /*header*/, true /*IsSpillOfValue*/);      FrameData.setFieldIndex(S.first, Id);    } @@ -791,15 +1169,18 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape,    Shape.FrameSize = B.getStructSize();    switch (Shape.ABI) { -  case coro::ABI::Switch: +  case coro::ABI::Switch: {      // In the switch ABI, remember the switch-index field. -    Shape.SwitchLowering.IndexField = -        B.getLayoutFieldIndex(*SwitchIndexFieldId); +    auto IndexField = B.getLayoutField(*SwitchIndexFieldId); +    Shape.SwitchLowering.IndexField = IndexField.LayoutFieldIndex; +    Shape.SwitchLowering.IndexAlign = IndexField.Alignment.value(); +    Shape.SwitchLowering.IndexOffset = IndexField.Offset;      // Also round the frame size up to a multiple of its alignment, as is      // generally expected in C/C++.      Shape.FrameSize = alignTo(Shape.FrameSize, Shape.FrameAlign);      break; +  }    // In the retcon ABI, remember whether the frame is inline in the storage.    case coro::ABI::Retcon: @@ -863,7 +1244,7 @@ struct AllocaUseVisitor : PtrUseVisitor<AllocaUseVisitor> {        : PtrUseVisitor(DL), DT(DT), CoroBegin(CB), Checker(Checker) {}    void visit(Instruction &I) { -    UserBBs.insert(I.getParent()); +    Users.insert(&I);      Base::visit(I);      // If the pointer is escaped prior to CoroBegin, we have to assume it would      // be written into before CoroBegin as well. @@ -966,6 +1347,12 @@ struct AllocaUseVisitor : PtrUseVisitor<AllocaUseVisitor> {      handleAlias(GEPI);    } +  void visitIntrinsicInst(IntrinsicInst &II) { +    if (II.getIntrinsicID() != Intrinsic::lifetime_start) +      return Base::visitIntrinsicInst(II); +    LifetimeStarts.insert(&II); +  } +    void visitCallBase(CallBase &CB) {      for (unsigned Op = 0, OpCount = CB.getNumArgOperands(); Op < OpCount; ++Op)        if (U->get() == CB.getArgOperand(Op) && !CB.doesNotCapture(Op)) @@ -999,18 +1386,40 @@ private:    // after CoroBegin. Each entry contains the instruction and the offset in the    // original Alloca. They need to be recreated after CoroBegin off the frame.    DenseMap<Instruction *, llvm::Optional<APInt>> AliasOffetMap{}; -  SmallPtrSet<BasicBlock *, 2> UserBBs{}; +  SmallPtrSet<Instruction *, 4> Users{}; +  SmallPtrSet<IntrinsicInst *, 2> LifetimeStarts{};    bool MayWriteBeforeCoroBegin{false};    mutable llvm::Optional<bool> ShouldLiveOnFrame{};    bool computeShouldLiveOnFrame() const { +    // If lifetime information is available, we check it first since it's +    // more precise. We look at every pair of lifetime.start intrinsic and +    // every basic block that uses the pointer to see if they cross suspension +    // points. The uses cover both direct uses as well as indirect uses. +    if (!LifetimeStarts.empty()) { +      for (auto *I : Users) +        for (auto *S : LifetimeStarts) +          if (Checker.isDefinitionAcrossSuspend(*S, I)) +            return true; +      return false; +    } +    // FIXME: Ideally the isEscaped check should come at the beginning. +    // However there are a few loose ends that need to be fixed first before +    // we can do that. We need to make sure we are not over-conservative, so +    // that the data accessed in-between await_suspend and symmetric transfer +    // is always put on the stack, and also data accessed after coro.end is +    // always put on the stack (esp the return object). To fix that, we need +    // to: +    //  1) Potentially treat sret as nocapture in calls +    //  2) Special handle the return object and put it on the stack +    //  3) Utilize lifetime.end intrinsic      if (PI.isEscaped())        return true; -    for (auto *BB1 : UserBBs) -      for (auto *BB2 : UserBBs) -        if (Checker.hasPathCrossingSuspendPoint(BB1, BB2)) +    for (auto *U1 : Users) +      for (auto *U2 : Users) +        if (Checker.isDefinitionAcrossSuspend(*U1, U2))            return true;      return false; @@ -1072,6 +1481,15 @@ static Instruction *splitBeforeCatchSwitch(CatchSwitchInst *CatchSwitch) {    return CleanupRet;  } +static void createFramePtr(coro::Shape &Shape) { +  auto *CB = Shape.CoroBegin; +  IRBuilder<> Builder(CB->getNextNode()); +  StructType *FrameTy = Shape.FrameTy; +  PointerType *FramePtrTy = FrameTy->getPointerTo(); +  Shape.FramePtr = +      cast<Instruction>(Builder.CreateBitCast(CB, FramePtrTy, "FramePtr")); +} +  // Replace all alloca and SSA values that are accessed across suspend points  // with GetElementPointer from coroutine frame + loads and stores. Create an  // AllocaSpillBB that will become the new entry block for the resume parts of @@ -1098,11 +1516,9 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,                                   coro::Shape &Shape) {    auto *CB = Shape.CoroBegin;    LLVMContext &C = CB->getContext(); -  IRBuilder<> Builder(CB->getNextNode()); +  IRBuilder<> Builder(C);    StructType *FrameTy = Shape.FrameTy; -  PointerType *FramePtrTy = FrameTy->getPointerTo(); -  auto *FramePtr = -      cast<Instruction>(Builder.CreateBitCast(CB, FramePtrTy, "FramePtr")); +  Instruction *FramePtr = Shape.FramePtr;    DominatorTree DT(*CB->getFunction());    SmallDenseMap<llvm::Value *, llvm::AllocaInst *, 4> DbgPtrAllocaCache; @@ -1146,9 +1562,11 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,    for (auto const &E : FrameData.Spills) {      Value *Def = E.first; +    auto SpillAlignment = Align(FrameData.getAlign(Def));      // Create a store instruction storing the value into the      // coroutine frame.      Instruction *InsertPt = nullptr; +    bool NeedToCopyArgPtrValue = false;      if (auto *Arg = dyn_cast<Argument>(Def)) {        // For arguments, we will place the store instruction right after        // the coroutine frame pointer instruction, i.e. bitcast of @@ -1159,6 +1577,9 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,        // from the coroutine function.        Arg->getParent()->removeParamAttr(Arg->getArgNo(), Attribute::NoCapture); +      if (Arg->hasByValAttr()) +        NeedToCopyArgPtrValue = true; +      } else if (auto *CSI = dyn_cast<AnyCoroSuspendInst>(Def)) {        // Don't spill immediately after a suspend; splitting assumes        // that the suspend will be followed by a branch. @@ -1193,7 +1614,15 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,      Builder.SetInsertPoint(InsertPt);      auto *G = Builder.CreateConstInBoundsGEP2_32(          FrameTy, FramePtr, 0, Index, Def->getName() + Twine(".spill.addr")); -    Builder.CreateStore(Def, G); +    if (NeedToCopyArgPtrValue) { +      // For byval arguments, we need to store the pointed value in the frame, +      // instead of the pointer itself. +      auto *Value = +          Builder.CreateLoad(Def->getType()->getPointerElementType(), Def); +      Builder.CreateAlignedStore(Value, G, SpillAlignment); +    } else { +      Builder.CreateAlignedStore(Def, G, SpillAlignment); +    }      BasicBlock *CurrentBlock = nullptr;      Value *CurrentReload = nullptr; @@ -1207,9 +1636,12 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,          auto *GEP = GetFramePointer(E.first);          GEP->setName(E.first->getName() + Twine(".reload.addr")); -        CurrentReload = Builder.CreateLoad( -            FrameTy->getElementType(FrameData.getFieldIndex(E.first)), GEP, -            E.first->getName() + Twine(".reload")); +        if (NeedToCopyArgPtrValue) +          CurrentReload = GEP; +        else +          CurrentReload = Builder.CreateAlignedLoad( +              FrameTy->getElementType(FrameData.getFieldIndex(E.first)), GEP, +              SpillAlignment, E.first->getName() + Twine(".reload"));          TinyPtrVector<DbgDeclareInst *> DIs = FindDbgDeclareUses(Def);          for (DbgDeclareInst *DDI : DIs) { @@ -1223,7 +1655,7 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,                               &*Builder.GetInsertPoint());            // This dbg.declare is for the main function entry point.  It            // will be deleted in all coro-split functions. -          coro::salvageDebugInfo(DbgPtrAllocaCache, DDI); +          coro::salvageDebugInfo(DbgPtrAllocaCache, DDI, Shape.ReuseFrameSlot);          }        } @@ -1271,8 +1703,8 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,    }    // If we found any alloca, replace all of their remaining uses with GEP -  // instructions. Because new dbg.declare have been created for these alloca, -  // we also delete the original dbg.declare and replace other uses with undef. +  // instructions. To remain debugbility, we replace the uses of allocas for +  // dbg.declares and dbg.values with the reload from the frame.    // Note: We cannot replace the alloca with GEP instructions indiscriminately,    // as some of the uses may not be dominated by CoroBegin.    Builder.SetInsertPoint(&Shape.AllocaSpillBlock->front()); @@ -1290,17 +1722,10 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,      auto *G = GetFramePointer(Alloca);      G->setName(Alloca->getName() + Twine(".reload.addr")); -    SmallPtrSet<BasicBlock *, 4> SeenDbgBBs; -    TinyPtrVector<DbgDeclareInst *> DIs = FindDbgDeclareUses(Alloca); -    if (!DIs.empty()) -      DIBuilder(*Alloca->getModule(), -                /*AllowUnresolved*/ false) -          .insertDeclare(G, DIs.front()->getVariable(), -                         DIs.front()->getExpression(), -                         DIs.front()->getDebugLoc(), DIs.front()); -    for (auto *DI : FindDbgDeclareUses(Alloca)) -      DI->eraseFromParent(); -    replaceDbgUsesWithUndef(Alloca); +    SmallVector<DbgVariableIntrinsic *, 4> DIs; +    findDbgUsers(DIs, Alloca); +    for (auto *DVI : DIs) +      DVI->replaceUsesOfWith(Alloca, G);      for (Instruction *I : UsersToUpdate)        I->replaceUsesOfWith(Alloca, G); @@ -1326,7 +1751,7 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,        auto *FramePtrRaw =            Builder.CreateBitCast(FramePtr, Type::getInt8PtrTy(C));        auto *AliasPtr = Builder.CreateGEP( -          FramePtrRaw, +          Type::getInt8Ty(C), FramePtrRaw,            ConstantInt::get(Type::getInt64Ty(C), Alias.second.getValue()));        auto *AliasPtrTyped =            Builder.CreateBitCast(AliasPtr, Alias.first->getType()); @@ -1337,77 +1762,6 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,    return FramePtr;  } -// Sets the unwind edge of an instruction to a particular successor. -static void setUnwindEdgeTo(Instruction *TI, BasicBlock *Succ) { -  if (auto *II = dyn_cast<InvokeInst>(TI)) -    II->setUnwindDest(Succ); -  else if (auto *CS = dyn_cast<CatchSwitchInst>(TI)) -    CS->setUnwindDest(Succ); -  else if (auto *CR = dyn_cast<CleanupReturnInst>(TI)) -    CR->setUnwindDest(Succ); -  else -    llvm_unreachable("unexpected terminator instruction"); -} - -// Replaces all uses of OldPred with the NewPred block in all PHINodes in a -// block. -static void updatePhiNodes(BasicBlock *DestBB, BasicBlock *OldPred, -                           BasicBlock *NewPred, PHINode *Until = nullptr) { -  unsigned BBIdx = 0; -  for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) { -    PHINode *PN = cast<PHINode>(I); - -    // We manually update the LandingPadReplacement PHINode and it is the last -    // PHI Node. So, if we find it, we are done. -    if (Until == PN) -      break; - -    // Reuse the previous value of BBIdx if it lines up.  In cases where we -    // have multiple phi nodes with *lots* of predecessors, this is a speed -    // win because we don't have to scan the PHI looking for TIBB.  This -    // happens because the BB list of PHI nodes are usually in the same -    // order. -    if (PN->getIncomingBlock(BBIdx) != OldPred) -      BBIdx = PN->getBasicBlockIndex(OldPred); - -    assert(BBIdx != (unsigned)-1 && "Invalid PHI Index!"); -    PN->setIncomingBlock(BBIdx, NewPred); -  } -} - -// Uses SplitEdge unless the successor block is an EHPad, in which case do EH -// specific handling. -static BasicBlock *ehAwareSplitEdge(BasicBlock *BB, BasicBlock *Succ, -                                    LandingPadInst *OriginalPad, -                                    PHINode *LandingPadReplacement) { -  auto *PadInst = Succ->getFirstNonPHI(); -  if (!LandingPadReplacement && !PadInst->isEHPad()) -    return SplitEdge(BB, Succ); - -  auto *NewBB = BasicBlock::Create(BB->getContext(), "", BB->getParent(), Succ); -  setUnwindEdgeTo(BB->getTerminator(), NewBB); -  updatePhiNodes(Succ, BB, NewBB, LandingPadReplacement); - -  if (LandingPadReplacement) { -    auto *NewLP = OriginalPad->clone(); -    auto *Terminator = BranchInst::Create(Succ, NewBB); -    NewLP->insertBefore(Terminator); -    LandingPadReplacement->addIncoming(NewLP, NewBB); -    return NewBB; -  } -  Value *ParentPad = nullptr; -  if (auto *FuncletPad = dyn_cast<FuncletPadInst>(PadInst)) -    ParentPad = FuncletPad->getParentPad(); -  else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(PadInst)) -    ParentPad = CatchSwitch->getParentPad(); -  else -    llvm_unreachable("handling for other EHPads not implemented yet"); - -  auto *NewCleanupPad = CleanupPadInst::Create(ParentPad, {}, "", NewBB); -  CleanupReturnInst::Create(NewCleanupPad, Succ, NewBB); -  return NewBB; -} -  // Moves the values in the PHIs in SuccBB that correspong to PredBB into a new  // PHI in InsertedBB.  static void movePHIValuesToInsertedBlock(BasicBlock *SuccBB, @@ -1503,6 +1857,24 @@ static void rewritePHIsForCleanupPad(BasicBlock *CleanupPadBB,    }  } +static void cleanupSinglePredPHIs(Function &F) { +  SmallVector<PHINode *, 32> Worklist; +  for (auto &BB : F) { +    for (auto &Phi : BB.phis()) { +      if (Phi.getNumIncomingValues() == 1) { +        Worklist.push_back(&Phi); +      } else +        break; +    } +  } +  while (!Worklist.empty()) { +    auto *Phi = Worklist.back(); +    Worklist.pop_back(); +    auto *OriginalValue = Phi->getIncomingValue(0); +    Phi->replaceAllUsesWith(OriginalValue); +  } +} +  static void rewritePHIs(BasicBlock &BB) {    // For every incoming edge we will create a block holding all    // incoming values in a single PHI nodes. @@ -1610,11 +1982,16 @@ static void rewriteMaterializableInstructions(IRBuilder<> &IRB,      for (Instruction *U : E.second) {        // If we have not seen this block, materialize the value.        if (CurrentBlock != U->getParent()) { -        CurrentBlock = U->getParent(); + +        bool IsInCoroSuspendBlock = isa<AnyCoroSuspendInst>(U); +        CurrentBlock = IsInCoroSuspendBlock +                           ? U->getParent()->getSinglePredecessor() +                           : U->getParent();          CurrentMaterialization = cast<Instruction>(Def)->clone();          CurrentMaterialization->setName(Def->getName());          CurrentMaterialization->insertBefore( -            &*CurrentBlock->getFirstInsertionPt()); +            IsInCoroSuspendBlock ? CurrentBlock->getTerminator() +                                 : &*CurrentBlock->getFirstInsertionPt());        }        if (auto *PN = dyn_cast<PHINode>(U)) {          assert(PN->getNumIncomingValues() == 1 && @@ -2101,24 +2478,6 @@ static void sinkLifetimeStartMarkers(Function &F, coro::Shape &Shape,  static void collectFrameAllocas(Function &F, coro::Shape &Shape,                                  const SuspendCrossingInfo &Checker,                                  SmallVectorImpl<AllocaInfo> &Allocas) { -  // Collect lifetime.start info for each alloca. -  using LifetimeStart = SmallPtrSet<Instruction *, 2>; -  llvm::DenseMap<AllocaInst *, std::unique_ptr<LifetimeStart>> LifetimeMap; -  for (Instruction &I : instructions(F)) { -    auto *II = dyn_cast<IntrinsicInst>(&I); -    if (!II || II->getIntrinsicID() != Intrinsic::lifetime_start) -      continue; - -    if (auto *OpInst = dyn_cast<Instruction>(II->getOperand(1))) { -      if (auto *AI = dyn_cast<AllocaInst>(OpInst->stripPointerCasts())) { - -        if (LifetimeMap.find(AI) == LifetimeMap.end()) -          LifetimeMap[AI] = std::make_unique<LifetimeStart>(); -        LifetimeMap[AI]->insert(isa<AllocaInst>(OpInst) ? II : OpInst); -      } -    } -  } -    for (Instruction &I : instructions(F)) {      auto *AI = dyn_cast<AllocaInst>(&I);      if (!AI) @@ -2128,23 +2487,6 @@ static void collectFrameAllocas(Function &F, coro::Shape &Shape,      if (AI == Shape.SwitchLowering.PromiseAlloca) {        continue;      } -    bool ShouldLiveOnFrame = false; -    auto Iter = LifetimeMap.find(AI); -    if (Iter != LifetimeMap.end()) { -      // Check against lifetime.start if the instruction has the info. -      for (User *U : I.users()) { -        for (auto *S : *Iter->second) -          if ((ShouldLiveOnFrame = Checker.isDefinitionAcrossSuspend(*S, U))) -            break; -        if (ShouldLiveOnFrame) -          break; -      } -      if (!ShouldLiveOnFrame) -        continue; -    } -    // At this point, either ShouldLiveOnFrame is true or we didn't have -    // lifetime information. We will need to rely on more precise pointer -    // tracking.      DominatorTree DT(F);      AllocaUseVisitor Visitor{F.getParent()->getDataLayout(), DT,                               *Shape.CoroBegin, Checker}; @@ -2158,58 +2500,94 @@ static void collectFrameAllocas(Function &F, coro::Shape &Shape,  void coro::salvageDebugInfo(      SmallDenseMap<llvm::Value *, llvm::AllocaInst *, 4> &DbgPtrAllocaCache, -    DbgDeclareInst *DDI, bool LoadFromFramePtr) { -  Function *F = DDI->getFunction(); +    DbgVariableIntrinsic *DVI, bool ReuseFrameSlot) { +  Function *F = DVI->getFunction();    IRBuilder<> Builder(F->getContext());    auto InsertPt = F->getEntryBlock().getFirstInsertionPt();    while (isa<IntrinsicInst>(InsertPt))      ++InsertPt;    Builder.SetInsertPoint(&F->getEntryBlock(), InsertPt); -  DIExpression *Expr = DDI->getExpression(); +  DIExpression *Expr = DVI->getExpression();    // Follow the pointer arithmetic all the way to the incoming    // function argument and convert into a DIExpression. -  Value *Storage = DDI->getAddress(); +  bool OutermostLoad = true; +  Value *Storage = DVI->getVariableLocationOp(0); +  Value *OriginalStorage = Storage;    while (Storage) {      if (auto *LdInst = dyn_cast<LoadInst>(Storage)) {        Storage = LdInst->getOperand(0); +      // FIXME: This is a heuristic that works around the fact that +      // LLVM IR debug intrinsics cannot yet distinguish between +      // memory and value locations: Because a dbg.declare(alloca) is +      // implicitly a memory location no DW_OP_deref operation for the +      // last direct load from an alloca is necessary.  This condition +      // effectively drops the *last* DW_OP_deref in the expression. +      if (!OutermostLoad) +        Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore); +      OutermostLoad = false;      } else if (auto *StInst = dyn_cast<StoreInst>(Storage)) {        Storage = StInst->getOperand(0);      } else if (auto *GEPInst = dyn_cast<GetElementPtrInst>(Storage)) { -      Expr = llvm::salvageDebugInfoImpl(*GEPInst, Expr, -                                        /*WithStackValue=*/false); +      SmallVector<Value *> AdditionalValues; +      DIExpression *SalvagedExpr = llvm::salvageDebugInfoImpl( +          *GEPInst, Expr, +          /*WithStackValue=*/false, 0, AdditionalValues); +      // Debug declares cannot currently handle additional location +      // operands. +      if (!SalvagedExpr || !AdditionalValues.empty()) +        break; +      Expr = SalvagedExpr;        Storage = GEPInst->getOperand(0);      } else if (auto *BCInst = dyn_cast<llvm::BitCastInst>(Storage))        Storage = BCInst->getOperand(0);      else        break;    } +  if (!Storage) +    return; +    // Store a pointer to the coroutine frame object in an alloca so it    // is available throughout the function when producing unoptimized    // code. Extending the lifetime this way is correct because the    // variable has been declared by a dbg.declare intrinsic. -  if (auto Arg = dyn_cast_or_null<llvm::Argument>(Storage)) { -    auto &Cached = DbgPtrAllocaCache[Storage]; -    if (!Cached) { -      Cached = Builder.CreateAlloca(Storage->getType(), 0, nullptr, -                                    Arg->getName() + ".debug"); -      Builder.CreateStore(Storage, Cached); +  // +  // Avoid to create the alloca would be eliminated by optimization +  // passes and the corresponding dbg.declares would be invalid. +  if (!ReuseFrameSlot && !EnableReuseStorageInFrame) +    if (auto *Arg = dyn_cast<llvm::Argument>(Storage)) { +      auto &Cached = DbgPtrAllocaCache[Storage]; +      if (!Cached) { +        Cached = Builder.CreateAlloca(Storage->getType(), 0, nullptr, +                                      Arg->getName() + ".debug"); +        Builder.CreateStore(Storage, Cached); +      } +      Storage = Cached; +      // FIXME: LLVM lacks nuanced semantics to differentiate between +      // memory and direct locations at the IR level. The backend will +      // turn a dbg.declare(alloca, ..., DIExpression()) into a memory +      // location. Thus, if there are deref and offset operations in the +      // expression, we need to add a DW_OP_deref at the *start* of the +      // expression to first load the contents of the alloca before +      // adjusting it with the expression. +      if (Expr && Expr->isComplex()) +        Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore);      } -    Storage = Cached; -  } -  // The FramePtr object adds one extra layer of indirection that -  // needs to be unwrapped. -  if (LoadFromFramePtr) -    Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore); -  auto &VMContext = DDI->getFunction()->getContext(); -  DDI->setOperand( -      0, MetadataAsValue::get(VMContext, ValueAsMetadata::get(Storage))); -  DDI->setOperand(2, MetadataAsValue::get(VMContext, Expr)); -  if (auto *InsertPt = dyn_cast_or_null<Instruction>(Storage)) -    DDI->moveAfter(InsertPt); + +  DVI->replaceVariableLocationOp(OriginalStorage, Storage); +  DVI->setExpression(Expr); +  /// It makes no sense to move the dbg.value intrinsic. +  if (!isa<DbgValueInst>(DVI)) { +    if (auto *InsertPt = dyn_cast<Instruction>(Storage)) +      DVI->moveAfter(InsertPt); +    else if (isa<Argument>(Storage)) +      DVI->moveAfter(F->getEntryBlock().getFirstNonPHI()); +  }  }  void coro::buildCoroutineFrame(Function &F, Shape &Shape) { -  eliminateSwiftError(F, Shape); +  // Don't eliminate swifterror in async functions that won't be split. +  if (Shape.ABI != coro::ABI::Async || !Shape.CoroSuspends.empty()) +    eliminateSwiftError(F, Shape);    if (Shape.ABI == coro::ABI::Switch &&        Shape.SwitchLowering.PromiseAlloca) { @@ -2246,6 +2624,10 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) {      }    } +  // Later code makes structural assumptions about single predecessors phis e.g +  // that they are not live accross a suspend point. +  cleanupSinglePredPHIs(F); +    // Transforms multi-edge PHI Nodes, so that any value feeding into a PHI will    // never has its definition separated from the PHI by the suspend point.    rewritePHIs(F); @@ -2263,11 +2645,19 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) {      for (int Repeat = 0; Repeat < 4; ++Repeat) {        // See if there are materializable instructions across suspend points.        for (Instruction &I : instructions(F)) -        if (materializable(I)) +        if (materializable(I)) {            for (User *U : I.users())              if (Checker.isDefinitionAcrossSuspend(I, U))                Spills[&I].push_back(cast<Instruction>(U)); +          // Manually add dbg.value metadata uses of I. +          SmallVector<DbgValueInst *, 16> DVIs; +          findDbgValues(DVIs, &I); +          for (auto *DVI : DVIs) +            if (Checker.isDefinitionAcrossSuspend(I, DVI)) +              Spills[&I].push_back(DVI); +        } +        if (Spills.empty())          break; @@ -2280,7 +2670,8 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) {    }    sinkLifetimeStartMarkers(F, Shape, Checker); -  collectFrameAllocas(F, Shape, Checker, FrameData.Allocas); +  if (Shape.ABI != coro::ABI::Async || !Shape.CoroSuspends.empty()) +    collectFrameAllocas(F, Shape, Checker, FrameData.Allocas);    LLVM_DEBUG(dumpAllocas(FrameData.Allocas));    // Collect the spills for arguments and other not-materializable values. @@ -2339,12 +2730,30 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) {          FrameData.Spills[&I].push_back(cast<Instruction>(U));        }    } + +  // We don't want the layout of coroutine frame to be affected +  // by debug information. So we only choose to salvage DbgValueInst for +  // whose value is already in the frame. +  // We would handle the dbg.values for allocas specially +  for (auto &Iter : FrameData.Spills) { +    auto *V = Iter.first; +    SmallVector<DbgValueInst *, 16> DVIs; +    findDbgValues(DVIs, V); +    llvm::for_each(DVIs, [&](DbgValueInst *DVI) { +      if (Checker.isDefinitionAcrossSuspend(*V, DVI)) +        FrameData.Spills[V].push_back(DVI); +    }); +  } +    LLVM_DEBUG(dumpSpills("Spills", FrameData.Spills));    if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce ||        Shape.ABI == coro::ABI::Async)      sinkSpillUsesAfterCoroBegin(F, FrameData, Shape.CoroBegin);    Shape.FrameTy = buildFrameType(F, Shape, FrameData); -  Shape.FramePtr = insertSpills(FrameData, Shape); +  createFramePtr(Shape); +  // For now, this works for C++ programs only. +  buildFrameDebugInfo(F, Shape, FrameData); +  insertSpills(FrameData, Shape);    lowerLocalAllocas(LocalAllocas, DeadInstructions);    for (auto I : DeadInstructions) | 
