aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm-project/llvm/lib/Transforms/Coroutines
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2021-08-22 19:00:43 +0000
committerDimitry Andric <dim@FreeBSD.org>2021-11-13 20:39:49 +0000
commitfe6060f10f634930ff71b7c50291ddc610da2475 (patch)
tree1483580c790bd4d27b6500a7542b5ee00534d3cc /contrib/llvm-project/llvm/lib/Transforms/Coroutines
parentb61bce17f346d79cecfd8f195a64b10f77be43b1 (diff)
parent344a3780b2e33f6ca763666c380202b18aab72a3 (diff)
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Transforms/Coroutines')
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp25
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroElide.cpp75
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroFrame.cpp752
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroInstr.h26
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroInternal.h11
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroSplit.cpp210
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Coroutines/Coroutines.cpp11
7 files changed, 823 insertions, 287 deletions
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp
index 298149f8b546..5b09cdb35791 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp
@@ -80,6 +80,23 @@ bool Lowerer::lowerRemainingCoroIntrinsics(Function &F) {
case Intrinsic::coro_subfn_addr:
lowerSubFn(Builder, cast<CoroSubFnInst>(II));
break;
+ case Intrinsic::coro_async_size_replace:
+ auto *Target = cast<ConstantStruct>(
+ cast<GlobalVariable>(II->getArgOperand(0)->stripPointerCasts())
+ ->getInitializer());
+ auto *Source = cast<ConstantStruct>(
+ cast<GlobalVariable>(II->getArgOperand(1)->stripPointerCasts())
+ ->getInitializer());
+ auto *TargetSize = Target->getOperand(1);
+ auto *SourceSize = Source->getOperand(1);
+ if (TargetSize->isElementWiseEqual(SourceSize)) {
+ break;
+ }
+ auto *TargetRelativeFunOffset = Target->getOperand(0);
+ auto *NewFuncPtrStruct = ConstantStruct::get(
+ Target->getType(), TargetRelativeFunOffset, SourceSize);
+ Target->replaceAllUsesWith(NewFuncPtrStruct);
+ break;
}
II->eraseFromParent();
Changed = true;
@@ -95,10 +112,10 @@ bool Lowerer::lowerRemainingCoroIntrinsics(Function &F) {
}
static bool declaresCoroCleanupIntrinsics(const Module &M) {
- return coro::declaresIntrinsics(M, {"llvm.coro.alloc", "llvm.coro.begin",
- "llvm.coro.subfn.addr", "llvm.coro.free",
- "llvm.coro.id", "llvm.coro.id.retcon",
- "llvm.coro.id.retcon.once"});
+ return coro::declaresIntrinsics(
+ M, {"llvm.coro.alloc", "llvm.coro.begin", "llvm.coro.subfn.addr",
+ "llvm.coro.free", "llvm.coro.id", "llvm.coro.id.retcon",
+ "llvm.coro.id.retcon.once", "llvm.coro.async.size.replace"});
}
PreservedAnalyses CoroCleanupPass::run(Function &F,
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroElide.cpp b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroElide.cpp
index 07a183cfc66b..84bebb7bf42d 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroElide.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroElide.cpp
@@ -9,6 +9,7 @@
#include "llvm/Transforms/Coroutines/CoroElide.h"
#include "CoroInternal.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/Dominators.h"
@@ -16,11 +17,20 @@
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FileSystem.h"
using namespace llvm;
#define DEBUG_TYPE "coro-elide"
+STATISTIC(NumOfCoroElided, "The # of coroutine get elided.");
+
+#ifndef NDEBUG
+static cl::opt<std::string> CoroElideInfoOutputFilename(
+ "coro-elide-info-output-file", cl::value_desc("filename"),
+ cl::desc("File to record the coroutines got elided"), cl::Hidden);
+#endif
+
namespace {
// Created on demand if the coro-elide pass has work to do.
struct Lowerer : coro::LowererBase {
@@ -29,7 +39,6 @@ struct Lowerer : coro::LowererBase {
SmallVector<CoroAllocInst *, 1> CoroAllocs;
SmallVector<CoroSubFnInst *, 4> ResumeAddr;
DenseMap<CoroBeginInst *, SmallVector<CoroSubFnInst *, 4>> DestroyAddr;
- SmallVector<CoroFreeInst *, 1> CoroFrees;
SmallPtrSet<const SwitchInst *, 4> CoroSuspendSwitches;
Lowerer(Module &M) : LowererBase(M) {}
@@ -71,7 +80,7 @@ static void replaceWithConstant(Constant *Value,
// See if any operand of the call instruction references the coroutine frame.
static bool operandReferences(CallInst *CI, AllocaInst *Frame, AAResults &AA) {
for (Value *Op : CI->operand_values())
- if (AA.alias(Op, Frame) != NoAlias)
+ if (!AA.isNoAlias(Op, Frame))
return true;
return false;
}
@@ -79,11 +88,16 @@ static bool operandReferences(CallInst *CI, AllocaInst *Frame, AAResults &AA) {
// Look for any tail calls referencing the coroutine frame and remove tail
// attribute from them, since now coroutine frame resides on the stack and tail
// call implies that the function does not references anything on the stack.
+// However if it's a musttail call, we cannot remove the tailcall attribute.
+// It's safe to keep it there as the musttail call is for symmetric transfer,
+// and by that point the frame should have been destroyed and hence not
+// interfering with operands.
static void removeTailCallAttribute(AllocaInst *Frame, AAResults &AA) {
Function &F = *Frame->getFunction();
for (Instruction &I : instructions(F))
if (auto *Call = dyn_cast<CallInst>(&I))
- if (Call->isTailCall() && operandReferences(Call, Frame, AA))
+ if (Call->isTailCall() && operandReferences(Call, Frame, AA) &&
+ !Call->isMustTailCall())
Call->setTailCall(false);
}
@@ -114,6 +128,21 @@ static Instruction *getFirstNonAllocaInTheEntryBlock(Function *F) {
llvm_unreachable("no terminator in the entry block");
}
+#ifndef NDEBUG
+static std::unique_ptr<raw_fd_ostream> getOrCreateLogFile() {
+ assert(!CoroElideInfoOutputFilename.empty() &&
+ "coro-elide-info-output-file shouldn't be empty");
+ std::error_code EC;
+ auto Result = std::make_unique<raw_fd_ostream>(CoroElideInfoOutputFilename,
+ EC, sys::fs::OF_Append);
+ if (!EC)
+ return Result;
+ llvm::errs() << "Error opening coro-elide-info-output-file '"
+ << CoroElideInfoOutputFilename << " for appending!\n";
+ return std::make_unique<raw_fd_ostream>(2, false); // stderr.
+}
+#endif
+
// To elide heap allocations we need to suppress code blocks guarded by
// llvm.coro.alloc and llvm.coro.free instructions.
void Lowerer::elideHeapAllocations(Function *F, uint64_t FrameSize,
@@ -227,17 +256,22 @@ bool Lowerer::shouldElide(Function *F, DominatorTree &DT) const {
// Filter out the coro.destroy that lie along exceptional paths.
SmallPtrSet<CoroBeginInst *, 8> ReferencedCoroBegins;
for (auto &It : DestroyAddr) {
+ // If there is any coro.destroy dominates all of the terminators for the
+ // coro.begin, we could know the corresponding coro.begin wouldn't escape.
for (Instruction *DA : It.second) {
- for (BasicBlock *TI : Terminators) {
- if (DT.dominates(DA, TI->getTerminator())) {
- ReferencedCoroBegins.insert(It.first);
- break;
- }
+ if (llvm::all_of(Terminators, [&](auto *TI) {
+ return DT.dominates(DA, TI->getTerminator());
+ })) {
+ ReferencedCoroBegins.insert(It.first);
+ break;
}
}
// Whether there is any paths from coro.begin to Terminators which not pass
// through any of the coro.destroys.
+ //
+ // hasEscapePath is relatively slow, so we avoid to run it as much as
+ // possible.
if (!ReferencedCoroBegins.count(It.first) &&
!hasEscapePath(It.first, Terminators))
ReferencedCoroBegins.insert(It.first);
@@ -246,20 +280,7 @@ bool Lowerer::shouldElide(Function *F, DominatorTree &DT) const {
// If size of the set is the same as total number of coro.begin, that means we
// found a coro.free or coro.destroy referencing each coro.begin, so we can
// perform heap elision.
- if (ReferencedCoroBegins.size() != CoroBegins.size())
- return false;
-
- // If any call in the function is a musttail call, it usually won't work
- // because we cannot drop the tailcall attribute, and a tail call will reuse
- // the entire stack where we are going to put the new frame. In theory a more
- // precise analysis can be done to check whether the new frame aliases with
- // the call, however it's challenging to do so before the elision actually
- // happened.
- for (BasicBlock &BB : *F)
- if (BB.getTerminatingMustTailCall())
- return false;
-
- return true;
+ return ReferencedCoroBegins.size() == CoroBegins.size();
}
void Lowerer::collectPostSplitCoroIds(Function *F) {
@@ -290,7 +311,6 @@ bool Lowerer::processCoroId(CoroIdInst *CoroId, AAResults &AA,
DominatorTree &DT) {
CoroBegins.clear();
CoroAllocs.clear();
- CoroFrees.clear();
ResumeAddr.clear();
DestroyAddr.clear();
@@ -300,8 +320,6 @@ bool Lowerer::processCoroId(CoroIdInst *CoroId, AAResults &AA,
CoroBegins.push_back(CB);
else if (auto *CA = dyn_cast<CoroAllocInst>(U))
CoroAllocs.push_back(CA);
- else if (auto *CF = dyn_cast<CoroFreeInst>(U))
- CoroFrees.push_back(CF);
}
// Collect all coro.subfn.addrs associated with coro.begin.
@@ -347,6 +365,13 @@ bool Lowerer::processCoroId(CoroIdInst *CoroId, AAResults &AA,
elideHeapAllocations(CoroId->getFunction(), FrameSizeAndAlign.first,
FrameSizeAndAlign.second, AA);
coro::replaceCoroFree(CoroId, /*Elide=*/true);
+ NumOfCoroElided++;
+#ifndef NDEBUG
+ if (!CoroElideInfoOutputFilename.empty())
+ *getOrCreateLogFile()
+ << "Elide " << CoroId->getCoroutine()->getName() << " in "
+ << CoroId->getFunction()->getName() << "\n";
+#endif
}
return true;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
index e1e0d50979dc..beae5fdac8ab 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
@@ -12,8 +12,6 @@
// contain those values. All uses of those values are replaced with appropriate
// GEP + load from the coroutine frame. At the point of the definition we spill
// the value into the coroutine frame.
-//
-// TODO: pack values tightly using liveness info.
//===----------------------------------------------------------------------===//
#include "CoroInternal.h"
@@ -32,6 +30,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/OptimizedStructLayout.h"
#include "llvm/Support/circular_raw_ostream.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
@@ -163,6 +162,16 @@ struct SuspendCrossingInfo {
return isDefinitionAcrossSuspend(DefBB, U);
}
+
+ bool isDefinitionAcrossSuspend(Value &V, User *U) const {
+ if (auto *Arg = dyn_cast<Argument>(&V))
+ return isDefinitionAcrossSuspend(*Arg, U);
+ if (auto *Inst = dyn_cast<Instruction>(&V))
+ return isDefinitionAcrossSuspend(*Inst, U);
+
+ llvm_unreachable(
+ "Coroutine could only collect Argument and Instruction now.");
+ }
};
} // end anonymous namespace
@@ -336,6 +345,28 @@ struct FrameDataInfo {
FieldIndexMap[V] = Index;
}
+ uint64_t getAlign(Value *V) const {
+ auto Iter = FieldAlignMap.find(V);
+ assert(Iter != FieldAlignMap.end());
+ return Iter->second;
+ }
+
+ void setAlign(Value *V, uint64_t Align) {
+ assert(FieldAlignMap.count(V) == 0);
+ FieldAlignMap.insert({V, Align});
+ }
+
+ uint64_t getOffset(Value *V) const {
+ auto Iter = FieldOffsetMap.find(V);
+ assert(Iter != FieldOffsetMap.end());
+ return Iter->second;
+ }
+
+ void setOffset(Value *V, uint64_t Offset) {
+ assert(FieldOffsetMap.count(V) == 0);
+ FieldOffsetMap.insert({V, Offset});
+ }
+
// Remap the index of every field in the frame, using the final layout index.
void updateLayoutIndex(FrameTypeBuilder &B);
@@ -347,6 +378,12 @@ private:
// with their original insertion field index. After the frame is built, their
// indexes will be updated into the final layout index.
DenseMap<Value *, uint32_t> FieldIndexMap;
+ // Map from values to their alignment on the frame. They would be set after
+ // the frame is built.
+ DenseMap<Value *, uint64_t> FieldAlignMap;
+ // Map from values to their offset on the frame. They would be set after
+ // the frame is built.
+ DenseMap<Value *, uint64_t> FieldOffsetMap;
};
} // namespace
@@ -392,12 +429,15 @@ private:
Align StructAlign;
bool IsFinished = false;
+ Optional<Align> MaxFrameAlignment;
+
SmallVector<Field, 8> Fields;
DenseMap<Value*, unsigned> FieldIndexByKey;
public:
- FrameTypeBuilder(LLVMContext &Context, DataLayout const &DL)
- : DL(DL), Context(Context) {}
+ FrameTypeBuilder(LLVMContext &Context, DataLayout const &DL,
+ Optional<Align> MaxFrameAlignment)
+ : DL(DL), Context(Context), MaxFrameAlignment(MaxFrameAlignment) {}
/// Add a field to this structure for the storage of an `alloca`
/// instruction.
@@ -448,17 +488,32 @@ public:
/// Add a field to this structure.
LLVM_NODISCARD FieldIDType addField(Type *Ty, MaybeAlign FieldAlignment,
- bool IsHeader = false) {
+ bool IsHeader = false,
+ bool IsSpillOfValue = false) {
assert(!IsFinished && "adding fields to a finished builder");
assert(Ty && "must provide a type for a field");
// The field size is always the alloc size of the type.
uint64_t FieldSize = DL.getTypeAllocSize(Ty);
+ // For an alloca with size=0, we don't need to add a field and they
+ // can just point to any index in the frame. Use index 0.
+ if (FieldSize == 0) {
+ return 0;
+ }
+
// The field alignment might not be the type alignment, but we need
// to remember the type alignment anyway to build the type.
- Align TyAlignment = DL.getABITypeAlign(Ty);
- if (!FieldAlignment) FieldAlignment = TyAlignment;
+ // If we are spilling values we don't need to worry about ABI alignment
+ // concerns.
+ auto ABIAlign = DL.getABITypeAlign(Ty);
+ Align TyAlignment =
+ (IsSpillOfValue && MaxFrameAlignment)
+ ? (*MaxFrameAlignment < ABIAlign ? *MaxFrameAlignment : ABIAlign)
+ : ABIAlign;
+ if (!FieldAlignment) {
+ FieldAlignment = TyAlignment;
+ }
// Lay out header fields immediately.
uint64_t Offset;
@@ -492,12 +547,20 @@ public:
assert(IsFinished && "not yet finished!");
return Fields[Id].LayoutFieldIndex;
}
+
+ Field getLayoutField(FieldIDType Id) const {
+ assert(IsFinished && "not yet finished!");
+ return Fields[Id];
+ }
};
} // namespace
void FrameDataInfo::updateLayoutIndex(FrameTypeBuilder &B) {
auto Updater = [&](Value *I) {
- setFieldIndex(I, B.getLayoutFieldIndex(getFieldIndex(I)));
+ auto Field = B.getLayoutField(getFieldIndex(I));
+ setFieldIndex(I, Field.LayoutFieldIndex);
+ setAlign(I, Field.Alignment.value());
+ setOffset(I, Field.Offset);
};
LayoutIndexUpdateStarted = true;
for (auto &S : Spills)
@@ -510,7 +573,6 @@ void FrameDataInfo::updateLayoutIndex(FrameTypeBuilder &B) {
void FrameTypeBuilder::addFieldForAllocas(const Function &F,
FrameDataInfo &FrameData,
coro::Shape &Shape) {
- DenseMap<AllocaInst *, unsigned int> AllocaIndex;
using AllocaSetType = SmallVector<AllocaInst *, 4>;
SmallVector<AllocaSetType, 4> NonOverlapedAllocas;
@@ -532,7 +594,6 @@ void FrameTypeBuilder::addFieldForAllocas(const Function &F,
if (!Shape.ReuseFrameSlot && !EnableReuseStorageInFrame) {
for (const auto &A : FrameData.Allocas) {
AllocaInst *Alloca = A.Alloca;
- AllocaIndex[Alloca] = NonOverlapedAllocas.size();
NonOverlapedAllocas.emplace_back(AllocaSetType(1, Alloca));
}
return;
@@ -613,13 +674,11 @@ void FrameTypeBuilder::addFieldForAllocas(const Function &F,
bool CouldMerge = NoInference && Alignable;
if (!CouldMerge)
continue;
- AllocaIndex[Alloca] = AllocaIndex[*AllocaSet.begin()];
AllocaSet.push_back(Alloca);
Merged = true;
break;
}
if (!Merged) {
- AllocaIndex[Alloca] = NonOverlapedAllocas.size();
NonOverlapedAllocas.emplace_back(AllocaSetType(1, Alloca));
}
}
@@ -716,6 +775,314 @@ void FrameTypeBuilder::finish(StructType *Ty) {
IsFinished = true;
}
+static void cacheDIVar(FrameDataInfo &FrameData,
+ DenseMap<Value *, DILocalVariable *> &DIVarCache) {
+ for (auto *V : FrameData.getAllDefs()) {
+ if (DIVarCache.find(V) != DIVarCache.end())
+ continue;
+
+ auto DDIs = FindDbgDeclareUses(V);
+ auto *I = llvm::find_if(DDIs, [](DbgDeclareInst *DDI) {
+ return DDI->getExpression()->getNumElements() == 0;
+ });
+ if (I != DDIs.end())
+ DIVarCache.insert({V, (*I)->getVariable()});
+ }
+}
+
+/// Create name for Type. It uses MDString to store new created string to
+/// avoid memory leak.
+static StringRef solveTypeName(Type *Ty) {
+ if (Ty->isIntegerTy()) {
+ // The longest name in common may be '__int_128', which has 9 bits.
+ SmallString<16> Buffer;
+ raw_svector_ostream OS(Buffer);
+ OS << "__int_" << cast<IntegerType>(Ty)->getBitWidth();
+ auto *MDName = MDString::get(Ty->getContext(), OS.str());
+ return MDName->getString();
+ }
+
+ if (Ty->isFloatingPointTy()) {
+ if (Ty->isFloatTy())
+ return "__float_";
+ if (Ty->isDoubleTy())
+ return "__double_";
+ return "__floating_type_";
+ }
+
+ if (Ty->isPointerTy()) {
+ auto *PtrTy = cast<PointerType>(Ty);
+ Type *PointeeTy = PtrTy->getElementType();
+ auto Name = solveTypeName(PointeeTy);
+ if (Name == "UnknownType")
+ return "PointerType";
+ SmallString<16> Buffer;
+ Twine(Name + "_Ptr").toStringRef(Buffer);
+ auto *MDName = MDString::get(Ty->getContext(), Buffer.str());
+ return MDName->getString();
+ }
+
+ if (Ty->isStructTy()) {
+ if (!cast<StructType>(Ty)->hasName())
+ return "__LiteralStructType_";
+
+ auto Name = Ty->getStructName();
+
+ SmallString<16> Buffer(Name);
+ for_each(Buffer, [](auto &Iter) {
+ if (Iter == '.' || Iter == ':')
+ Iter = '_';
+ });
+ auto *MDName = MDString::get(Ty->getContext(), Buffer.str());
+ return MDName->getString();
+ }
+
+ return "UnknownType";
+}
+
+static DIType *solveDIType(DIBuilder &Builder, Type *Ty, DataLayout &Layout,
+ DIScope *Scope, unsigned LineNum,
+ DenseMap<Type *, DIType *> &DITypeCache) {
+ if (DIType *DT = DITypeCache.lookup(Ty))
+ return DT;
+
+ StringRef Name = solveTypeName(Ty);
+
+ DIType *RetType = nullptr;
+
+ if (Ty->isIntegerTy()) {
+ auto BitWidth = cast<IntegerType>(Ty)->getBitWidth();
+ RetType = Builder.createBasicType(Name, BitWidth, dwarf::DW_ATE_signed,
+ llvm::DINode::FlagArtificial);
+ } else if (Ty->isFloatingPointTy()) {
+ RetType = Builder.createBasicType(Name, Layout.getTypeSizeInBits(Ty),
+ dwarf::DW_ATE_float,
+ llvm::DINode::FlagArtificial);
+ } else if (Ty->isPointerTy()) {
+ // Construct BasicType instead of PointerType to avoid infinite
+ // search problem.
+ // For example, we would be in trouble if we traverse recursively:
+ //
+ // struct Node {
+ // Node* ptr;
+ // };
+ RetType = Builder.createBasicType(Name, Layout.getTypeSizeInBits(Ty),
+ dwarf::DW_ATE_address,
+ llvm::DINode::FlagArtificial);
+ } else if (Ty->isStructTy()) {
+ auto *DIStruct = Builder.createStructType(
+ Scope, Name, Scope->getFile(), LineNum, Layout.getTypeSizeInBits(Ty),
+ Layout.getPrefTypeAlignment(Ty), llvm::DINode::FlagArtificial, nullptr,
+ llvm::DINodeArray());
+
+ auto *StructTy = cast<StructType>(Ty);
+ SmallVector<Metadata *, 16> Elements;
+ for (unsigned I = 0; I < StructTy->getNumElements(); I++) {
+ DIType *DITy = solveDIType(Builder, StructTy->getElementType(I), Layout,
+ Scope, LineNum, DITypeCache);
+ assert(DITy);
+ Elements.push_back(Builder.createMemberType(
+ Scope, DITy->getName(), Scope->getFile(), LineNum,
+ DITy->getSizeInBits(), DITy->getAlignInBits(),
+ Layout.getStructLayout(StructTy)->getElementOffsetInBits(I),
+ llvm::DINode::FlagArtificial, DITy));
+ }
+
+ Builder.replaceArrays(DIStruct, Builder.getOrCreateArray(Elements));
+
+ RetType = DIStruct;
+ } else {
+ LLVM_DEBUG(dbgs() << "Unresolved Type: " << *Ty << "\n";);
+ SmallString<32> Buffer;
+ raw_svector_ostream OS(Buffer);
+ OS << Name.str() << "_" << Layout.getTypeSizeInBits(Ty);
+ RetType = Builder.createBasicType(OS.str(), Layout.getTypeSizeInBits(Ty),
+ dwarf::DW_ATE_address,
+ llvm::DINode::FlagArtificial);
+ }
+
+ DITypeCache.insert({Ty, RetType});
+ return RetType;
+}
+
+/// Build artificial debug info for C++ coroutine frames to allow users to
+/// inspect the contents of the frame directly
+///
+/// Create Debug information for coroutine frame with debug name "__coro_frame".
+/// The debug information for the fields of coroutine frame is constructed from
+/// the following way:
+/// 1. For all the value in the Frame, we search the use of dbg.declare to find
+/// the corresponding debug variables for the value. If we can find the
+/// debug variable, we can get full and accurate debug information.
+/// 2. If we can't get debug information in step 1 and 2, we could only try to
+/// build the DIType by Type. We did this in solveDIType. We only handle
+/// integer, float, double, integer type and struct type for now.
+static void buildFrameDebugInfo(Function &F, coro::Shape &Shape,
+ FrameDataInfo &FrameData) {
+ DISubprogram *DIS = F.getSubprogram();
+ // If there is no DISubprogram for F, it implies the Function are not compiled
+ // with debug info. So we also don't need to generate debug info for the frame
+ // neither.
+ if (!DIS || !DIS->getUnit() ||
+ !dwarf::isCPlusPlus(
+ (dwarf::SourceLanguage)DIS->getUnit()->getSourceLanguage()))
+ return;
+
+ assert(Shape.ABI == coro::ABI::Switch &&
+ "We could only build debug infomation for C++ coroutine now.\n");
+
+ DIBuilder DBuilder(*F.getParent(), /*AllowUnresolved*/ false);
+
+ AllocaInst *PromiseAlloca = Shape.getPromiseAlloca();
+ assert(PromiseAlloca &&
+ "Coroutine with switch ABI should own Promise alloca");
+
+ TinyPtrVector<DbgDeclareInst *> DIs = FindDbgDeclareUses(PromiseAlloca);
+ if (DIs.empty())
+ return;
+
+ DbgDeclareInst *PromiseDDI = DIs.front();
+ DILocalVariable *PromiseDIVariable = PromiseDDI->getVariable();
+ DILocalScope *PromiseDIScope = PromiseDIVariable->getScope();
+ DIFile *DFile = PromiseDIScope->getFile();
+ DILocation *DILoc = PromiseDDI->getDebugLoc().get();
+ unsigned LineNum = PromiseDIVariable->getLine();
+
+ DICompositeType *FrameDITy = DBuilder.createStructType(
+ DIS, "__coro_frame_ty", DFile, LineNum, Shape.FrameSize * 8,
+ Shape.FrameAlign.value() * 8, llvm::DINode::FlagArtificial, nullptr,
+ llvm::DINodeArray());
+ StructType *FrameTy = Shape.FrameTy;
+ SmallVector<Metadata *, 16> Elements;
+ DataLayout Layout = F.getParent()->getDataLayout();
+
+ DenseMap<Value *, DILocalVariable *> DIVarCache;
+ cacheDIVar(FrameData, DIVarCache);
+
+ unsigned ResumeIndex = coro::Shape::SwitchFieldIndex::Resume;
+ unsigned DestroyIndex = coro::Shape::SwitchFieldIndex::Destroy;
+ unsigned IndexIndex = Shape.SwitchLowering.IndexField;
+
+ DenseMap<unsigned, StringRef> NameCache;
+ NameCache.insert({ResumeIndex, "__resume_fn"});
+ NameCache.insert({DestroyIndex, "__destroy_fn"});
+ NameCache.insert({IndexIndex, "__coro_index"});
+
+ Type *ResumeFnTy = FrameTy->getElementType(ResumeIndex),
+ *DestroyFnTy = FrameTy->getElementType(DestroyIndex),
+ *IndexTy = FrameTy->getElementType(IndexIndex);
+
+ DenseMap<unsigned, DIType *> TyCache;
+ TyCache.insert({ResumeIndex,
+ DBuilder.createBasicType("__resume_fn",
+ Layout.getTypeSizeInBits(ResumeFnTy),
+ dwarf::DW_ATE_address)});
+ TyCache.insert(
+ {DestroyIndex, DBuilder.createBasicType(
+ "__destroy_fn", Layout.getTypeSizeInBits(DestroyFnTy),
+ dwarf::DW_ATE_address)});
+
+ /// FIXME: If we fill the field `SizeInBits` with the actual size of
+ /// __coro_index in bits, then __coro_index wouldn't show in the debugger.
+ TyCache.insert({IndexIndex, DBuilder.createBasicType(
+ "__coro_index",
+ (Layout.getTypeSizeInBits(IndexTy) < 8)
+ ? 8
+ : Layout.getTypeSizeInBits(IndexTy),
+ dwarf::DW_ATE_unsigned_char)});
+
+ for (auto *V : FrameData.getAllDefs()) {
+ if (DIVarCache.find(V) == DIVarCache.end())
+ continue;
+
+ auto Index = FrameData.getFieldIndex(V);
+
+ NameCache.insert({Index, DIVarCache[V]->getName()});
+ TyCache.insert({Index, DIVarCache[V]->getType()});
+ }
+
+ // Cache from index to (Align, Offset Pair)
+ DenseMap<unsigned, std::pair<unsigned, unsigned>> OffsetCache;
+ // The Align and Offset of Resume function and Destroy function are fixed.
+ OffsetCache.insert({ResumeIndex, {8, 0}});
+ OffsetCache.insert({DestroyIndex, {8, 8}});
+ OffsetCache.insert(
+ {IndexIndex,
+ {Shape.SwitchLowering.IndexAlign, Shape.SwitchLowering.IndexOffset}});
+
+ for (auto *V : FrameData.getAllDefs()) {
+ auto Index = FrameData.getFieldIndex(V);
+
+ OffsetCache.insert(
+ {Index, {FrameData.getAlign(V), FrameData.getOffset(V)}});
+ }
+
+ DenseMap<Type *, DIType *> DITypeCache;
+ // This counter is used to avoid same type names. e.g., there would be
+ // many i32 and i64 types in one coroutine. And we would use i32_0 and
+ // i32_1 to avoid the same type. Since it makes no sense the name of the
+ // fields confilicts with each other.
+ unsigned UnknownTypeNum = 0;
+ for (unsigned Index = 0; Index < FrameTy->getNumElements(); Index++) {
+ if (OffsetCache.find(Index) == OffsetCache.end())
+ continue;
+
+ std::string Name;
+ uint64_t SizeInBits;
+ uint32_t AlignInBits;
+ uint64_t OffsetInBits;
+ DIType *DITy = nullptr;
+
+ Type *Ty = FrameTy->getElementType(Index);
+ assert(Ty->isSized() && "We can't handle type which is not sized.\n");
+ SizeInBits = Layout.getTypeSizeInBits(Ty).getFixedSize();
+ AlignInBits = OffsetCache[Index].first * 8;
+ OffsetInBits = OffsetCache[Index].second * 8;
+
+ if (NameCache.find(Index) != NameCache.end()) {
+ Name = NameCache[Index].str();
+ DITy = TyCache[Index];
+ } else {
+ DITy = solveDIType(DBuilder, Ty, Layout, FrameDITy, LineNum, DITypeCache);
+ assert(DITy && "SolveDIType shouldn't return nullptr.\n");
+ Name = DITy->getName().str();
+ Name += "_" + std::to_string(UnknownTypeNum);
+ UnknownTypeNum++;
+ }
+
+ Elements.push_back(DBuilder.createMemberType(
+ FrameDITy, Name, DFile, LineNum, SizeInBits, AlignInBits, OffsetInBits,
+ llvm::DINode::FlagArtificial, DITy));
+ }
+
+ DBuilder.replaceArrays(FrameDITy, DBuilder.getOrCreateArray(Elements));
+
+ auto *FrameDIVar = DBuilder.createAutoVariable(PromiseDIScope, "__coro_frame",
+ DFile, LineNum, FrameDITy,
+ true, DINode::FlagArtificial);
+ assert(FrameDIVar->isValidLocationForIntrinsic(PromiseDDI->getDebugLoc()));
+
+ // Subprogram would have ContainedNodes field which records the debug
+ // variables it contained. So we need to add __coro_frame to the
+ // ContainedNodes of it.
+ //
+ // If we don't add __coro_frame to the RetainedNodes, user may get
+ // `no symbol __coro_frame in context` rather than `__coro_frame`
+ // is optimized out, which is more precise.
+ if (auto *SubProgram = dyn_cast<DISubprogram>(PromiseDIScope)) {
+ auto RetainedNodes = SubProgram->getRetainedNodes();
+ SmallVector<Metadata *, 32> RetainedNodesVec(RetainedNodes.begin(),
+ RetainedNodes.end());
+ RetainedNodesVec.push_back(FrameDIVar);
+ SubProgram->replaceOperandWith(
+ 7, (MDTuple::get(F.getContext(), RetainedNodesVec)));
+ }
+
+ DBuilder.insertDeclare(Shape.FramePtr, FrameDIVar,
+ DBuilder.createExpression(), DILoc,
+ Shape.FramePtr->getNextNode());
+}
+
// Build a struct that will keep state for an active coroutine.
// struct f.frame {
// ResumeFnTy ResumeFnAddr;
@@ -734,7 +1101,11 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape,
return StructType::create(C, Name);
}();
- FrameTypeBuilder B(C, DL);
+ // We will use this value to cap the alignment of spilled values.
+ Optional<Align> MaxFrameAlignment;
+ if (Shape.ABI == coro::ABI::Async)
+ MaxFrameAlignment = Shape.AsyncLowering.getContextAlignment();
+ FrameTypeBuilder B(C, DL, MaxFrameAlignment);
AllocaInst *PromiseAlloca = Shape.getPromiseAlloca();
Optional<FieldIDType> SwitchIndexFieldId;
@@ -786,8 +1157,9 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape,
// instead of the pointer itself.
if (const Argument *A = dyn_cast<Argument>(S.first))
if (A->hasByValAttr())
- FieldType = FieldType->getPointerElementType();
- FieldIDType Id = B.addField(FieldType, None);
+ FieldType = A->getParamByValType();
+ FieldIDType Id =
+ B.addField(FieldType, None, false /*header*/, true /*IsSpillOfValue*/);
FrameData.setFieldIndex(S.first, Id);
}
@@ -797,15 +1169,18 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape,
Shape.FrameSize = B.getStructSize();
switch (Shape.ABI) {
- case coro::ABI::Switch:
+ case coro::ABI::Switch: {
// In the switch ABI, remember the switch-index field.
- Shape.SwitchLowering.IndexField =
- B.getLayoutFieldIndex(*SwitchIndexFieldId);
+ auto IndexField = B.getLayoutField(*SwitchIndexFieldId);
+ Shape.SwitchLowering.IndexField = IndexField.LayoutFieldIndex;
+ Shape.SwitchLowering.IndexAlign = IndexField.Alignment.value();
+ Shape.SwitchLowering.IndexOffset = IndexField.Offset;
// Also round the frame size up to a multiple of its alignment, as is
// generally expected in C/C++.
Shape.FrameSize = alignTo(Shape.FrameSize, Shape.FrameAlign);
break;
+ }
// In the retcon ABI, remember whether the frame is inline in the storage.
case coro::ABI::Retcon:
@@ -869,7 +1244,7 @@ struct AllocaUseVisitor : PtrUseVisitor<AllocaUseVisitor> {
: PtrUseVisitor(DL), DT(DT), CoroBegin(CB), Checker(Checker) {}
void visit(Instruction &I) {
- UserBBs.insert(I.getParent());
+ Users.insert(&I);
Base::visit(I);
// If the pointer is escaped prior to CoroBegin, we have to assume it would
// be written into before CoroBegin as well.
@@ -972,6 +1347,12 @@ struct AllocaUseVisitor : PtrUseVisitor<AllocaUseVisitor> {
handleAlias(GEPI);
}
+ void visitIntrinsicInst(IntrinsicInst &II) {
+ if (II.getIntrinsicID() != Intrinsic::lifetime_start)
+ return Base::visitIntrinsicInst(II);
+ LifetimeStarts.insert(&II);
+ }
+
void visitCallBase(CallBase &CB) {
for (unsigned Op = 0, OpCount = CB.getNumArgOperands(); Op < OpCount; ++Op)
if (U->get() == CB.getArgOperand(Op) && !CB.doesNotCapture(Op))
@@ -1005,18 +1386,40 @@ private:
// after CoroBegin. Each entry contains the instruction and the offset in the
// original Alloca. They need to be recreated after CoroBegin off the frame.
DenseMap<Instruction *, llvm::Optional<APInt>> AliasOffetMap{};
- SmallPtrSet<BasicBlock *, 2> UserBBs{};
+ SmallPtrSet<Instruction *, 4> Users{};
+ SmallPtrSet<IntrinsicInst *, 2> LifetimeStarts{};
bool MayWriteBeforeCoroBegin{false};
mutable llvm::Optional<bool> ShouldLiveOnFrame{};
bool computeShouldLiveOnFrame() const {
+ // If lifetime information is available, we check it first since it's
+ // more precise. We look at every pair of lifetime.start intrinsic and
+ // every basic block that uses the pointer to see if they cross suspension
+ // points. The uses cover both direct uses as well as indirect uses.
+ if (!LifetimeStarts.empty()) {
+ for (auto *I : Users)
+ for (auto *S : LifetimeStarts)
+ if (Checker.isDefinitionAcrossSuspend(*S, I))
+ return true;
+ return false;
+ }
+ // FIXME: Ideally the isEscaped check should come at the beginning.
+ // However there are a few loose ends that need to be fixed first before
+ // we can do that. We need to make sure we are not over-conservative, so
+ // that the data accessed in-between await_suspend and symmetric transfer
+ // is always put on the stack, and also data accessed after coro.end is
+ // always put on the stack (esp the return object). To fix that, we need
+ // to:
+ // 1) Potentially treat sret as nocapture in calls
+ // 2) Special handle the return object and put it on the stack
+ // 3) Utilize lifetime.end intrinsic
if (PI.isEscaped())
return true;
- for (auto *BB1 : UserBBs)
- for (auto *BB2 : UserBBs)
- if (Checker.hasPathCrossingSuspendPoint(BB1, BB2))
+ for (auto *U1 : Users)
+ for (auto *U2 : Users)
+ if (Checker.isDefinitionAcrossSuspend(*U1, U2))
return true;
return false;
@@ -1078,6 +1481,15 @@ static Instruction *splitBeforeCatchSwitch(CatchSwitchInst *CatchSwitch) {
return CleanupRet;
}
+static void createFramePtr(coro::Shape &Shape) {
+ auto *CB = Shape.CoroBegin;
+ IRBuilder<> Builder(CB->getNextNode());
+ StructType *FrameTy = Shape.FrameTy;
+ PointerType *FramePtrTy = FrameTy->getPointerTo();
+ Shape.FramePtr =
+ cast<Instruction>(Builder.CreateBitCast(CB, FramePtrTy, "FramePtr"));
+}
+
// Replace all alloca and SSA values that are accessed across suspend points
// with GetElementPointer from coroutine frame + loads and stores. Create an
// AllocaSpillBB that will become the new entry block for the resume parts of
@@ -1104,11 +1516,9 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,
coro::Shape &Shape) {
auto *CB = Shape.CoroBegin;
LLVMContext &C = CB->getContext();
- IRBuilder<> Builder(CB->getNextNode());
+ IRBuilder<> Builder(C);
StructType *FrameTy = Shape.FrameTy;
- PointerType *FramePtrTy = FrameTy->getPointerTo();
- auto *FramePtr =
- cast<Instruction>(Builder.CreateBitCast(CB, FramePtrTy, "FramePtr"));
+ Instruction *FramePtr = Shape.FramePtr;
DominatorTree DT(*CB->getFunction());
SmallDenseMap<llvm::Value *, llvm::AllocaInst *, 4> DbgPtrAllocaCache;
@@ -1152,6 +1562,7 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,
for (auto const &E : FrameData.Spills) {
Value *Def = E.first;
+ auto SpillAlignment = Align(FrameData.getAlign(Def));
// Create a store instruction storing the value into the
// coroutine frame.
Instruction *InsertPt = nullptr;
@@ -1208,9 +1619,9 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,
// instead of the pointer itself.
auto *Value =
Builder.CreateLoad(Def->getType()->getPointerElementType(), Def);
- Builder.CreateStore(Value, G);
+ Builder.CreateAlignedStore(Value, G, SpillAlignment);
} else {
- Builder.CreateStore(Def, G);
+ Builder.CreateAlignedStore(Def, G, SpillAlignment);
}
BasicBlock *CurrentBlock = nullptr;
@@ -1228,9 +1639,9 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,
if (NeedToCopyArgPtrValue)
CurrentReload = GEP;
else
- CurrentReload = Builder.CreateLoad(
+ CurrentReload = Builder.CreateAlignedLoad(
FrameTy->getElementType(FrameData.getFieldIndex(E.first)), GEP,
- E.first->getName() + Twine(".reload"));
+ SpillAlignment, E.first->getName() + Twine(".reload"));
TinyPtrVector<DbgDeclareInst *> DIs = FindDbgDeclareUses(Def);
for (DbgDeclareInst *DDI : DIs) {
@@ -1244,7 +1655,7 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,
&*Builder.GetInsertPoint());
// This dbg.declare is for the main function entry point. It
// will be deleted in all coro-split functions.
- coro::salvageDebugInfo(DbgPtrAllocaCache, DDI);
+ coro::salvageDebugInfo(DbgPtrAllocaCache, DDI, Shape.ReuseFrameSlot);
}
}
@@ -1292,8 +1703,8 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,
}
// If we found any alloca, replace all of their remaining uses with GEP
- // instructions. Because new dbg.declare have been created for these alloca,
- // we also delete the original dbg.declare and replace other uses with undef.
+  // instructions. To preserve debuggability, we replace the uses of allocas
+  // in dbg.declares and dbg.values with the reload from the frame.
// Note: We cannot replace the alloca with GEP instructions indiscriminately,
// as some of the uses may not be dominated by CoroBegin.
Builder.SetInsertPoint(&Shape.AllocaSpillBlock->front());
@@ -1311,17 +1722,10 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,
auto *G = GetFramePointer(Alloca);
G->setName(Alloca->getName() + Twine(".reload.addr"));
- SmallPtrSet<BasicBlock *, 4> SeenDbgBBs;
- TinyPtrVector<DbgDeclareInst *> DIs = FindDbgDeclareUses(Alloca);
- if (!DIs.empty())
- DIBuilder(*Alloca->getModule(),
- /*AllowUnresolved*/ false)
- .insertDeclare(G, DIs.front()->getVariable(),
- DIs.front()->getExpression(),
- DIs.front()->getDebugLoc(), DIs.front());
- for (auto *DI : FindDbgDeclareUses(Alloca))
- DI->eraseFromParent();
- replaceDbgUsesWithUndef(Alloca);
+ SmallVector<DbgVariableIntrinsic *, 4> DIs;
+ findDbgUsers(DIs, Alloca);
+ for (auto *DVI : DIs)
+ DVI->replaceUsesOfWith(Alloca, G);
for (Instruction *I : UsersToUpdate)
I->replaceUsesOfWith(Alloca, G);
@@ -1347,7 +1751,7 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,
auto *FramePtrRaw =
Builder.CreateBitCast(FramePtr, Type::getInt8PtrTy(C));
auto *AliasPtr = Builder.CreateGEP(
- FramePtrRaw,
+ Type::getInt8Ty(C), FramePtrRaw,
ConstantInt::get(Type::getInt64Ty(C), Alias.second.getValue()));
auto *AliasPtrTyped =
Builder.CreateBitCast(AliasPtr, Alias.first->getType());
@@ -1358,77 +1762,6 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,
return FramePtr;
}
-// Sets the unwind edge of an instruction to a particular successor.
-static void setUnwindEdgeTo(Instruction *TI, BasicBlock *Succ) {
- if (auto *II = dyn_cast<InvokeInst>(TI))
- II->setUnwindDest(Succ);
- else if (auto *CS = dyn_cast<CatchSwitchInst>(TI))
- CS->setUnwindDest(Succ);
- else if (auto *CR = dyn_cast<CleanupReturnInst>(TI))
- CR->setUnwindDest(Succ);
- else
- llvm_unreachable("unexpected terminator instruction");
-}
-
-// Replaces all uses of OldPred with the NewPred block in all PHINodes in a
-// block.
-static void updatePhiNodes(BasicBlock *DestBB, BasicBlock *OldPred,
- BasicBlock *NewPred, PHINode *Until = nullptr) {
- unsigned BBIdx = 0;
- for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) {
- PHINode *PN = cast<PHINode>(I);
-
- // We manually update the LandingPadReplacement PHINode and it is the last
- // PHI Node. So, if we find it, we are done.
- if (Until == PN)
- break;
-
- // Reuse the previous value of BBIdx if it lines up. In cases where we
- // have multiple phi nodes with *lots* of predecessors, this is a speed
- // win because we don't have to scan the PHI looking for TIBB. This
- // happens because the BB list of PHI nodes are usually in the same
- // order.
- if (PN->getIncomingBlock(BBIdx) != OldPred)
- BBIdx = PN->getBasicBlockIndex(OldPred);
-
- assert(BBIdx != (unsigned)-1 && "Invalid PHI Index!");
- PN->setIncomingBlock(BBIdx, NewPred);
- }
-}
-
-// Uses SplitEdge unless the successor block is an EHPad, in which case do EH
-// specific handling.
-static BasicBlock *ehAwareSplitEdge(BasicBlock *BB, BasicBlock *Succ,
- LandingPadInst *OriginalPad,
- PHINode *LandingPadReplacement) {
- auto *PadInst = Succ->getFirstNonPHI();
- if (!LandingPadReplacement && !PadInst->isEHPad())
- return SplitEdge(BB, Succ);
-
- auto *NewBB = BasicBlock::Create(BB->getContext(), "", BB->getParent(), Succ);
- setUnwindEdgeTo(BB->getTerminator(), NewBB);
- updatePhiNodes(Succ, BB, NewBB, LandingPadReplacement);
-
- if (LandingPadReplacement) {
- auto *NewLP = OriginalPad->clone();
- auto *Terminator = BranchInst::Create(Succ, NewBB);
- NewLP->insertBefore(Terminator);
- LandingPadReplacement->addIncoming(NewLP, NewBB);
- return NewBB;
- }
- Value *ParentPad = nullptr;
- if (auto *FuncletPad = dyn_cast<FuncletPadInst>(PadInst))
- ParentPad = FuncletPad->getParentPad();
- else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(PadInst))
- ParentPad = CatchSwitch->getParentPad();
- else
- llvm_unreachable("handling for other EHPads not implemented yet");
-
- auto *NewCleanupPad = CleanupPadInst::Create(ParentPad, {}, "", NewBB);
- CleanupReturnInst::Create(NewCleanupPad, Succ, NewBB);
- return NewBB;
-}
-
// Moves the values in the PHIs in SuccBB that correspong to PredBB into a new
// PHI in InsertedBB.
static void movePHIValuesToInsertedBlock(BasicBlock *SuccBB,
@@ -1524,6 +1857,24 @@ static void rewritePHIsForCleanupPad(BasicBlock *CleanupPadBB,
}
}
+static void cleanupSinglePredPHIs(Function &F) {
+ SmallVector<PHINode *, 32> Worklist;
+ for (auto &BB : F) {
+ for (auto &Phi : BB.phis()) {
+ if (Phi.getNumIncomingValues() == 1) {
+ Worklist.push_back(&Phi);
+ } else
+ break;
+ }
+ }
+ while (!Worklist.empty()) {
+ auto *Phi = Worklist.back();
+ Worklist.pop_back();
+ auto *OriginalValue = Phi->getIncomingValue(0);
+ Phi->replaceAllUsesWith(OriginalValue);
+ }
+}
+
static void rewritePHIs(BasicBlock &BB) {
// For every incoming edge we will create a block holding all
// incoming values in a single PHI nodes.
@@ -1631,11 +1982,16 @@ static void rewriteMaterializableInstructions(IRBuilder<> &IRB,
for (Instruction *U : E.second) {
// If we have not seen this block, materialize the value.
if (CurrentBlock != U->getParent()) {
- CurrentBlock = U->getParent();
+
+ bool IsInCoroSuspendBlock = isa<AnyCoroSuspendInst>(U);
+ CurrentBlock = IsInCoroSuspendBlock
+ ? U->getParent()->getSinglePredecessor()
+ : U->getParent();
CurrentMaterialization = cast<Instruction>(Def)->clone();
CurrentMaterialization->setName(Def->getName());
CurrentMaterialization->insertBefore(
- &*CurrentBlock->getFirstInsertionPt());
+ IsInCoroSuspendBlock ? CurrentBlock->getTerminator()
+ : &*CurrentBlock->getFirstInsertionPt());
}
if (auto *PN = dyn_cast<PHINode>(U)) {
assert(PN->getNumIncomingValues() == 1 &&
@@ -2122,24 +2478,6 @@ static void sinkLifetimeStartMarkers(Function &F, coro::Shape &Shape,
static void collectFrameAllocas(Function &F, coro::Shape &Shape,
const SuspendCrossingInfo &Checker,
SmallVectorImpl<AllocaInfo> &Allocas) {
- // Collect lifetime.start info for each alloca.
- using LifetimeStart = SmallPtrSet<Instruction *, 2>;
- llvm::DenseMap<AllocaInst *, std::unique_ptr<LifetimeStart>> LifetimeMap;
- for (Instruction &I : instructions(F)) {
- auto *II = dyn_cast<IntrinsicInst>(&I);
- if (!II || II->getIntrinsicID() != Intrinsic::lifetime_start)
- continue;
-
- if (auto *OpInst = dyn_cast<Instruction>(II->getOperand(1))) {
- if (auto *AI = dyn_cast<AllocaInst>(OpInst->stripPointerCasts())) {
-
- if (LifetimeMap.find(AI) == LifetimeMap.end())
- LifetimeMap[AI] = std::make_unique<LifetimeStart>();
- LifetimeMap[AI]->insert(isa<AllocaInst>(OpInst) ? II : OpInst);
- }
- }
- }
-
for (Instruction &I : instructions(F)) {
auto *AI = dyn_cast<AllocaInst>(&I);
if (!AI)
@@ -2149,23 +2487,6 @@ static void collectFrameAllocas(Function &F, coro::Shape &Shape,
if (AI == Shape.SwitchLowering.PromiseAlloca) {
continue;
}
- bool ShouldLiveOnFrame = false;
- auto Iter = LifetimeMap.find(AI);
- if (Iter != LifetimeMap.end()) {
- // Check against lifetime.start if the instruction has the info.
- for (User *U : I.users()) {
- for (auto *S : *Iter->second)
- if ((ShouldLiveOnFrame = Checker.isDefinitionAcrossSuspend(*S, U)))
- break;
- if (ShouldLiveOnFrame)
- break;
- }
- if (!ShouldLiveOnFrame)
- continue;
- }
- // At this point, either ShouldLiveOnFrame is true or we didn't have
- // lifetime information. We will need to rely on more precise pointer
- // tracking.
DominatorTree DT(F);
AllocaUseVisitor Visitor{F.getParent()->getDataLayout(), DT,
*Shape.CoroBegin, Checker};
@@ -2179,58 +2500,94 @@ static void collectFrameAllocas(Function &F, coro::Shape &Shape,
void coro::salvageDebugInfo(
SmallDenseMap<llvm::Value *, llvm::AllocaInst *, 4> &DbgPtrAllocaCache,
- DbgDeclareInst *DDI, bool LoadFromFramePtr) {
- Function *F = DDI->getFunction();
+ DbgVariableIntrinsic *DVI, bool ReuseFrameSlot) {
+ Function *F = DVI->getFunction();
IRBuilder<> Builder(F->getContext());
auto InsertPt = F->getEntryBlock().getFirstInsertionPt();
while (isa<IntrinsicInst>(InsertPt))
++InsertPt;
Builder.SetInsertPoint(&F->getEntryBlock(), InsertPt);
- DIExpression *Expr = DDI->getExpression();
+ DIExpression *Expr = DVI->getExpression();
// Follow the pointer arithmetic all the way to the incoming
// function argument and convert into a DIExpression.
- Value *Storage = DDI->getAddress();
+ bool OutermostLoad = true;
+ Value *Storage = DVI->getVariableLocationOp(0);
+ Value *OriginalStorage = Storage;
while (Storage) {
if (auto *LdInst = dyn_cast<LoadInst>(Storage)) {
Storage = LdInst->getOperand(0);
+ // FIXME: This is a heuristic that works around the fact that
+ // LLVM IR debug intrinsics cannot yet distinguish between
+ // memory and value locations: Because a dbg.declare(alloca) is
+ // implicitly a memory location no DW_OP_deref operation for the
+ // last direct load from an alloca is necessary. This condition
+ // effectively drops the *last* DW_OP_deref in the expression.
+ if (!OutermostLoad)
+ Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore);
+ OutermostLoad = false;
} else if (auto *StInst = dyn_cast<StoreInst>(Storage)) {
Storage = StInst->getOperand(0);
} else if (auto *GEPInst = dyn_cast<GetElementPtrInst>(Storage)) {
- Expr = llvm::salvageDebugInfoImpl(*GEPInst, Expr,
- /*WithStackValue=*/false);
+ SmallVector<Value *> AdditionalValues;
+ DIExpression *SalvagedExpr = llvm::salvageDebugInfoImpl(
+ *GEPInst, Expr,
+ /*WithStackValue=*/false, 0, AdditionalValues);
+ // Debug declares cannot currently handle additional location
+ // operands.
+ if (!SalvagedExpr || !AdditionalValues.empty())
+ break;
+ Expr = SalvagedExpr;
Storage = GEPInst->getOperand(0);
} else if (auto *BCInst = dyn_cast<llvm::BitCastInst>(Storage))
Storage = BCInst->getOperand(0);
else
break;
}
+ if (!Storage)
+ return;
+
// Store a pointer to the coroutine frame object in an alloca so it
// is available throughout the function when producing unoptimized
// code. Extending the lifetime this way is correct because the
// variable has been declared by a dbg.declare intrinsic.
- if (auto Arg = dyn_cast_or_null<llvm::Argument>(Storage)) {
- auto &Cached = DbgPtrAllocaCache[Storage];
- if (!Cached) {
- Cached = Builder.CreateAlloca(Storage->getType(), 0, nullptr,
- Arg->getName() + ".debug");
- Builder.CreateStore(Storage, Cached);
+ //
+  // Avoid creating an alloca that would be eliminated by optimization
+  // passes, which would leave the corresponding dbg.declares invalid.
+ if (!ReuseFrameSlot && !EnableReuseStorageInFrame)
+ if (auto *Arg = dyn_cast<llvm::Argument>(Storage)) {
+ auto &Cached = DbgPtrAllocaCache[Storage];
+ if (!Cached) {
+ Cached = Builder.CreateAlloca(Storage->getType(), 0, nullptr,
+ Arg->getName() + ".debug");
+ Builder.CreateStore(Storage, Cached);
+ }
+ Storage = Cached;
+ // FIXME: LLVM lacks nuanced semantics to differentiate between
+ // memory and direct locations at the IR level. The backend will
+ // turn a dbg.declare(alloca, ..., DIExpression()) into a memory
+ // location. Thus, if there are deref and offset operations in the
+ // expression, we need to add a DW_OP_deref at the *start* of the
+ // expression to first load the contents of the alloca before
+ // adjusting it with the expression.
+ if (Expr && Expr->isComplex())
+ Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore);
}
- Storage = Cached;
- }
- // The FramePtr object adds one extra layer of indirection that
- // needs to be unwrapped.
- if (LoadFromFramePtr)
- Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore);
- auto &VMContext = DDI->getFunction()->getContext();
- DDI->setOperand(
- 0, MetadataAsValue::get(VMContext, ValueAsMetadata::get(Storage)));
- DDI->setOperand(2, MetadataAsValue::get(VMContext, Expr));
- if (auto *InsertPt = dyn_cast_or_null<Instruction>(Storage))
- DDI->moveAfter(InsertPt);
+
+ DVI->replaceVariableLocationOp(OriginalStorage, Storage);
+ DVI->setExpression(Expr);
+  // It makes no sense to move the dbg.value intrinsic.
+ if (!isa<DbgValueInst>(DVI)) {
+ if (auto *InsertPt = dyn_cast<Instruction>(Storage))
+ DVI->moveAfter(InsertPt);
+ else if (isa<Argument>(Storage))
+ DVI->moveAfter(F->getEntryBlock().getFirstNonPHI());
+ }
}
void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
- eliminateSwiftError(F, Shape);
+ // Don't eliminate swifterror in async functions that won't be split.
+ if (Shape.ABI != coro::ABI::Async || !Shape.CoroSuspends.empty())
+ eliminateSwiftError(F, Shape);
if (Shape.ABI == coro::ABI::Switch &&
Shape.SwitchLowering.PromiseAlloca) {
@@ -2267,6 +2624,10 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
}
}
+  // Later code makes structural assumptions about single-predecessor phis, e.g.
+  // that they are not live across a suspend point.
+ cleanupSinglePredPHIs(F);
+
// Transforms multi-edge PHI Nodes, so that any value feeding into a PHI will
// never has its definition separated from the PHI by the suspend point.
rewritePHIs(F);
@@ -2284,11 +2645,19 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
for (int Repeat = 0; Repeat < 4; ++Repeat) {
// See if there are materializable instructions across suspend points.
for (Instruction &I : instructions(F))
- if (materializable(I))
+ if (materializable(I)) {
for (User *U : I.users())
if (Checker.isDefinitionAcrossSuspend(I, U))
Spills[&I].push_back(cast<Instruction>(U));
+ // Manually add dbg.value metadata uses of I.
+ SmallVector<DbgValueInst *, 16> DVIs;
+ findDbgValues(DVIs, &I);
+ for (auto *DVI : DVIs)
+ if (Checker.isDefinitionAcrossSuspend(I, DVI))
+ Spills[&I].push_back(DVI);
+ }
+
if (Spills.empty())
break;
@@ -2301,7 +2670,8 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
}
sinkLifetimeStartMarkers(F, Shape, Checker);
- collectFrameAllocas(F, Shape, Checker, FrameData.Allocas);
+ if (Shape.ABI != coro::ABI::Async || !Shape.CoroSuspends.empty())
+ collectFrameAllocas(F, Shape, Checker, FrameData.Allocas);
LLVM_DEBUG(dumpAllocas(FrameData.Allocas));
// Collect the spills for arguments and other not-materializable values.
@@ -2360,12 +2730,30 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
FrameData.Spills[&I].push_back(cast<Instruction>(U));
}
}
+
+  // We don't want the layout of the coroutine frame to be affected
+  // by debug information. So we only choose to salvage DbgValueInsts
+  // whose values are already in the frame.
+  // The dbg.values for allocas are handled specially.
+ for (auto &Iter : FrameData.Spills) {
+ auto *V = Iter.first;
+ SmallVector<DbgValueInst *, 16> DVIs;
+ findDbgValues(DVIs, V);
+ llvm::for_each(DVIs, [&](DbgValueInst *DVI) {
+ if (Checker.isDefinitionAcrossSuspend(*V, DVI))
+ FrameData.Spills[V].push_back(DVI);
+ });
+ }
+
LLVM_DEBUG(dumpSpills("Spills", FrameData.Spills));
if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce ||
Shape.ABI == coro::ABI::Async)
sinkSpillUsesAfterCoroBegin(F, FrameData, Shape.CoroBegin);
Shape.FrameTy = buildFrameType(F, Shape, FrameData);
- Shape.FramePtr = insertSpills(FrameData, Shape);
+ createFramePtr(Shape);
+ // For now, this works for C++ programs only.
+ buildFrameDebugInfo(F, Shape, FrameData);
+ insertSpills(FrameData, Shape);
lowerLocalAllocas(LocalAllocas, DeadInstructions);
for (auto I : DeadInstructions)
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroInstr.h b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroInstr.h
index 9fa2fd12f80b..5ed800d67fe9 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroInstr.h
+++ b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroInstr.h
@@ -376,6 +376,18 @@ public:
}
};
+/// This represents the llvm.coro.async.size.replace instruction.
+class LLVM_LIBRARY_VISIBILITY CoroAsyncSizeReplace : public IntrinsicInst {
+public:
+ // Methods to support type inquiry through isa, cast, and dyn_cast:
+ static bool classof(const IntrinsicInst *I) {
+ return I->getIntrinsicID() == Intrinsic::coro_async_size_replace;
+ }
+ static bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
+};
+
/// This represents the llvm.coro.frame instruction.
class LLVM_LIBRARY_VISIBILITY CoroFrameInst : public IntrinsicInst {
public:
@@ -511,11 +523,21 @@ inline CoroSaveInst *AnyCoroSuspendInst::getCoroSave() const {
/// This represents the llvm.coro.suspend.async instruction.
class LLVM_LIBRARY_VISIBILITY CoroSuspendAsyncInst : public AnyCoroSuspendInst {
- enum { ResumeFunctionArg, AsyncContextProjectionArg, MustTailCallFuncArg };
-
public:
+ enum {
+ StorageArgNoArg,
+ ResumeFunctionArg,
+ AsyncContextProjectionArg,
+ MustTailCallFuncArg
+ };
+
void checkWellFormed() const;
+ unsigned getStorageArgumentIndex() const {
+ auto *Arg = cast<ConstantInt>(getArgOperand(StorageArgNoArg));
+ return Arg->getZExtValue();
+ }
+
Function *getAsyncContextProjectionFunction() const {
return cast<Function>(
getArgOperand(AsyncContextProjectionArg)->stripPointerCasts());
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroInternal.h b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroInternal.h
index 6c0e52f24542..27ba8524f975 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroInternal.h
+++ b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroInternal.h
@@ -54,7 +54,7 @@ void updateCallGraph(Function &Caller, ArrayRef<Function *> Funcs,
/// holding a pointer to the coroutine frame.
void salvageDebugInfo(
SmallDenseMap<llvm::Value *, llvm::AllocaInst *, 4> &DbgPtrAllocaCache,
- DbgDeclareInst *DDI, bool LoadFromCoroFrame = false);
+ DbgVariableIntrinsic *DVI, bool ReuseFrameSlot);
// Keeps data and helper functions for lowering coroutine intrinsics.
struct LowererBase {
@@ -125,6 +125,7 @@ struct LLVM_LIBRARY_VISIBILITY Shape {
Instruction *FramePtr;
BasicBlock *AllocaSpillBlock;
+  /// This would only be true if optimizations are enabled.
bool ReuseFrameSlot;
struct SwitchLoweringStorage {
@@ -132,6 +133,8 @@ struct LLVM_LIBRARY_VISIBILITY Shape {
AllocaInst *PromiseAlloca;
BasicBlock *ResumeEntryBlock;
unsigned IndexField;
+ unsigned IndexAlign;
+ unsigned IndexOffset;
bool HasFinalSuspend;
};
@@ -146,6 +149,7 @@ struct LLVM_LIBRARY_VISIBILITY Shape {
struct AsyncLoweringStorage {
FunctionType *AsyncFuncTy;
Value *Context;
+ CallingConv::ID AsyncCC;
unsigned ContextArgNo;
uint64_t ContextHeaderSize;
uint64_t ContextAlignment;
@@ -208,7 +212,8 @@ struct LLVM_LIBRARY_VISIBILITY Shape {
case coro::ABI::RetconOnce:
return RetconLowering.ResumePrototype->getFunctionType();
case coro::ABI::Async:
- return AsyncLowering.AsyncFuncTy;
+ // Not used. The function type depends on the active suspend.
+ return nullptr;
}
llvm_unreachable("Unknown coro::ABI enum");
@@ -245,7 +250,7 @@ struct LLVM_LIBRARY_VISIBILITY Shape {
case coro::ABI::RetconOnce:
return RetconLowering.ResumePrototype->getCallingConv();
case coro::ABI::Async:
- return CallingConv::Swift;
+ return AsyncLowering.AsyncCC;
}
llvm_unreachable("Unknown coro::ABI enum");
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
index c4d7db9153e2..b6932dbbfc3f 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -26,6 +26,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/LazyCallGraph.h"
@@ -37,6 +38,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
@@ -367,7 +369,7 @@ static void createResumeEntryBlock(Function &F, coro::Shape &Shape) {
coro::Shape::SwitchFieldIndex::Resume,
"ResumeFn.addr");
auto *NullPtr = ConstantPointerNull::get(cast<PointerType>(
- cast<PointerType>(GepIndex->getType())->getElementType()));
+ FrameTy->getTypeAtIndex(coro::Shape::SwitchFieldIndex::Resume)));
Builder.CreateStore(NullPtr, GepIndex);
} else {
auto *GepIndex = Builder.CreateStructGEP(
@@ -454,16 +456,29 @@ void CoroCloner::handleFinalSuspend() {
}
}
+static FunctionType *
+getFunctionTypeFromAsyncSuspend(AnyCoroSuspendInst *Suspend) {
+ auto *AsyncSuspend = cast<CoroSuspendAsyncInst>(Suspend);
+ auto *StructTy = cast<StructType>(AsyncSuspend->getType());
+ auto &Context = Suspend->getParent()->getParent()->getContext();
+ auto *VoidTy = Type::getVoidTy(Context);
+ return FunctionType::get(VoidTy, StructTy->elements(), false);
+}
+
static Function *createCloneDeclaration(Function &OrigF, coro::Shape &Shape,
const Twine &Suffix,
- Module::iterator InsertBefore) {
+ Module::iterator InsertBefore,
+ AnyCoroSuspendInst *ActiveSuspend) {
Module *M = OrigF.getParent();
- auto *FnTy = Shape.getResumeFunctionType();
+ auto *FnTy = (Shape.ABI != coro::ABI::Async)
+ ? Shape.getResumeFunctionType()
+ : getFunctionTypeFromAsyncSuspend(ActiveSuspend);
Function *NewF =
Function::Create(FnTy, GlobalValue::LinkageTypes::InternalLinkage,
OrigF.getName() + Suffix);
- NewF->addParamAttr(0, Attribute::NonNull);
+ if (Shape.ABI != coro::ABI::Async)
+ NewF->addParamAttr(0, Attribute::NonNull);
// For the async lowering ABI we can't guarantee that the context argument is
// not access via a different pointer not based on the argument.
@@ -572,6 +587,8 @@ void CoroCloner::replaceCoroEnds() {
static void replaceSwiftErrorOps(Function &F, coro::Shape &Shape,
ValueToValueMapTy *VMap) {
+ if (Shape.ABI == coro::ABI::Async && Shape.CoroSuspends.empty())
+ return;
Value *CachedSlot = nullptr;
auto getSwiftErrorSlot = [&](Type *ValueTy) -> Value * {
if (CachedSlot) {
@@ -633,34 +650,34 @@ void CoroCloner::replaceSwiftErrorOps() {
}
void CoroCloner::salvageDebugInfo() {
- SmallVector<DbgDeclareInst *, 8> Worklist;
+ SmallVector<DbgVariableIntrinsic *, 8> Worklist;
SmallDenseMap<llvm::Value *, llvm::AllocaInst *, 4> DbgPtrAllocaCache;
for (auto &BB : *NewF)
for (auto &I : BB)
- if (auto *DDI = dyn_cast<DbgDeclareInst>(&I))
- Worklist.push_back(DDI);
- for (DbgDeclareInst *DDI : Worklist) {
- // This is a heuristic that detects declares left by CoroFrame.
- bool LoadFromFramePtr = !isa<AllocaInst>(DDI->getAddress());
- coro::salvageDebugInfo(DbgPtrAllocaCache, DDI, LoadFromFramePtr);
- }
+ if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I))
+ Worklist.push_back(DVI);
+ for (DbgVariableIntrinsic *DVI : Worklist)
+ coro::salvageDebugInfo(DbgPtrAllocaCache, DVI, Shape.ReuseFrameSlot);
+
// Remove all salvaged dbg.declare intrinsics that became
// either unreachable or stale due to the CoroSplit transformation.
+ DominatorTree DomTree(*NewF);
auto IsUnreachableBlock = [&](BasicBlock *BB) {
- return BB->hasNPredecessors(0) && BB != &NewF->getEntryBlock();
+ return !isPotentiallyReachable(&NewF->getEntryBlock(), BB, nullptr,
+ &DomTree);
};
- for (DbgDeclareInst *DDI : Worklist) {
- if (IsUnreachableBlock(DDI->getParent()))
- DDI->eraseFromParent();
- else if (auto *Alloca = dyn_cast_or_null<AllocaInst>(DDI->getAddress())) {
+ for (DbgVariableIntrinsic *DVI : Worklist) {
+ if (IsUnreachableBlock(DVI->getParent()))
+ DVI->eraseFromParent();
+ else if (dyn_cast_or_null<AllocaInst>(DVI->getVariableLocationOp(0))) {
// Count all non-debuginfo uses in reachable blocks.
unsigned Uses = 0;
- for (auto *User : DDI->getAddress()->users())
+ for (auto *User : DVI->getVariableLocationOp(0)->users())
if (auto *I = dyn_cast<Instruction>(User))
if (!isa<AllocaInst>(I) && !IsUnreachableBlock(I->getParent()))
++Uses;
if (!Uses)
- DDI->eraseFromParent();
+ DVI->eraseFromParent();
}
}
}
@@ -717,15 +734,17 @@ void CoroCloner::replaceEntryBlock() {
}
}
- // Any alloca that's still being used but not reachable from the new entry
- // needs to be moved to the new entry.
+ // Any static alloca that's still being used but not reachable from the new
+ // entry needs to be moved to the new entry.
Function *F = OldEntry->getParent();
DominatorTree DT{*F};
for (auto IT = inst_begin(F), End = inst_end(F); IT != End;) {
Instruction &I = *IT++;
- if (!isa<AllocaInst>(&I) || I.use_empty())
+ auto *Alloca = dyn_cast<AllocaInst>(&I);
+ if (!Alloca || I.use_empty())
continue;
- if (DT.isReachableFromEntry(I.getParent()))
+ if (DT.isReachableFromEntry(I.getParent()) ||
+ !isa<ConstantInt>(Alloca->getArraySize()))
continue;
I.moveBefore(*Entry, Entry->getFirstInsertionPt());
}
@@ -745,10 +764,12 @@ Value *CoroCloner::deriveNewFramePointer() {
// with the active suspend. The frame is located as a tail to the async
// context header.
case coro::ABI::Async: {
- auto *CalleeContext = NewF->getArg(Shape.AsyncLowering.ContextArgNo);
+ auto *ActiveAsyncSuspend = cast<CoroSuspendAsyncInst>(ActiveSuspend);
+ auto ContextIdx = ActiveAsyncSuspend->getStorageArgumentIndex() & 0xff;
+ auto *CalleeContext = NewF->getArg(ContextIdx);
auto *FramePtrTy = Shape.FrameTy->getPointerTo();
- auto *ProjectionFunc = cast<CoroSuspendAsyncInst>(ActiveSuspend)
- ->getAsyncContextProjectionFunction();
+ auto *ProjectionFunc =
+ ActiveAsyncSuspend->getAsyncContextProjectionFunction();
auto DbgLoc =
cast<CoroSuspendAsyncInst>(VMap[ActiveSuspend])->getDebugLoc();
// Calling i8* (i8*)
@@ -799,13 +820,27 @@ static void addFramePointerAttrs(AttributeList &Attrs, LLVMContext &Context,
Attrs = Attrs.addParamAttributes(Context, ParamIndex, ParamAttrs);
}
+static void addAsyncContextAttrs(AttributeList &Attrs, LLVMContext &Context,
+ unsigned ParamIndex) {
+ AttrBuilder ParamAttrs;
+ ParamAttrs.addAttribute(Attribute::SwiftAsync);
+ Attrs = Attrs.addParamAttributes(Context, ParamIndex, ParamAttrs);
+}
+
+static void addSwiftSelfAttrs(AttributeList &Attrs, LLVMContext &Context,
+ unsigned ParamIndex) {
+ AttrBuilder ParamAttrs;
+ ParamAttrs.addAttribute(Attribute::SwiftSelf);
+ Attrs = Attrs.addParamAttributes(Context, ParamIndex, ParamAttrs);
+}
+
/// Clone the body of the original function into a resume function of
/// some sort.
void CoroCloner::create() {
// Create the new function if we don't already have one.
if (!NewF) {
NewF = createCloneDeclaration(OrigF, Shape, Suffix,
- OrigF.getParent()->end());
+ OrigF.getParent()->end(), ActiveSuspend);
}
// Replace all args with undefs. The buildCoroutineFrame algorithm already
@@ -828,15 +863,41 @@ void CoroCloner::create() {
auto savedLinkage = NewF->getLinkage();
NewF->setLinkage(llvm::GlobalValue::ExternalLinkage);
- CloneFunctionInto(NewF, &OrigF, VMap, /*ModuleLevelChanges=*/true, Returns);
+ CloneFunctionInto(NewF, &OrigF, VMap,
+ CloneFunctionChangeType::LocalChangesOnly, Returns);
+
+ auto &Context = NewF->getContext();
+
+ // For async functions / continuations, adjust the scope line of the
+ // clone to the line number of the suspend point. However, only
+ // adjust the scope line when the files are the same. This ensures
+ // line number and file name belong together. The scope line is
+ // associated with all pre-prologue instructions. This avoids a jump
+ // in the linetable from the function declaration to the suspend point.
+ if (DISubprogram *SP = NewF->getSubprogram()) {
+ assert(SP != OrigF.getSubprogram() && SP->isDistinct());
+ if (ActiveSuspend)
+ if (auto DL = ActiveSuspend->getDebugLoc())
+ if (SP->getFile() == DL->getFile())
+ SP->setScopeLine(DL->getLine());
+ // Update the linkage name to reflect the modified symbol name. It
+ // is necessary to update the linkage name in Swift, since the
+ // mangling changes for resume functions. It might also be the
+ // right thing to do in C++, but due to a limitation in LLVM's
+ // AsmPrinter we can only do this if the function doesn't have an
+ // abstract specification, since the DWARF backend expects the
+ // abstract specification to contain the linkage name and asserts
+ // that they are identical.
+ if (!SP->getDeclaration() && SP->getUnit() &&
+ SP->getUnit()->getSourceLanguage() == dwarf::DW_LANG_Swift)
+ SP->replaceLinkageName(MDString::get(Context, NewF->getName()));
+ }
NewF->setLinkage(savedLinkage);
NewF->setVisibility(savedVisibility);
NewF->setUnnamedAddr(savedUnnamedAddr);
NewF->setDLLStorageClass(savedDLLStorageClass);
- auto &Context = NewF->getContext();
-
// Replace the attributes of the new function:
auto OrigAttrs = NewF->getAttributes();
auto NewAttrs = AttributeList();
@@ -851,8 +912,28 @@ void CoroCloner::create() {
addFramePointerAttrs(NewAttrs, Context, 0,
Shape.FrameSize, Shape.FrameAlign);
break;
- case coro::ABI::Async:
+ case coro::ABI::Async: {
+ auto *ActiveAsyncSuspend = cast<CoroSuspendAsyncInst>(ActiveSuspend);
+ if (OrigF.hasParamAttribute(Shape.AsyncLowering.ContextArgNo,
+ Attribute::SwiftAsync)) {
+ uint32_t ArgAttributeIndices =
+ ActiveAsyncSuspend->getStorageArgumentIndex();
+ auto ContextArgIndex = ArgAttributeIndices & 0xff;
+ addAsyncContextAttrs(NewAttrs, Context, ContextArgIndex);
+
+ // `swiftasync` must preceed `swiftself` so 0 is not a valid index for
+ // `swiftself`.
+ auto SwiftSelfIndex = ArgAttributeIndices >> 8;
+ if (SwiftSelfIndex)
+ addSwiftSelfAttrs(NewAttrs, Context, SwiftSelfIndex);
+ }
+
+ // Transfer the original function's attributes.
+ auto FnAttrs = OrigF.getAttributes().getFnAttributes();
+ NewAttrs =
+ NewAttrs.addAttributes(Context, AttributeList::FunctionIndex, FnAttrs);
break;
+ }
case coro::ABI::Retcon:
case coro::ABI::RetconOnce:
// If we have a continuation prototype, just use its attributes,
@@ -874,7 +955,7 @@ void CoroCloner::create() {
case coro::ABI::RetconOnce:
// Remove old returns.
for (ReturnInst *Return : Returns)
- changeToUnreachable(Return, /*UseLLVMTrap=*/false);
+ changeToUnreachable(Return);
break;
// With multi-suspend continuations, we'll already have eliminated the
@@ -1068,17 +1149,6 @@ static void postSplitCleanup(Function &F) {
// pass to FPM below because it will also verify all the global data.
if (verifyFunction(F, &errs()))
report_fatal_error("Broken function");
-
- legacy::FunctionPassManager FPM(F.getParent());
-
- FPM.add(createSCCPPass());
- FPM.add(createCFGSimplificationPass());
- FPM.add(createEarlyCSEPass());
- FPM.add(createCFGSimplificationPass());
-
- FPM.doInitialization();
- FPM.run(F);
- FPM.doFinalization();
}
// Assuming we arrived at the block NewBlock from Prev instruction, store
@@ -1245,6 +1315,7 @@ static void handleNoSuspendCoroutine(coro::Shape &Shape) {
} else {
CoroBegin->replaceAllUsesWith(CoroBegin->getMem());
}
+
break;
}
case coro::ABI::Async:
@@ -1453,7 +1524,8 @@ static void replaceAsyncResumeFunction(CoroSuspendAsyncInst *Suspend,
auto *Val = Builder.CreateBitOrPointerCast(Continuation, Int8PtrTy);
ResumeIntrinsic->replaceAllUsesWith(Val);
ResumeIntrinsic->eraseFromParent();
- Suspend->setOperand(0, UndefValue::get(Int8PtrTy));
+ Suspend->setOperand(CoroSuspendAsyncInst::ResumeFunctionArg,
+ UndefValue::get(Int8PtrTy));
}
/// Coerce the arguments in \p FnArgs according to \p FnTy in \p CallArgs.
@@ -1528,8 +1600,23 @@ static void splitAsyncCoroutine(Function &F, coro::Shape &Shape,
auto *Suspend = cast<CoroSuspendAsyncInst>(Shape.CoroSuspends[Idx]);
// Create the clone declaration.
- auto *Continuation =
- createCloneDeclaration(F, Shape, ".resume." + Twine(Idx), NextF);
+ auto ResumeNameSuffix = ".resume.";
+ auto ProjectionFunctionName =
+ Suspend->getAsyncContextProjectionFunction()->getName();
+ bool UseSwiftMangling = false;
+ if (ProjectionFunctionName.equals("__swift_async_resume_project_context")) {
+ ResumeNameSuffix = "TQ";
+ UseSwiftMangling = true;
+ } else if (ProjectionFunctionName.equals(
+ "__swift_async_resume_get_context")) {
+ ResumeNameSuffix = "TY";
+ UseSwiftMangling = true;
+ }
+ auto *Continuation = createCloneDeclaration(
+ F, Shape,
+ UseSwiftMangling ? ResumeNameSuffix + Twine(Idx) + "_"
+ : ResumeNameSuffix + Twine(Idx),
+ NextF, Suspend);
Clones.push_back(Continuation);
// Insert a branch to a new return block immediately before the suspend
@@ -1548,7 +1635,8 @@ static void splitAsyncCoroutine(Function &F, coro::Shape &Shape,
// Insert the call to the tail call function and inline it.
auto *Fn = Suspend->getMustTailCallFunction();
SmallVector<Value *, 8> Args(Suspend->args());
- auto FnArgs = ArrayRef<Value *>(Args).drop_front(3);
+ auto FnArgs = ArrayRef<Value *>(Args).drop_front(
+ CoroSuspendAsyncInst::MustTailCallFuncArg + 1);
auto *TailCall =
coro::createMustTailCall(Suspend->getDebugLoc(), Fn, FnArgs, Builder);
Builder.CreateRetVoid();
@@ -1629,7 +1717,7 @@ static void splitRetconCoroutine(Function &F, coro::Shape &Shape,
// Create the clone declaration.
auto Continuation =
- createCloneDeclaration(F, Shape, ".resume." + Twine(i), NextF);
+ createCloneDeclaration(F, Shape, ".resume." + Twine(i), NextF, nullptr);
Clones.push_back(Continuation);
// Insert a branch to the unified return block immediately before
@@ -1798,7 +1886,8 @@ static void updateCallGraphAfterCoroutineSplit(
case coro::ABI::RetconOnce:
// Each clone in the Async/Retcon lowering references of the other clones.
// Let the LazyCallGraph know about all of them at once.
- CG.addSplitRefRecursiveFunctions(N.getFunction(), Clones);
+ if (!Clones.empty())
+ CG.addSplitRefRecursiveFunctions(N.getFunction(), Clones);
break;
}
@@ -2049,28 +2138,21 @@ PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C,
// Split all the coroutines.
for (LazyCallGraph::Node *N : Coroutines) {
Function &F = N->getFunction();
- Attribute Attr = F.getFnAttribute(CORO_PRESPLIT_ATTR);
- StringRef Value = Attr.getValueAsString();
LLVM_DEBUG(dbgs() << "CoroSplit: Processing coroutine '" << F.getName()
- << "' state: " << Value << "\n");
- if (Value == UNPREPARED_FOR_SPLIT) {
- // Enqueue a second iteration of the CGSCC pipeline on this SCC.
- UR.CWorklist.insert(&C);
- F.addFnAttr(CORO_PRESPLIT_ATTR, PREPARED_FOR_SPLIT);
- continue;
- }
+ << "' state: "
+ << F.getFnAttribute(CORO_PRESPLIT_ATTR).getValueAsString()
+ << "\n");
F.removeFnAttr(CORO_PRESPLIT_ATTR);
SmallVector<Function *, 4> Clones;
const coro::Shape Shape = splitCoroutine(F, Clones, ReuseFrameSlot);
updateCallGraphAfterCoroutineSplit(*N, Shape, Clones, C, CG, AM, UR, FAM);
- if ((Shape.ABI == coro::ABI::Async || Shape.ABI == coro::ABI::Retcon ||
- Shape.ABI == coro::ABI::RetconOnce) &&
- !Shape.CoroSuspends.empty()) {
- // Run the CGSCC pipeline on the newly split functions.
- // All clones will be in the same RefSCC, so choose a random clone.
- UR.RCWorklist.insert(CG.lookupRefSCC(CG.get(*Clones[0])));
+ if (!Shape.CoroSuspends.empty()) {
+ // Run the CGSCC pipeline on the original and newly split functions.
+ UR.CWorklist.insert(&C);
+ for (Function *Clone : Clones)
+ UR.CWorklist.insert(CG.lookupSCC(CG.get(*Clone)));
}
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/Coroutines.cpp b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/Coroutines.cpp
index 6699a5c46313..ae2d9e192c87 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/Coroutines.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/Coroutines.cpp
@@ -126,6 +126,7 @@ static bool isCoroutineIntrinsicName(StringRef Name) {
"llvm.coro.alloc",
"llvm.coro.async.context.alloc",
"llvm.coro.async.context.dealloc",
+ "llvm.coro.async.size.replace",
"llvm.coro.async.store_resume",
"llvm.coro.begin",
"llvm.coro.destroy",
@@ -360,7 +361,7 @@ void coro::Shape::buildFrom(Function &F) {
// Replace all coro.ends with unreachable instruction.
for (AnyCoroEndInst *CE : CoroEnds)
- changeToUnreachable(CE, /*UseLLVMTrap=*/false);
+ changeToUnreachable(CE);
return;
}
@@ -399,11 +400,7 @@ void coro::Shape::buildFrom(Function &F) {
this->AsyncLowering.ContextAlignment =
AsyncId->getStorageAlignment().value();
this->AsyncLowering.AsyncFuncPointer = AsyncId->getAsyncFunctionPointer();
- auto &Context = F.getContext();
- auto *Int8PtrTy = Type::getInt8PtrTy(Context);
- auto *VoidTy = Type::getVoidTy(Context);
- this->AsyncLowering.AsyncFuncTy =
- FunctionType::get(VoidTy, {Int8PtrTy, Int8PtrTy, Int8PtrTy}, false);
+ this->AsyncLowering.AsyncCC = F.getCallingConv();
break;
};
case Intrinsic::coro_id_retcon:
@@ -700,7 +697,7 @@ void CoroIdAsyncInst::checkWellFormed() const {
static void checkAsyncContextProjectFunction(const Instruction *I,
Function *F) {
- auto *FunTy = cast<FunctionType>(F->getType()->getPointerElementType());
+ auto *FunTy = cast<FunctionType>(F->getValueType());
if (!FunTy->getReturnType()->isPointerTy() ||
!FunTy->getReturnType()->getPointerElementType()->isIntegerTy(8))
fail(I,