Diffstat (limited to 'lib/xray/xray_function_call_trie.h')
-rw-r--r-- | lib/xray/xray_function_call_trie.h | 455
1 file changed, 455 insertions, 0 deletions
diff --git a/lib/xray/xray_function_call_trie.h b/lib/xray/xray_function_call_trie.h
new file mode 100644
index 000000000000..2acf14aa5625
--- /dev/null
+++ b/lib/xray/xray_function_call_trie.h
@@ -0,0 +1,455 @@
+//===-- xray_function_call_trie.h ------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// This file defines the interface for a function call trie.
+//
+//===----------------------------------------------------------------------===//
+#ifndef XRAY_FUNCTION_CALL_TRIE_H
+#define XRAY_FUNCTION_CALL_TRIE_H
+
+#include "sanitizer_common/sanitizer_allocator_internal.h"
+#include "xray_profiling_flags.h"
+#include "xray_segmented_array.h"
+#include <memory> // For placement new.
+#include <utility>
+
+namespace __xray {
+
+/// A FunctionCallTrie represents the stack traces of XRay instrumented
+/// functions that we've encountered, where a node corresponds to a function
+/// and the path from the root to that node represents its stack trace. Each
+/// node in the trie will contain some useful values, including:
+///
+///   * The cumulative amount of time spent in this particular node/stack.
+///   * The number of times this stack has appeared.
+///   * A histogram of latencies for that particular node.
+///
+/// Each node in the trie will also contain a list of callees, represented
+/// using an Array<NodeIdPair> -- each NodeIdPair instance will contain the
+/// function ID of the callee, and a pointer to the node.
+///
+/// If we visualise this data structure, we'll find the following potential
+/// representation:
+///
+///   [function id node] -> [callees] [cumulative time]
+///                         [call counter] [latency histogram]
+///
+/// As an example, when we have a function in this pseudocode:
+///
+///   func f(N) {
+///     g()
+///     h()
+///     for i := 1..N { j() }
+///   }
+///
+/// We may end up with a trie of the following form:
+///
+///   f -> [ g, h, j ] [...] [1] [...]
+///   g -> [ ... ] [...] [1] [...]
+///   h -> [ ... ] [...] [1] [...]
+///   j -> [ ... ] [...] [N] [...]
+///
+/// If, for instance, the function g() called j() like so:
+///
+///   func g() {
+///     for i := 1..10 { j() }
+///   }
+///
+/// We'll find the following updated trie:
+///
+///   f  -> [ g, h, j ] [...] [1] [...]
+///   g  -> [ j' ] [...] [1] [...]
+///   h  -> [ ... ] [...] [1] [...]
+///   j  -> [ ... ] [...] [N] [...]
+///   j' -> [ ... ] [...] [10] [...]
+///
+/// Note that we'll have a new node representing the path `f -> g -> j'` with
+/// isolated data. This isolation gives us a means of representing the stack
+/// traces as a path, as opposed to a key in a table. The alternative
+/// implementation here would be to use a separate table for the path, and use
+/// hashes of the path as an identifier to accumulate the information. We've
+/// moved away from this approach because computing the hash every time we
+/// need to update a function's call information, while handling the entry
+/// and exit events, is too expensive.
+///
+/// This approach allows us to maintain a shadow stack, which represents the
+/// currently executing path, and on function exits quickly compute the amount
+/// of time elapsed from the entry, then update the counters for the node
+/// already represented in the trie. This necessitates an efficient
+/// representation of the various data structures (the list of callees must be
+/// cache-aware and efficient to look up, and the histogram must be compact
+/// and quick to update) to keep the overheads of this implementation to a
+/// minimum.
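To make the enter/exit flow concrete, here is a minimal usage sketch (an editorial illustration, not part of this commit) driving the interface defined below; the function IDs and TSC values are invented, and the profiling flags are assumed to be initialized:

  // Hypothetical driver for the API below; FIds and TSC values are invented.
  using namespace __xray;
  FunctionCallTrie::Allocators A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);
  Trie.enterFunction(/*FId=*/1, /*TSC=*/100); // enter f
  Trie.enterFunction(/*FId=*/2, /*TSC=*/110); // f calls g
  Trie.exitFunction(/*FId=*/2, /*TSC=*/150);  // g: 150 - 110 = 40 ticks
  Trie.exitFunction(/*FId=*/1, /*TSC=*/200);  // f: 200 - 100 = 100 ticks
  // Trie now holds f (CallCount 1, CumulativeLocalTime 100)
  //             -> g (CallCount 1, CumulativeLocalTime 40).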
+class FunctionCallTrie {
+public:
+  struct Node;
+
+  // We use a NodeIdPair type instead of a std::pair<...> to avoid relying on
+  // standard library types in this header.
+  struct NodeIdPair {
+    Node *NodePtr;
+    int32_t FId;
+
+    // Constructor for in-place construction.
+    NodeIdPair(Node *N, int32_t F) : NodePtr(N), FId(F) {}
+  };
+
+  using NodeIdPairArray = Array<NodeIdPair>;
+  using NodeIdPairAllocatorType = NodeIdPairArray::AllocatorType;
+
+  // A Node in the FunctionCallTrie gives us a list of callees, the cumulative
+  // number of times this node actually appeared, the cumulative amount of
+  // time for this particular node including its children's call times, and
+  // just the local time spent on this node. Each Node will have the ID of the
+  // XRay instrumented function that it is associated with.
+  struct Node {
+    Node *Parent;
+    NodeIdPairArray Callees;
+    int64_t CallCount;
+    int64_t CumulativeLocalTime; // Typically in TSC deltas, not wall-time.
+    int32_t FId;
+
+    // We add a constructor here to allow us to in-place construct through
+    // Array<...>'s AppendEmplace.
+    Node(Node *P, NodeIdPairAllocatorType &A, int64_t CC, int64_t CLT,
+         int32_t F)
+        : Parent(P), Callees(A), CallCount(CC), CumulativeLocalTime(CLT),
+          FId(F) {}
+
+    // TODO: Include the compact histogram.
+  };
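As a sketch of how the per-node data might be consumed (a hypothetical helper, not part of this commit, assuming Array's iterators as used by deepCopyInto further below):

  // Hypothetical helper: total cumulative time across a node's immediate
  // callees, following each NodeIdPair's node pointer.
  int64_t sumCalleeLocalTime(FunctionCallTrie::Node &N) {
    int64_t Total = 0;
    for (const auto &Callee : N.Callees) // iterate the Array<NodeIdPair>
      Total += Callee.NodePtr->CumulativeLocalTime;
    return Total;
  }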
+private:
+  struct ShadowStackEntry {
+    uint64_t EntryTSC;
+    Node *NodePtr;
+
+    // We add a constructor here to allow us to in-place construct through
+    // Array<...>'s AppendEmplace.
+    ShadowStackEntry(uint64_t T, Node *N) : EntryTSC{T}, NodePtr{N} {}
+  };
+
+  using NodeArray = Array<Node>;
+  using RootArray = Array<Node *>;
+  using ShadowStackArray = Array<ShadowStackEntry>;
+
+public:
+  // We collate the allocators we need into a single struct, as a convenience
+  // to allow us to initialize these as a group.
+  struct Allocators {
+    using NodeAllocatorType = NodeArray::AllocatorType;
+    using RootAllocatorType = RootArray::AllocatorType;
+    using ShadowStackAllocatorType = ShadowStackArray::AllocatorType;
+
+    NodeAllocatorType *NodeAllocator = nullptr;
+    RootAllocatorType *RootAllocator = nullptr;
+    ShadowStackAllocatorType *ShadowStackAllocator = nullptr;
+    NodeIdPairAllocatorType *NodeIdPairAllocator = nullptr;
+
+    Allocators() {}
+    Allocators(const Allocators &) = delete;
+    Allocators &operator=(const Allocators &) = delete;
+
+    Allocators(Allocators &&O)
+        : NodeAllocator(O.NodeAllocator), RootAllocator(O.RootAllocator),
+          ShadowStackAllocator(O.ShadowStackAllocator),
+          NodeIdPairAllocator(O.NodeIdPairAllocator) {
+      O.NodeAllocator = nullptr;
+      O.RootAllocator = nullptr;
+      O.ShadowStackAllocator = nullptr;
+      O.NodeIdPairAllocator = nullptr;
+    }
+
+    Allocators &operator=(Allocators &&O) {
+      {
+        auto Tmp = O.NodeAllocator;
+        O.NodeAllocator = this->NodeAllocator;
+        this->NodeAllocator = Tmp;
+      }
+      {
+        auto Tmp = O.RootAllocator;
+        O.RootAllocator = this->RootAllocator;
+        this->RootAllocator = Tmp;
+      }
+      {
+        auto Tmp = O.ShadowStackAllocator;
+        O.ShadowStackAllocator = this->ShadowStackAllocator;
+        this->ShadowStackAllocator = Tmp;
+      }
+      {
+        auto Tmp = O.NodeIdPairAllocator;
+        O.NodeIdPairAllocator = this->NodeIdPairAllocator;
+        this->NodeIdPairAllocator = Tmp;
+      }
+      return *this;
+    }
+
+    ~Allocators() {
+      // Note that we cannot use delete on these pointers, as they need to be
+      // returned to the sanitizer_common library's internal memory tracking
+      // system.
+      if (NodeAllocator != nullptr) {
+        NodeAllocator->~NodeAllocatorType();
+        InternalFree(NodeAllocator);
+        NodeAllocator = nullptr;
+      }
+      if (RootAllocator != nullptr) {
+        RootAllocator->~RootAllocatorType();
+        InternalFree(RootAllocator);
+        RootAllocator = nullptr;
+      }
+      if (ShadowStackAllocator != nullptr) {
+        ShadowStackAllocator->~ShadowStackAllocatorType();
+        InternalFree(ShadowStackAllocator);
+        ShadowStackAllocator = nullptr;
+      }
+      if (NodeIdPairAllocator != nullptr) {
+        NodeIdPairAllocator->~NodeIdPairAllocatorType();
+        InternalFree(NodeIdPairAllocator);
+        NodeIdPairAllocator = nullptr;
+      }
+    }
+  };
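Since the copy operations are deleted, an Allocators group can only be moved. A minimal sketch of the intended ownership (an editorial illustration, not part of this commit; it assumes the sanitizer runtime is initialized and the group outlives any trie built from it):

  // The allocator group is move-only; moving nulls out the source pointers.
  FunctionCallTrie::Allocators A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie::Allocators B = std::move(A); // A now holds only nullptrs
  FunctionCallTrie Trie(B); // Trie borrows B's allocators; B must outlive it
  // When B is destroyed, each allocator is destructed in place and its
  // storage handed back via InternalFree.

The InternalAlloc/placement-new pairing in InitAllocatorsCustom below follows from the destructor's note: the runtime stays off the normal heap, so memory is accounted for by sanitizer_common's internal allocator instead.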
+  // TODO: Support configuration of options through the arguments.
+  static Allocators InitAllocators() {
+    return InitAllocatorsCustom(profilingFlags()->per_thread_allocator_max);
+  }
+
+  static Allocators InitAllocatorsCustom(uptr Max) {
+    Allocators A;
+    auto NodeAllocator = reinterpret_cast<Allocators::NodeAllocatorType *>(
+        InternalAlloc(sizeof(Allocators::NodeAllocatorType)));
+    new (NodeAllocator) Allocators::NodeAllocatorType(Max);
+    A.NodeAllocator = NodeAllocator;
+
+    auto RootAllocator = reinterpret_cast<Allocators::RootAllocatorType *>(
+        InternalAlloc(sizeof(Allocators::RootAllocatorType)));
+    new (RootAllocator) Allocators::RootAllocatorType(Max);
+    A.RootAllocator = RootAllocator;
+
+    auto ShadowStackAllocator =
+        reinterpret_cast<Allocators::ShadowStackAllocatorType *>(
+            InternalAlloc(sizeof(Allocators::ShadowStackAllocatorType)));
+    new (ShadowStackAllocator) Allocators::ShadowStackAllocatorType(Max);
+    A.ShadowStackAllocator = ShadowStackAllocator;
+
+    auto NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
+        InternalAlloc(sizeof(NodeIdPairAllocatorType)));
+    new (NodeIdPairAllocator) NodeIdPairAllocatorType(Max);
+    A.NodeIdPairAllocator = NodeIdPairAllocator;
+    return A;
+  }
+
+private:
+  NodeArray Nodes;
+  RootArray Roots;
+  ShadowStackArray ShadowStack;
+  NodeIdPairAllocatorType *NodeIdPairAllocator = nullptr;
+
+public:
+  explicit FunctionCallTrie(const Allocators &A)
+      : Nodes(*A.NodeAllocator), Roots(*A.RootAllocator),
+        ShadowStack(*A.ShadowStackAllocator),
+        NodeIdPairAllocator(A.NodeIdPairAllocator) {}
+
+  void enterFunction(const int32_t FId, uint64_t TSC) {
+    DCHECK_NE(FId, 0);
+    // This function primarily deals with ensuring that the ShadowStack is
+    // consistent and ready for when an exit event is encountered.
+    if (UNLIKELY(ShadowStack.empty())) {
+      auto NewRoot =
+          Nodes.AppendEmplace(nullptr, *NodeIdPairAllocator, 0, 0, FId);
+      if (UNLIKELY(NewRoot == nullptr))
+        return;
+      Roots.Append(NewRoot);
+      ShadowStack.AppendEmplace(TSC, NewRoot);
+      return;
+    }
+
+    auto &Top = ShadowStack.back();
+    auto TopNode = Top.NodePtr;
+    DCHECK_NE(TopNode, nullptr);
+
+    // If we've seen this callee before, then we just access that node and
+    // place it on the top of the stack.
+    auto Callee = TopNode->Callees.find_element(
+        [FId](const NodeIdPair &NR) { return NR.FId == FId; });
+    if (Callee != nullptr) {
+      CHECK_NE(Callee->NodePtr, nullptr);
+      ShadowStack.AppendEmplace(TSC, Callee->NodePtr);
+      return;
+    }
+
+    // This means we've never seen this stack before; create a new node here.
+    auto NewNode =
+        Nodes.AppendEmplace(TopNode, *NodeIdPairAllocator, 0, 0, FId);
+    if (UNLIKELY(NewNode == nullptr))
+      return;
+    DCHECK_NE(NewNode, nullptr);
+    TopNode->Callees.AppendEmplace(NewNode, FId);
+    ShadowStack.AppendEmplace(TSC, NewNode);
+    DCHECK_NE(ShadowStack.back().NodePtr, nullptr);
+    return;
+  }
+
+  void exitFunction(int32_t FId, uint64_t TSC) {
+    // When we exit a function, we look up the ShadowStack to see whether we've
+    // entered this function before. We do as little processing here as we can,
+    // since most of the hard work would have already been done at function
+    // entry.
+    uint64_t CumulativeTreeTime = 0;
+    while (!ShadowStack.empty()) {
+      const auto &Top = ShadowStack.back();
+      auto TopNode = Top.NodePtr;
+      DCHECK_NE(TopNode, nullptr);
+      auto LocalTime = TSC - Top.EntryTSC;
+      TopNode->CallCount++;
+      TopNode->CumulativeLocalTime += LocalTime - CumulativeTreeTime;
+      CumulativeTreeTime += LocalTime;
+      ShadowStack.trim(1);
+
+      // TODO: Update the histogram for the node.
+      if (TopNode->FId == FId)
+        break;
+    }
+  }
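One subtlety worth spelling out: the CumulativeTreeTime bookkeeping lets a single exit event account for frames whose own exits were never recorded, without double-charging within that unwinding. A short worked example (editorial, with invented TSC values):

  // Suppose: enter f (FId 1) @ TSC 100, enter g (FId 2) @ TSC 110, and the
  // only exit event seen is exitFunction(/*FId=*/1, /*TSC=*/200). The loop:
  //   pops g: LocalTime = 200 - 110 = 90;  g.CumulativeLocalTime += 90 - 0 = 90
  //   pops f: LocalTime = 200 - 100 = 100; f.CumulativeLocalTime += 100 - 90 = 10
  // so f is charged only the ticks not already attributed to g in this
  // unwinding, and the loop stops once the node matching FId 1 is popped.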
+
+  const RootArray &getRoots() const { return Roots; }
+
+  // The deepCopyInto operation will update the provided FunctionCallTrie by
+  // re-creating the contents of this particular FunctionCallTrie in the other
+  // FunctionCallTrie. It does this with a depth-first traversal from the
+  // roots, recreating each node it visits in the provided FunctionCallTrie.
+  //
+  // Because this operation only ever appends to |O|, calling it on a
+  // non-empty FunctionCallTrie would create duplicate entries; it must
+  // therefore only be called with an empty FunctionCallTrie |O|, which we
+  // enforce with a DCHECK.
+  //
+  // This function is *not* thread-safe, and may require external
+  // synchronisation of both "this" and |O|.
+  void deepCopyInto(FunctionCallTrie &O) const {
+    DCHECK(O.getRoots().empty());
+
+    // We push each root into a stack, to use as the parent marker for new
+    // nodes we push in as we're traversing depth-first down the call tree.
+    struct NodeAndParent {
+      FunctionCallTrie::Node *Node;
+      FunctionCallTrie::Node *NewNode;
+    };
+    using Stack = Array<NodeAndParent>;
+
+    typename Stack::AllocatorType StackAllocator(
+        profilingFlags()->stack_allocator_max);
+    Stack DFSStack(StackAllocator);
+
+    for (const auto Root : getRoots()) {
+      // Add a node in O for this root.
+      auto NewRoot = O.Nodes.AppendEmplace(
+          nullptr, *O.NodeIdPairAllocator, Root->CallCount,
+          Root->CumulativeLocalTime, Root->FId);
+
+      // Because we cannot allocate more memory, we bail out right away.
+      if (UNLIKELY(NewRoot == nullptr))
+        return;
+
+      O.Roots.Append(NewRoot);
+
+      // TODO: Figure out what to do if we fail to allocate any more stack
+      // space. Maybe warn or report once?
+      DFSStack.AppendEmplace(Root, NewRoot);
+      while (!DFSStack.empty()) {
+        NodeAndParent NP = DFSStack.back();
+        DCHECK_NE(NP.Node, nullptr);
+        DCHECK_NE(NP.NewNode, nullptr);
+        DFSStack.trim(1);
+        for (const auto Callee : NP.Node->Callees) {
+          auto NewNode = O.Nodes.AppendEmplace(
+              NP.NewNode, *O.NodeIdPairAllocator, Callee.NodePtr->CallCount,
+              Callee.NodePtr->CumulativeLocalTime, Callee.FId);
+          if (UNLIKELY(NewNode == nullptr))
+            return;
+          NP.NewNode->Callees.AppendEmplace(NewNode, Callee.FId);
+          DFSStack.AppendEmplace(Callee.NodePtr, NewNode);
+        }
+      }
+    }
+  }
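A typical use for deepCopyInto (a hypothetical sketch; the profiling collector that would call this is not part of this header) is snapshotting a thread's trie into freshly initialized storage:

  // Hypothetical snapshot of a per-thread trie; ThreadTrie is an assumed
  // name, the thread is assumed quiescent, and Snapshot starts empty as the
  // DCHECK above requires. The caller provides any needed locking.
  FunctionCallTrie::Allocators SnapshotAllocs =
      FunctionCallTrie::InitAllocators();
  FunctionCallTrie Snapshot(SnapshotAllocs);
  ThreadTrie.deepCopyInto(Snapshot); // Snapshot now owns an isolated copy.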
+
+  // The mergeInto operation will update the provided FunctionCallTrie by
+  // traversing the current trie's roots and updating (i.e. merging) the data
+  // in its nodes with the data in the target's nodes. If a node doesn't exist
+  // in the provided trie, we add a new one in the right position, inheriting
+  // the data from the original (current) trie, along with all its callees.
+  //
+  // This function is *not* thread-safe, and may require external
+  // synchronisation of both "this" and |O|.
+  void mergeInto(FunctionCallTrie &O) const {
+    struct NodeAndTarget {
+      FunctionCallTrie::Node *OrigNode;
+      FunctionCallTrie::Node *TargetNode;
+    };
+    using Stack = Array<NodeAndTarget>;
+    typename Stack::AllocatorType StackAllocator(
+        profilingFlags()->stack_allocator_max);
+    Stack DFSStack(StackAllocator);
+
+    for (const auto Root : getRoots()) {
+      Node *TargetRoot = nullptr;
+      auto R = O.Roots.find_element(
+          [&](const Node *Node) { return Node->FId == Root->FId; });
+      if (R == nullptr) {
+        TargetRoot = O.Nodes.AppendEmplace(nullptr, *O.NodeIdPairAllocator, 0,
+                                           0, Root->FId);
+        if (UNLIKELY(TargetRoot == nullptr))
+          return;
+
+        O.Roots.Append(TargetRoot);
+      } else {
+        TargetRoot = *R;
+      }
+
+      DFSStack.Append(NodeAndTarget{Root, TargetRoot});
+      while (!DFSStack.empty()) {
+        NodeAndTarget NT = DFSStack.back();
+        DCHECK_NE(NT.OrigNode, nullptr);
+        DCHECK_NE(NT.TargetNode, nullptr);
+        DFSStack.trim(1);
+        // TODO: Update the histogram as well when we have it ready.
+        NT.TargetNode->CallCount += NT.OrigNode->CallCount;
+        NT.TargetNode->CumulativeLocalTime += NT.OrigNode->CumulativeLocalTime;
+        for (const auto Callee : NT.OrigNode->Callees) {
+          auto TargetCallee = NT.TargetNode->Callees.find_element(
+              [&](const FunctionCallTrie::NodeIdPair &C) {
+                return C.FId == Callee.FId;
+              });
+          if (TargetCallee == nullptr) {
+            auto NewTargetNode = O.Nodes.AppendEmplace(
+                NT.TargetNode, *O.NodeIdPairAllocator, 0, 0, Callee.FId);
+
+            if (UNLIKELY(NewTargetNode == nullptr))
+              return;
+
+            TargetCallee =
+                NT.TargetNode->Callees.AppendEmplace(NewTargetNode, Callee.FId);
+          }
+          DFSStack.AppendEmplace(Callee.NodePtr, TargetCallee->NodePtr);
+        }
+      }
+    }
+  }
+};
+
+} // namespace __xray
+
+#endif // XRAY_FUNCTION_CALL_TRIE_H
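Taken together, deepCopyInto and mergeInto support the aggregation step of a profiler: per-thread tries can be folded into a single profile. A closing sketch (editorial; the collection loop and the PerThreadTries container are hypothetical):

  // Hypothetical aggregation: fold every thread's trie into one profile.
  FunctionCallTrie::Allocators ProfileAllocs =
      FunctionCallTrie::InitAllocators();
  FunctionCallTrie Profile(ProfileAllocs);
  for (FunctionCallTrie *ThreadTrie : PerThreadTries) // assumed container
    ThreadTrie->mergeInto(Profile); // counts and times accumulate per path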