diff options
Diffstat (limited to 'llvm/lib/Support/Windows/Threading.inc')
-rw-r--r-- | llvm/lib/Support/Windows/Threading.inc | 176 |
1 files changed, 175 insertions, 1 deletions
diff --git a/llvm/lib/Support/Windows/Threading.inc b/llvm/lib/Support/Windows/Threading.inc index 9456efa686ffc..296e87b776959 100644 --- a/llvm/lib/Support/Windows/Threading.inc +++ b/llvm/lib/Support/Windows/Threading.inc @@ -13,9 +13,11 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Twine.h" -#include "WindowsSupport.h" +#include "llvm/Support/Windows/WindowsSupport.h" #include <process.h> +#include <bitset> + // Windows will at times define MemoryFence. #ifdef MemoryFence #undef MemoryFence @@ -122,3 +124,175 @@ SetThreadPriorityResult llvm::set_thread_priority(ThreadPriority Priority) { ? SetThreadPriorityResult::SUCCESS : SetThreadPriorityResult::FAILURE; } + +struct ProcessorGroup { + unsigned ID; + unsigned AllThreads; + unsigned UsableThreads; + unsigned ThreadsPerCore; + uint64_t Affinity; + + unsigned useableCores() const { + return std::max(1U, UsableThreads / ThreadsPerCore); + } +}; + +template <typename F> +static bool IterateProcInfo(LOGICAL_PROCESSOR_RELATIONSHIP Relationship, F Fn) { + DWORD Len = 0; + BOOL R = ::GetLogicalProcessorInformationEx(Relationship, NULL, &Len); + if (R || GetLastError() != ERROR_INSUFFICIENT_BUFFER) { + return false; + } + auto *Info = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)calloc(1, Len); + R = ::GetLogicalProcessorInformationEx(Relationship, Info, &Len); + if (R) { + auto *End = + (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)((uint8_t *)Info + Len); + for (auto *Curr = Info; Curr < End; + Curr = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)((uint8_t *)Curr + + Curr->Size)) { + if (Curr->Relationship != Relationship) + continue; + Fn(Curr); + } + } + free(Info); + return true; +} + +static ArrayRef<ProcessorGroup> getProcessorGroups() { + auto computeGroups = []() { + SmallVector<ProcessorGroup, 4> Groups; + + auto HandleGroup = [&](SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *ProcInfo) { + GROUP_RELATIONSHIP &El = ProcInfo->Group; + for (unsigned J = 0; J < El.ActiveGroupCount; ++J) { + ProcessorGroup G; + G.ID = Groups.size(); + G.AllThreads = El.GroupInfo[J].MaximumProcessorCount; + G.UsableThreads = El.GroupInfo[J].ActiveProcessorCount; + assert(G.UsableThreads <= 64); + G.Affinity = El.GroupInfo[J].ActiveProcessorMask; + Groups.push_back(G); + } + }; + + if (!IterateProcInfo(RelationGroup, HandleGroup)) + return std::vector<ProcessorGroup>(); + + auto HandleProc = [&](SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *ProcInfo) { + PROCESSOR_RELATIONSHIP &El = ProcInfo->Processor; + assert(El.GroupCount == 1); + unsigned NumHyperThreads = 1; + // If the flag is set, each core supports more than one hyper-thread. + if (El.Flags & LTP_PC_SMT) + NumHyperThreads = std::bitset<64>(El.GroupMask[0].Mask).count(); + unsigned I = El.GroupMask[0].Group; + Groups[I].ThreadsPerCore = NumHyperThreads; + }; + + if (!IterateProcInfo(RelationProcessorCore, HandleProc)) + return std::vector<ProcessorGroup>(); + + // If there's an affinity mask set on one of the CPUs, then assume the user + // wants to constrain the current process to only a single CPU. + for (auto &G : Groups) { + if (G.UsableThreads != G.AllThreads) { + ProcessorGroup NewG{G}; + Groups.clear(); + Groups.push_back(NewG); + break; + } + } + + return std::vector<ProcessorGroup>(Groups.begin(), Groups.end()); + }; + static auto Groups = computeGroups(); + return ArrayRef<ProcessorGroup>(Groups); +} + +template <typename R, typename UnaryPredicate> +static unsigned aggregate(R &&Range, UnaryPredicate P) { + unsigned I{}; + for (const auto &It : Range) + I += P(It); + return I; +} + +// for sys::getHostNumPhysicalCores +int computeHostNumPhysicalCores() { + static unsigned Cores = + aggregate(getProcessorGroups(), [](const ProcessorGroup &G) { + return G.UsableThreads / G.ThreadsPerCore; + }); + return Cores; +} + +int computeHostNumHardwareThreads() { + static unsigned Threads = + aggregate(getProcessorGroups(), + [](const ProcessorGroup &G) { return G.UsableThreads; }); + return Threads; +} + +// Finds the proper CPU socket where a thread number should go. Returns 'None' +// if the thread shall remain on the actual CPU socket. +Optional<unsigned> +llvm::ThreadPoolStrategy::compute_cpu_socket(unsigned ThreadPoolNum) const { + ArrayRef<ProcessorGroup> Groups = getProcessorGroups(); + // Only one CPU socket in the system or process affinity was set, no need to + // move the thread(s) to another CPU socket. + if (Groups.size() <= 1) + return None; + + // We ask for less threads than there are hardware threads per CPU socket, no + // need to dispatch threads to other CPU sockets. + unsigned MaxThreadsPerSocket = + UseHyperThreads ? Groups[0].UsableThreads : Groups[0].useableCores(); + if (compute_thread_count() <= MaxThreadsPerSocket) + return None; + + assert(ThreadPoolNum < compute_thread_count() && + "The thread index is not within thread strategy's range!"); + + // Assumes the same number of hardware threads per CPU socket. + return (ThreadPoolNum * Groups.size()) / compute_thread_count(); +} + +// Assign the current thread to a more appropriate CPU socket or CPU group +void llvm::ThreadPoolStrategy::apply_thread_strategy( + unsigned ThreadPoolNum) const { + Optional<unsigned> Socket = compute_cpu_socket(ThreadPoolNum); + if (!Socket) + return; + ArrayRef<ProcessorGroup> Groups = getProcessorGroups(); + GROUP_AFFINITY Affinity{}; + Affinity.Group = Groups[*Socket].ID; + Affinity.Mask = Groups[*Socket].Affinity; + SetThreadGroupAffinity(GetCurrentThread(), &Affinity, nullptr); +} + +llvm::BitVector llvm::get_thread_affinity_mask() { + GROUP_AFFINITY Affinity{}; + GetThreadGroupAffinity(GetCurrentThread(), &Affinity); + + static unsigned All = + aggregate(getProcessorGroups(), + [](const ProcessorGroup &G) { return G.AllThreads; }); + + unsigned StartOffset = + aggregate(getProcessorGroups(), [&](const ProcessorGroup &G) { + return G.ID < Affinity.Group ? G.AllThreads : 0; + }); + + llvm::BitVector V; + V.resize(All); + for (unsigned I = 0; I < sizeof(KAFFINITY) * 8; ++I) { + if ((Affinity.Mask >> I) & 1) + V.set(StartOffset + I); + } + return V; +} + +unsigned llvm::get_cpus() { return getProcessorGroups().size(); } |