Diffstat (limited to 'llvm/lib/Support/Windows/Threading.inc')
-rw-r--r--  llvm/lib/Support/Windows/Threading.inc  176
1 file changed, 175 insertions, 1 deletion
diff --git a/llvm/lib/Support/Windows/Threading.inc b/llvm/lib/Support/Windows/Threading.inc
index 9456efa686ffc..296e87b776959 100644
--- a/llvm/lib/Support/Windows/Threading.inc
+++ b/llvm/lib/Support/Windows/Threading.inc
@@ -13,9 +13,11 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Twine.h"
-#include "WindowsSupport.h"
+#include "llvm/Support/Windows/WindowsSupport.h"
#include <process.h>
+#include <bitset>
+
// Windows will at times define MemoryFence.
#ifdef MemoryFence
#undef MemoryFence
@@ -122,3 +124,175 @@ SetThreadPriorityResult llvm::set_thread_priority(ThreadPriority Priority) {
? SetThreadPriorityResult::SUCCESS
: SetThreadPriorityResult::FAILURE;
}
+
+struct ProcessorGroup {
+ unsigned ID;
+ unsigned AllThreads;
+ unsigned UsableThreads;
+ unsigned ThreadsPerCore;
+ uint64_t Affinity;
+
+ unsigned useableCores() const {
+ return std::max(1U, UsableThreads / ThreadsPerCore);
+ }
+};
+
+template <typename F>
+static bool IterateProcInfo(LOGICAL_PROCESSOR_RELATIONSHIP Relationship, F Fn) {
+ DWORD Len = 0;
+ BOOL R = ::GetLogicalProcessorInformationEx(Relationship, NULL, &Len);
+ if (R || GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
+ return false;
+ }
+ auto *Info = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)calloc(1, Len);
+ R = ::GetLogicalProcessorInformationEx(Relationship, Info, &Len);
+ if (R) {
+ auto *End =
+ (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)((uint8_t *)Info + Len);
+ for (auto *Curr = Info; Curr < End;
+ Curr = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)((uint8_t *)Curr +
+ Curr->Size)) {
+ if (Curr->Relationship != Relationship)
+ continue;
+ Fn(Curr);
+ }
+ }
+ free(Info);
+ return true;
+}
+
+static ArrayRef<ProcessorGroup> getProcessorGroups() {
+ auto computeGroups = []() {
+ SmallVector<ProcessorGroup, 4> Groups;
+
+ auto HandleGroup = [&](SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *ProcInfo) {
+ GROUP_RELATIONSHIP &El = ProcInfo->Group;
+ for (unsigned J = 0; J < El.ActiveGroupCount; ++J) {
+ ProcessorGroup G;
+ G.ID = Groups.size();
+ G.AllThreads = El.GroupInfo[J].MaximumProcessorCount;
+ G.UsableThreads = El.GroupInfo[J].ActiveProcessorCount;
+ assert(G.UsableThreads <= 64);
+ G.Affinity = El.GroupInfo[J].ActiveProcessorMask;
+ Groups.push_back(G);
+ }
+ };
+
+ if (!IterateProcInfo(RelationGroup, HandleGroup))
+ return std::vector<ProcessorGroup>();
+
+ auto HandleProc = [&](SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *ProcInfo) {
+ PROCESSOR_RELATIONSHIP &El = ProcInfo->Processor;
+ assert(El.GroupCount == 1);
+ unsigned NumHyperThreads = 1;
+ // If the flag is set, each core supports more than one hyper-thread.
+ if (El.Flags & LTP_PC_SMT)
+ NumHyperThreads = std::bitset<64>(El.GroupMask[0].Mask).count();
+ unsigned I = El.GroupMask[0].Group;
+ Groups[I].ThreadsPerCore = NumHyperThreads;
+ };
+
+ if (!IterateProcInfo(RelationProcessorCore, HandleProc))
+ return std::vector<ProcessorGroup>();
+
+ // If there's an affinity mask set on one of the CPUs, then assume the user
+ // wants to constrain the current process to only a single CPU.
+ for (auto &G : Groups) {
+ if (G.UsableThreads != G.AllThreads) {
+ ProcessorGroup NewG{G};
+ Groups.clear();
+ Groups.push_back(NewG);
+ break;
+ }
+ }
+
+ return std::vector<ProcessorGroup>(Groups.begin(), Groups.end());
+ };
+ static auto Groups = computeGroups();
+ return ArrayRef<ProcessorGroup>(Groups);
+}
+
+template <typename R, typename UnaryPredicate>
+static unsigned aggregate(R &&Range, UnaryPredicate P) {
+ unsigned I{};
+ for (const auto &It : Range)
+ I += P(It);
+ return I;
+}
+
+// for sys::getHostNumPhysicalCores
+int computeHostNumPhysicalCores() {
+ static unsigned Cores =
+ aggregate(getProcessorGroups(), [](const ProcessorGroup &G) {
+ return G.UsableThreads / G.ThreadsPerCore;
+ });
+ return Cores;
+}
+
+int computeHostNumHardwareThreads() {
+ static unsigned Threads =
+ aggregate(getProcessorGroups(),
+ [](const ProcessorGroup &G) { return G.UsableThreads; });
+ return Threads;
+}
+
+// Finds the proper CPU socket where a thread number should go. Returns 'None'
+// if the thread shall remain on the current CPU socket.
+Optional<unsigned>
+llvm::ThreadPoolStrategy::compute_cpu_socket(unsigned ThreadPoolNum) const {
+ ArrayRef<ProcessorGroup> Groups = getProcessorGroups();
+  // Only one CPU socket in the system, or a process affinity mask was set;
+  // there is no need to move the thread(s) to another CPU socket.
+ if (Groups.size() <= 1)
+ return None;
+
+  // We ask for fewer threads than there are hardware threads per CPU socket;
+  // no need to dispatch threads to other CPU sockets.
+ unsigned MaxThreadsPerSocket =
+ UseHyperThreads ? Groups[0].UsableThreads : Groups[0].useableCores();
+ if (compute_thread_count() <= MaxThreadsPerSocket)
+ return None;
+
+ assert(ThreadPoolNum < compute_thread_count() &&
+ "The thread index is not within thread strategy's range!");
+
+ // Assumes the same number of hardware threads per CPU socket.
+ return (ThreadPoolNum * Groups.size()) / compute_thread_count();
+}
+
+// Assign the current thread to a more appropriate CPU socket or CPU group.
+void llvm::ThreadPoolStrategy::apply_thread_strategy(
+ unsigned ThreadPoolNum) const {
+ Optional<unsigned> Socket = compute_cpu_socket(ThreadPoolNum);
+ if (!Socket)
+ return;
+ ArrayRef<ProcessorGroup> Groups = getProcessorGroups();
+ GROUP_AFFINITY Affinity{};
+ Affinity.Group = Groups[*Socket].ID;
+ Affinity.Mask = Groups[*Socket].Affinity;
+ SetThreadGroupAffinity(GetCurrentThread(), &Affinity, nullptr);
+}
+
+llvm::BitVector llvm::get_thread_affinity_mask() {
+ GROUP_AFFINITY Affinity{};
+ GetThreadGroupAffinity(GetCurrentThread(), &Affinity);
+
+ static unsigned All =
+ aggregate(getProcessorGroups(),
+ [](const ProcessorGroup &G) { return G.AllThreads; });
+
+ unsigned StartOffset =
+ aggregate(getProcessorGroups(), [&](const ProcessorGroup &G) {
+ return G.ID < Affinity.Group ? G.AllThreads : 0;
+ });
+
+ llvm::BitVector V;
+ V.resize(All);
+ for (unsigned I = 0; I < sizeof(KAFFINITY) * 8; ++I) {
+ if ((Affinity.Mask >> I) & 1)
+ V.set(StartOffset + I);
+ }
+ return V;
+}
+
+unsigned llvm::get_cpus() { return getProcessorGroups().size(); }
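
The socket assignment in compute_cpu_socket() above reduces to one integer expression, (ThreadPoolNum * Groups.size()) / compute_thread_count(), which spreads pool-thread indices evenly across processor groups under the patch's stated assumption that every CPU socket exposes the same number of hardware threads. The standalone sketch below is not part of the patch; assignGroup and the example group/thread counts are hypothetical, chosen only to show how the arithmetic partitions the indices.

// Illustrative sketch, not part of the patch: the round-robin socket
// assignment from compute_cpu_socket(), assuming every processor group
// exposes the same number of hardware threads.
#include <cstdio>

// Hypothetical helper; mirrors (ThreadPoolNum * Groups.size()) / ThreadCount.
static unsigned assignGroup(unsigned ThreadPoolNum, unsigned NumGroups,
                            unsigned ThreadCount) {
  return (ThreadPoolNum * NumGroups) / ThreadCount;
}

int main() {
  const unsigned NumGroups = 2;   // e.g. a dual-socket machine
  const unsigned ThreadCount = 8; // 4 hardware threads per socket
  for (unsigned I = 0; I < ThreadCount; ++I)
    std::printf("pool thread %u -> group %u\n", I,
                assignGroup(I, NumGroups, ThreadCount));
  // Prints groups 0,0,0,0,1,1,1,1: the first half of the pool stays on
  // socket 0, the second half is dispatched to socket 1.
  return 0;
}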
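
Similarly, get_thread_affinity_mask() above concatenates the per-group masks into one flat bit vector: each group contributes AllThreads bits, StartOffset is the number of bits contributed by lower-numbered groups, and the set bits of the current 64-bit GROUP_AFFINITY mask are copied in at that offset. The sketch below reproduces only that indexing; it is not part of the patch, GroupDesc and flattenAffinity are hypothetical names, std::vector<bool> stands in for llvm::BitVector, and the two-group layout is made up.

// Illustrative sketch, not part of the patch: the flat-index math from
// get_thread_affinity_mask(), with a made-up two-group layout.
#include <cstdint>
#include <cstdio>
#include <vector>

struct GroupDesc { // hypothetical stand-in for ProcessorGroup
  unsigned ID;
  unsigned AllThreads;
};

// Copy the 64-bit mask of group 'Group' into a machine-wide bit vector,
// offset by the AllThreads count of every lower-numbered group.
static std::vector<bool> flattenAffinity(const std::vector<GroupDesc> &Groups,
                                         unsigned Group, uint64_t Mask) {
  unsigned Total = 0, StartOffset = 0;
  for (const GroupDesc &G : Groups) {
    if (G.ID < Group)
      StartOffset += G.AllThreads;
    Total += G.AllThreads;
  }
  std::vector<bool> V(Total, false);
  for (unsigned I = 0; I < 64; ++I)
    if ((Mask >> I) & 1)
      V[StartOffset + I] = true;
  return V;
}

int main() {
  std::vector<GroupDesc> Groups = {{0, 64}, {1, 64}};
  // The thread is bound to the first four logical processors of group 1,
  // so bits 64..67 of the flat vector end up set.
  std::vector<bool> V = flattenAffinity(Groups, /*Group=*/1, /*Mask=*/0xF);
  for (unsigned I = 0; I < V.size(); ++I)
    if (V[I])
      std::printf("bit %u set\n", I);
  return 0;
}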