[Mesa-dev] [PATCH 09/10] swr/rast: adjust linux cpu topology identification code

Mon Sep 11 19:28:29 UTC 2017

Make the code more robust so that it handles strange configurations, such as
a VMware-exported 4-way NUMA x 1-core configuration.
---
 .../drivers/swr/rasterizer/core/threads.cpp        | 81 ++++++++++------------
 1 file changed, 38 insertions(+), 43 deletions(-)

diff --git a/src/gallium/drivers/swr/rasterizer/core/threads.cpp b/src/gallium/drivers/swr/rasterizer/core/threads.cpp
index b704d23..4bb395d 100644
--- a/src/gallium/drivers/swr/rasterizer/core/threads.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/threads.cpp
@@ -169,37 +169,16 @@ void CalculateProcessorTopology(CPUNumaNodes& out_nodes, uint32_t& out_numThread
     std::ifstream input("/proc/cpuinfo");
     std::string line;
     char* c;
-    uint32_t threadId = uint32_t(-1);
+    uint32_t procId = uint32_t(-1);
     uint32_t coreId = uint32_t(-1);
-    uint32_t numaId = uint32_t(-1);
+    uint32_t physId = uint32_t(-1);
 
     while (std::getline(input, line))
     {
         if (line.find("processor") != std::string::npos)
         {
-            if (threadId != uint32_t(-1))
-            {
-                // Save information.
-                if (out_nodes.size() <= numaId)
-                {
-                    out_nodes.resize(numaId + 1);
-                }
-
-                auto& numaNode = out_nodes[numaId];
-                if (numaNode.cores.size() <= coreId)
-                {
-                    numaNode.cores.resize(coreId + 1);
-                }
-
-                auto& core = numaNode.cores[coreId];
-                core.procGroup = coreId;
-                core.threadIds.push_back(threadId);
-
-                out_numThreadsPerProcGroup++;
-            }
-
             auto data_start = line.find(": ") + 2;
-            threadId = std::strtoul(&line.c_str()[data_start], &c, 10);
+            procId = std::strtoul(&line.c_str()[data_start], &c, 10);
             continue;
         }
         if (line.find("core id") != std::string::npos)
@@ -211,29 +190,32 @@ void CalculateProcessorTopology(CPUNumaNodes& out_nodes, uint32_t& out_numThread
         if (line.find("physical id") != std::string::npos)
         {
             auto data_start = line.find(": ") + 2;
-            numaId = std::strtoul(&line.c_str()[data_start], &c, 10);
+            physId = std::strtoul(&line.c_str()[data_start], &c, 10);
             continue;
         }
+        if (line.length() == 0)
+        {
+            if (physId + 1 > out_nodes.size())
+                out_nodes.resize(physId + 1);
+            auto& numaNode = out_nodes[physId];
+            numaNode.numaId = physId;
+
+            if (coreId + 1 > numaNode.cores.size())
+                numaNode.cores.resize(coreId + 1);
+            auto& core = numaNode.cores[coreId];
+            core.procGroup = coreId;
+            core.threadIds.push_back(procId);
+        }
     }
 
-    if (threadId != uint32_t(-1))
+    out_numThreadsPerProcGroup = 0;
+    for (auto &node : out_nodes)
     {
-        // Save information.
-        if (out_nodes.size() <= numaId)
+        for (auto &core : node.cores)
         {
-            out_nodes.resize(numaId + 1);
+            out_numThreadsPerProcGroup = std::max((size_t)out_numThreadsPerProcGroup,
+                                                  core.threadIds.size());
         }
-        auto& numaNode = out_nodes[numaId];
-        numaNode.numaId = numaId;
-        if (numaNode.cores.size() <= coreId)
-        {
-            numaNode.cores.resize(coreId + 1);
-        }
-        auto& core = numaNode.cores[coreId];
-
-        core.procGroup = coreId;
-        core.threadIds.push_back(threadId);
-        out_numThreadsPerProcGroup++;
     }
 
 #else
@@ -316,7 +298,11 @@ void bindThread(SWR_CONTEXT* pContext, uint32_t threadId, uint32_t procGroupId =
     CPU_ZERO(&cpuset);
     CPU_SET(threadId, &cpuset);
 
-    pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuset);
+    int err = pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuset);
+    if (err != 0)
+    {
+        fprintf(stderr, "pthread_setaffinity_np failure for tid %u: %s\n", threadId, strerror(err));
+    }
 
 #endif
 }
@@ -1031,7 +1017,16 @@ void CreateThreadPool(SWR_CONTEXT* pContext, THREAD_POOL* pPool)
     }
     else
     {
-        pPool->numaMask = numNodes - 1; // Only works for 2**n numa nodes (1, 2, 4, etc.)
+        // numa distribution assumes workers on all nodes
+        bool useNuma = true;
+        if (numCoresPerNode * numHyperThreads == 1)
+            useNuma = false;
+
+        if (useNuma) {
+            pPool->numaMask = numNodes - 1; // Only works for 2**n numa nodes (1, 2, 4, etc.)
+        } else {
+            pPool->numaMask = 0;
+        }
 
         uint32_t workerId = 0;
         for (uint32_t n = 0; n < numNodes; ++n)
@@ -1064,7 +1059,7 @@ void CreateThreadPool(SWR_CONTEXT* pContext, THREAD_POOL* pPool)
                     pPool->pThreadData[workerId].workerId = workerId;
                     pPool->pThreadData[workerId].procGroupId = core.procGroup;
                     pPool->pThreadData[workerId].threadId = core.threadIds[t];
-                    pPool->pThreadData[workerId].numaId = node.numaId;
+                    pPool->pThreadData[workerId].numaId = useNuma ? n : 0;
                     pPool->pThreadData[workerId].coreId = c;
                     pPool->pThreadData[workerId].htId = t;
                     pPool->pThreadData[workerId].pContext = pContext;
-- 
2.7.4