Mesa (master): swr: [rasterizer core] Affinitize thread scratch space to numa node of worker
Tim Rowley
torowley at kemper.freedesktop.org
Tue Apr 12 17:13:47 UTC 2016
Module: Mesa
Branch: master
Commit: c25244f2f7f61ebb368a9651aef4de93bd8306ac
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=c25244f2f7f61ebb368a9651aef4de93bd8306ac
Author: Tim Rowley <timothy.o.rowley at intel.com>
Date: Thu Mar 24 16:20:02 2016 -0600
swr: [rasterizer core] Affinitize thread scratch space to numa node of worker
Acked-by: Brian Paul <brianp at vmware.com>
---
src/gallium/drivers/swr/rasterizer/core/api.cpp | 16 ++++++++++++++--
src/gallium/drivers/swr/rasterizer/core/arena.h | 2 +-
src/gallium/drivers/swr/rasterizer/core/backend.cpp | 2 +-
3 files changed, 16 insertions(+), 4 deletions(-)
diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp
index f0f7956..442cdd4 100644
--- a/src/gallium/drivers/swr/rasterizer/core/api.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp
@@ -93,8 +93,16 @@ HANDLE SwrCreateContext(
///@note We could lazily allocate this but its rather small amount of memory.
for (uint32_t i = 0; i < pContext->NumWorkerThreads; ++i)
{
- ///@todo Use numa API for allocations using numa information from thread data (if exists).
- pContext->pScratch[i] = (uint8_t*)_aligned_malloc((32 * 1024), KNOB_SIMD_WIDTH * 4);
+#if defined(_WIN32)
+ uint32_t numaNode = pContext->threadPool.pThreadData ?
+ pContext->threadPool.pThreadData[i].numaId : 0;
+ pContext->pScratch[i] = (uint8_t*)VirtualAllocExNuma(
+ GetCurrentProcess(), nullptr, 32 * sizeof(KILOBYTE),
+ MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE,
+ numaNode);
+#else
+ pContext->pScratch[i] = (uint8_t*)_aligned_malloc(32 * sizeof(KILOBYTE), KNOB_SIMD_WIDTH * 4);
+#endif
}
// State setup AFTER context is fully initialized
@@ -138,7 +146,11 @@ void SwrDestroyContext(HANDLE hContext)
// Free scratch space.
for (uint32_t i = 0; i < pContext->NumWorkerThreads; ++i)
{
+#if defined(_WIN32)
+ VirtualFree(pContext->pScratch[i], 0, MEM_RELEASE);
+#else
_aligned_free(pContext->pScratch[i]);
+#endif
}
delete(pContext->pHotTileMgr);
diff --git a/src/gallium/drivers/swr/rasterizer/core/arena.h b/src/gallium/drivers/swr/rasterizer/core/arena.h
index 67d81a4..0241f5b 100644
--- a/src/gallium/drivers/swr/rasterizer/core/arena.h
+++ b/src/gallium/drivers/swr/rasterizer/core/arena.h
@@ -209,7 +209,7 @@ struct CachingAllocatorT : DefaultAllocator
};
typedef CachingAllocatorT<> CachingAllocator;
-template<typename T = DefaultAllocator, size_t BlockSizeT = (128 * 1024)>
+template<typename T = DefaultAllocator, size_t BlockSizeT = 128 * sizeof(KILOBYTE)>
class TArena
{
public:
diff --git a/src/gallium/drivers/swr/rasterizer/core/backend.cpp b/src/gallium/drivers/swr/rasterizer/core/backend.cpp
index 7fb83ed..ad0a5a0 100644
--- a/src/gallium/drivers/swr/rasterizer/core/backend.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/backend.cpp
@@ -83,7 +83,7 @@ void ProcessComputeBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroup
if (pDC->pSpillFill[workerId] == nullptr)
{
///@todo Add state which indicates the spill fill size.
- pDC->pSpillFill[workerId] = (uint8_t*)pDC->pArena->AllocAlignedSync(4096 * 1024, sizeof(float) * 8);
+ pDC->pSpillFill[workerId] = (uint8_t*)pDC->pArena->AllocAlignedSync(4 * sizeof(MEGABYTE), sizeof(float) * 8);
}
const API_STATE& state = GetApiState(pDC);
More information about the mesa-commit
mailing list