Mesa (master): swr/rast: enable per-warp scratch space for CS

Tim Rowley torowley at kemper.freedesktop.org
Sat Apr 29 01:03:08 UTC 2017


Module: Mesa
Branch: master
Commit: dabd0499a6ba4032f2cf24103a7ca044061a3b98
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=dabd0499a6ba4032f2cf24103a7ca044061a3b98

Author: Tim Rowley <timothy.o.rowley at intel.com>
Date:   Wed Apr 19 17:03:32 2017 -0500

swr/rast: enable per-warp scratch space for CS

Reviewed-by: Bruce Cherniak <bruce.cherniak at intel.com>

---

 src/gallium/drivers/swr/rasterizer/core/api.cpp     |  6 +++++-
 src/gallium/drivers/swr/rasterizer/core/api.h       |  7 ++++++-
 src/gallium/drivers/swr/rasterizer/core/backend.cpp | 10 +++++++++-
 src/gallium/drivers/swr/rasterizer/core/backend.h   |  2 +-
 src/gallium/drivers/swr/rasterizer/core/context.h   |  2 ++
 src/gallium/drivers/swr/rasterizer/core/state.h     |  5 +++++
 src/gallium/drivers/swr/rasterizer/core/threads.cpp |  3 ++-
 src/gallium/drivers/swr/rasterizer/core/tilemgr.h   |  6 +++---
 8 files changed, 33 insertions(+), 8 deletions(-)

diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp
index 5c3225d58b..dc8f517386 100644
--- a/src/gallium/drivers/swr/rasterizer/core/api.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp
@@ -592,12 +592,16 @@ void SwrSetCsFunc(
     HANDLE hContext,
     PFN_CS_FUNC pfnCsFunc,
     uint32_t totalThreadsInGroup,
-    uint32_t totalSpillFillSize)
+    uint32_t totalSpillFillSize,
+    uint32_t scratchSpaceSizePerInstance,
+    uint32_t numInstances)
 {
     API_STATE* pState = GetDrawState(GetContext(hContext));
     pState->pfnCsFunc = pfnCsFunc;
     pState->totalThreadsInGroup = totalThreadsInGroup;
     pState->totalSpillFillSize = totalSpillFillSize;
+    pState->scratchSpaceSize = scratchSpaceSizePerInstance;
+    pState->scratchSpaceNumInstances = numInstances;
 }
 
 void SwrSetTsState(
diff --git a/src/gallium/drivers/swr/rasterizer/core/api.h b/src/gallium/drivers/swr/rasterizer/core/api.h
index b9b994ad37..166598a48d 100644
--- a/src/gallium/drivers/swr/rasterizer/core/api.h
+++ b/src/gallium/drivers/swr/rasterizer/core/api.h
@@ -366,11 +366,16 @@ void SWR_API SwrSetGsFunc(
 /// @param pfnCsFunc - Pointer to compute shader function
 /// @param totalThreadsInGroup - product of thread group dimensions.
 /// @param totalSpillFillSize - size in bytes needed for spill/fill.
+/// @param scratchSpaceSizePerInstance - size of the scratch space needed per simd instance
+/// @param numInstances - number of simd instances that are run per execution of the shader
 void SWR_API SwrSetCsFunc(
     HANDLE hContext,
     PFN_CS_FUNC pfnCsFunc,
     uint32_t totalThreadsInGroup,
-    uint32_t totalSpillFillSize);
+    uint32_t totalSpillFillSize,
+    uint32_t scratchSpaceSizePerInstance,
+    uint32_t numInstances
+    );
 
 //////////////////////////////////////////////////////////////////////////
 /// @brief Set tessellation state.
diff --git a/src/gallium/drivers/swr/rasterizer/core/backend.cpp b/src/gallium/drivers/swr/rasterizer/core/backend.cpp
index 39f4802db4..16698ef08f 100644
--- a/src/gallium/drivers/swr/rasterizer/core/backend.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/backend.cpp
@@ -45,7 +45,7 @@ static PFN_CLEAR_TILES sClearTilesTable[NUM_SWR_FORMATS];
 /// @param pDC - pointer to draw context (dispatch).
 /// @param workerId - The unique worker ID that is assigned to this thread.
 /// @param threadGroupId - the linear index for the thread group within the dispatch.
-void ProcessComputeBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroupId, void*& pSpillFillBuffer)
+void ProcessComputeBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroupId, void*& pSpillFillBuffer, void*& pScratchSpace)
 {
     SWR_CONTEXT *pContext = pDC->pContext;
 
@@ -60,6 +60,12 @@ void ProcessComputeBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroup
     {
         pSpillFillBuffer = pDC->pArena->AllocAlignedSync(spillFillSize, KNOB_SIMD_BYTES);
     }
+    
+    size_t scratchSpaceSize = pDC->pState->state.scratchSpaceSize * pDC->pState->state.scratchSpaceNumInstances;
+    if (scratchSpaceSize && pScratchSpace == nullptr)
+    {
+        pScratchSpace = pDC->pArena->AllocAlignedSync(scratchSpaceSize, KNOB_SIMD_BYTES);
+    }
 
     const API_STATE& state = GetApiState(pDC);
 
@@ -70,6 +76,8 @@ void ProcessComputeBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroup
     csContext.dispatchDims[2] = pTaskData->threadGroupCountZ;
     csContext.pTGSM = pContext->ppScratch[workerId];
     csContext.pSpillFillBuffer = (uint8_t*)pSpillFillBuffer;
+    csContext.pScratchSpace = (uint8_t*)pScratchSpace;
+    csContext.scratchSpacePerSimd = pDC->pState->state.scratchSpaceSize;
 
     state.pfnCsFunc(GetPrivateState(pDC), &csContext);
 
diff --git a/src/gallium/drivers/swr/rasterizer/core/backend.h b/src/gallium/drivers/swr/rasterizer/core/backend.h
index ade9afccd9..822daa3213 100644
--- a/src/gallium/drivers/swr/rasterizer/core/backend.h
+++ b/src/gallium/drivers/swr/rasterizer/core/backend.h
@@ -34,7 +34,7 @@
 #include "depthstencil.h"
 #include "rdtsc_core.h"
 
-void ProcessComputeBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroupId, void*& pSpillFillBuffer);
+void ProcessComputeBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroupId, void*& pSpillFillBuffer, void*& pScratchSpace);
 void ProcessSyncBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pUserData);
 void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pUserData);
 void ProcessStoreTilesBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData);
diff --git a/src/gallium/drivers/swr/rasterizer/core/context.h b/src/gallium/drivers/swr/rasterizer/core/context.h
index 94085e5999..7781feaf10 100644
--- a/src/gallium/drivers/swr/rasterizer/core/context.h
+++ b/src/gallium/drivers/swr/rasterizer/core/context.h
@@ -245,6 +245,8 @@ OSALIGNLINE(struct) API_STATE
     PFN_CS_FUNC             pfnCsFunc;
     uint32_t                totalThreadsInGroup;
     uint32_t                totalSpillFillSize;
+    uint32_t                scratchSpaceSize;
+    uint32_t                scratchSpaceNumInstances;
 
     // FE - Frontend State
     SWR_FRONTEND_STATE      frontendState;
diff --git a/src/gallium/drivers/swr/rasterizer/core/state.h b/src/gallium/drivers/swr/rasterizer/core/state.h
index 3d0b4ff951..bf735e03f2 100644
--- a/src/gallium/drivers/swr/rasterizer/core/state.h
+++ b/src/gallium/drivers/swr/rasterizer/core/state.h
@@ -378,6 +378,11 @@ struct SWR_CS_CONTEXT
     uint8_t* pTGSM;  // Thread Group Shared Memory pointer.
 
     uint8_t* pSpillFillBuffer;  // Spill/fill buffer for barrier support
+
+    uint8_t* pScratchSpace;     // Pointer to scratch space buffer used by the shader, shader is responsible
+                                // for subdividing scratch space per instance/simd
+
+    uint32_t scratchSpacePerSimd; // Scratch space per work item x SIMD_WIDTH
 };
 
 // enums
diff --git a/src/gallium/drivers/swr/rasterizer/core/threads.cpp b/src/gallium/drivers/swr/rasterizer/core/threads.cpp
index 295014d1a7..e03632b443 100644
--- a/src/gallium/drivers/swr/rasterizer/core/threads.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/threads.cpp
@@ -726,10 +726,11 @@ void WorkOnCompute(
         if (queue.getNumQueued() > 0)
         {
             void* pSpillFillBuffer = nullptr;
+            void* pScratchSpace = nullptr;
             uint32_t threadGroupId = 0;
             while (queue.getWork(threadGroupId))
             {
-                queue.dispatch(pDC, workerId, threadGroupId, pSpillFillBuffer);
+                queue.dispatch(pDC, workerId, threadGroupId, pSpillFillBuffer, pScratchSpace);
                 queue.finishedWork();
             }
 
diff --git a/src/gallium/drivers/swr/rasterizer/core/tilemgr.h b/src/gallium/drivers/swr/rasterizer/core/tilemgr.h
index bfff339a55..8f1cd21543 100644
--- a/src/gallium/drivers/swr/rasterizer/core/tilemgr.h
+++ b/src/gallium/drivers/swr/rasterizer/core/tilemgr.h
@@ -151,7 +151,7 @@ private:
     OSALIGNLINE(volatile LONG) mWorkItemsConsumed { 0 };
 };
 
-typedef void(*PFN_DISPATCH)(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroupId, void*& pSpillFillBuffer);
+typedef void(*PFN_DISPATCH)(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroupId, void*& pSpillFillBuffer, void*& pScratchSpace);
 
 //////////////////////////////////////////////////////////////////////////
 /// DispatchQueue - work queue for dispatch
@@ -231,10 +231,10 @@ public:
 
     //////////////////////////////////////////////////////////////////////////
     /// @brief Dispatches a unit of work
-    void dispatch(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroupId, void*& pSpillFillBuffer)
+    void dispatch(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroupId, void*& pSpillFillBuffer, void*& pScratchSpace)
     {
         SWR_ASSERT(mPfnDispatch != nullptr);
-        mPfnDispatch(pDC, workerId, threadGroupId, pSpillFillBuffer);
+        mPfnDispatch(pDC, workerId, threadGroupId, pSpillFillBuffer, pScratchSpace);
     }
 
     void* mpTaskData{ nullptr };        // The API thread will set this up and the callback task function will interpet this.




More information about the mesa-commit mailing list