Mesa (master): swr: [rasterizer core] improve implementation for SoWriteOffset

Tim Rowley torowley at kemper.freedesktop.org
Wed Aug 10 18:17:13 UTC 2016


Module: Mesa
Branch: master
Commit: 7cf187d08ae6a64c959de1cdf9004f5fb2fd097a
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=7cf187d08ae6a64c959de1cdf9004f5fb2fd097a

Author: Tim Rowley <timothy.o.rowley at intel.com>
Date:   Tue Aug  2 13:16:52 2016 -0600

swr: [rasterizer core] improve implementation for SoWriteOffset

1. SoWriteOffset is no longer treated as a stat
2. Added callback from core to update streamout write offset

Signed-off-by: Tim Rowley <timothy.o.rowley at intel.com>

---

 src/gallium/drivers/swr/rasterizer/core/api.cpp    |  7 ++++--
 src/gallium/drivers/swr/rasterizer/core/api.h      | 16 ++++++++++--
 .../drivers/swr/rasterizer/core/backend.cpp        |  6 -----
 src/gallium/drivers/swr/rasterizer/core/context.h  | 17 ++++++++++---
 .../drivers/swr/rasterizer/core/frontend.cpp       | 15 ++++-------
 src/gallium/drivers/swr/rasterizer/core/state.h    |  3 ++-
 .../drivers/swr/rasterizer/core/threads.cpp        | 29 +++++++++++++++++-----
 src/gallium/drivers/swr/swr_context.cpp            |  1 +
 8 files changed, 63 insertions(+), 31 deletions(-)

diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp
index 0035258..bc36cfb 100644
--- a/src/gallium/drivers/swr/rasterizer/core/api.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp
@@ -1,5 +1,5 @@
 /****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
+* Copyright (C) 2014-2016 Intel Corporation.   All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -128,10 +128,11 @@ HANDLE SwrCreateContext(
     // initialize function pointer tables
     InitClearTilesTable();
 
-    // initialize store tiles function
+    // initialize callback functions
     pContext->pfnLoadTile = pCreateInfo->pfnLoadTile;
     pContext->pfnStoreTile = pCreateInfo->pfnStoreTile;
     pContext->pfnClearTile = pCreateInfo->pfnClearTile;
+    pContext->pfnUpdateSoWriteOffset = pCreateInfo->pfnUpdateSoWriteOffset;
 
     // pass pointer to bucket manager back to caller
 #ifdef KNOB_ENABLE_RDTSC
@@ -336,6 +337,8 @@ DRAW_CONTEXT* GetDrawContext(SWR_CONTEXT *pContext, bool isSplitDraw = false)
         pCurDrawContext->threadsDone = 0;
         pCurDrawContext->retireCallback.pfnCallbackFunc = nullptr;
 
+        memset(&pCurDrawContext->dynState, 0, sizeof(pCurDrawContext->dynState));
+
         // Assign unique drawId for this DC
         pCurDrawContext->drawId = pContext->dcRing.GetHead();
 
diff --git a/src/gallium/drivers/swr/rasterizer/core/api.h b/src/gallium/drivers/swr/rasterizer/core/api.h
index ab56cab..681792f 100644
--- a/src/gallium/drivers/swr/rasterizer/core/api.h
+++ b/src/gallium/drivers/swr/rasterizer/core/api.h
@@ -1,5 +1,5 @@
 /****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
+* Copyright (C) 2014-2016 Intel Corporation.   All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -67,6 +67,7 @@ typedef void(SWR_API *PFN_STORE_TILE)(HANDLE hPrivateContext, SWR_FORMAT srcForm
     SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
     uint32_t x, uint32_t y, uint32_t renderTargetArrayIndex, uint8_t *pSrcHotTile);
 
+//////////////////////////////////////////////////////////////////////////
 /// @brief Function signature for clearing from the hot tiles clear value
 /// @param hPrivateContext - handle to private data
 /// @param renderTargetIndex - render target to store, can be color, depth or stencil
@@ -77,6 +78,16 @@ typedef void(SWR_API *PFN_CLEAR_TILE)(HANDLE hPrivateContext,
     SWR_RENDERTARGET_ATTACHMENT rtIndex,
     uint32_t x, uint32_t y, const float* pClearColor);
 
+//////////////////////////////////////////////////////////////////////////
+/// @brief Callback to allow the driver to update its copy of the streamout write offset.
+///        This call is made for any draw operation that has streamout enabled
+///        and has updated the write offset.
+/// @param hPrivateContext - handle to private data
+/// @param soBufferSlot - buffer slot for write offset
+/// @param soWriteOffset - updated value of the streamout write offset.
+typedef void(SWR_API *PFN_UPDATE_SO_WRITE_OFFSET)(HANDLE hPrivateContext,
+    uint32_t soBufferSlot, uint32_t soWriteOffset);
+
 class BucketManager;
 
 //////////////////////////////////////////////////////////////////////////
@@ -90,10 +101,11 @@ struct SWR_CREATECONTEXT_INFO
     // Use SwrGetPrivateContextState() to access private state.
     uint32_t privateStateSize;
 
-    // Tile manipulation functions
+    // Callback functions
     PFN_LOAD_TILE pfnLoadTile;
     PFN_STORE_TILE pfnStoreTile;
     PFN_CLEAR_TILE pfnClearTile;
+    PFN_UPDATE_SO_WRITE_OFFSET pfnUpdateSoWriteOffset;
 
     // Pointer to rdtsc buckets mgr returned to the caller.
     // Only populated when KNOB_ENABLE_RDTSC is set
diff --git a/src/gallium/drivers/swr/rasterizer/core/backend.cpp b/src/gallium/drivers/swr/rasterizer/core/backend.cpp
index 92634b1..900849d 100644
--- a/src/gallium/drivers/swr/rasterizer/core/backend.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/backend.cpp
@@ -111,12 +111,6 @@ void ProcessQueryStatsBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTil
 
         for (uint32_t stream = 0; stream < MAX_SO_STREAMS; ++stream)
         {
-            pStats->SoWriteOffset[stream] += pContext->stats[i].SoWriteOffset[stream];
-
-            /// @note client is required to provide valid write offset before every draw, so we clear
-            /// out the contents of the write offset when storing stats
-            pContext->stats[i].SoWriteOffset[stream] = 0;
-
             pStats->SoPrimStorageNeeded[stream] += pContext->stats[i].SoPrimStorageNeeded[stream];
             pStats->SoNumPrimsWritten[stream] += pContext->stats[i].SoNumPrimsWritten[stream];
         }
diff --git a/src/gallium/drivers/swr/rasterizer/core/context.h b/src/gallium/drivers/swr/rasterizer/core/context.h
index 56f9797..b36154b 100644
--- a/src/gallium/drivers/swr/rasterizer/core/context.h
+++ b/src/gallium/drivers/swr/rasterizer/core/context.h
@@ -1,5 +1,5 @@
 /****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
+* Copyright (C) 2014-2016 Intel Corporation.   All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -367,6 +367,13 @@ struct DRAW_STATE
     CachingArena* pArena;     // This should only be used by API thread.
 };
 
+struct DRAW_DYNAMIC_STATE
+{
+    ///@todo Currently assumes only a single FE can do stream output for a draw.
+    uint32_t SoWriteOffset[4];
+    bool     SoWriteOffsetDirty[4];
+};
+
 // Draw Context
 //    The api thread sets up a draw context that exists for the life of the draw.
 //    This draw context maintains all of the state needed for the draw operation.
@@ -378,7 +385,9 @@ struct DRAW_CONTEXT
         MacroTileMgr*   pTileMgr;
         DispatchQueue*  pDispatch;      // Queue for thread groups. (isCompute)
     };
-    DRAW_STATE*     pState;
+    DRAW_STATE*     pState;             // Read-only state. Core should not update this outside of API thread.
+    DRAW_DYNAMIC_STATE dynState;
+
     CachingArena*   pArena;
 
     uint32_t        drawId;
@@ -465,10 +474,11 @@ struct SWR_CONTEXT
 
     HotTileMgr *pHotTileMgr;
 
-    // tile load/store functions, passed in at create context time
+    // Callback functions, passed in at create context time
     PFN_LOAD_TILE pfnLoadTile;
     PFN_STORE_TILE pfnStoreTile;
     PFN_CLEAR_TILE pfnClearTile;
+    PFN_UPDATE_SO_WRITE_OFFSET pfnUpdateSoWriteOffset;
 
     // Global Stats
     SWR_STATS stats[KNOB_MAX_NUM_THREADS];
@@ -484,4 +494,3 @@ void WaitForDependencies(SWR_CONTEXT *pContext, uint64_t drawId);
 void WakeAllThreads(SWR_CONTEXT *pContext);
 
 #define UPDATE_STAT(name, count) if (GetApiState(pDC).enableStats) { pContext->stats[workerId].name += count; }
-#define SET_STAT(name, count) if (GetApiState(pDC).enableStats) { pContext->stats[workerId].name = count; }
diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
index 61119d9..24b217d 100644
--- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
@@ -594,10 +594,12 @@ static void StreamOut(
         if (state.soBuffer[i].pWriteOffset)
         {
             *state.soBuffer[i].pWriteOffset = soContext.pBuffer[i]->streamOffset * sizeof(uint32_t);
+        }
 
-            // The SOS increments the existing write offset. So we don't want to increment
-            // the SoWriteOffset stat using an absolute offset instead of relative.
-            SET_STAT(SoWriteOffset[i], soContext.pBuffer[i]->streamOffset);
+        if (state.soBuffer[i].soWriteEnable)
+        {
+            pDC->dynState.SoWriteOffset[i] = soContext.pBuffer[i]->streamOffset * sizeof(uint32_t);
+            pDC->dynState.SoWriteOffsetDirty[i] = true;
         }
     }
 
@@ -1265,13 +1267,6 @@ void ProcessDraw(
     if (HasStreamOutT::value)
     {
         pSoPrimData = (uint32_t*)pDC->pArena->AllocAligned(4096, 16);
-
-        // update the
-        for (uint32_t i = 0; i < 4; ++i)
-        {
-            SET_STAT(SoWriteOffset[i], state.soBuffer[i].streamOffset);
-        }
-
     }
 
     // choose primitive assembler
diff --git a/src/gallium/drivers/swr/rasterizer/core/state.h b/src/gallium/drivers/swr/rasterizer/core/state.h
index 3514a8f..f2ebf92 100644
--- a/src/gallium/drivers/swr/rasterizer/core/state.h
+++ b/src/gallium/drivers/swr/rasterizer/core/state.h
@@ -575,7 +575,6 @@ struct SWR_STATS
     uint64_t GsPrimitives;  // Number of prims GS outputs.
 
     // Streamout Stats
-    uint32_t SoWriteOffset[4];
     uint64_t SoPrimStorageNeeded[4];
     uint64_t SoNumPrimsWritten[4];
 };
@@ -585,11 +584,13 @@ struct SWR_STATS
 /////////////////////////////////////////////////////////////////////////
 
 #define MAX_SO_STREAMS 4
+#define MAX_SO_BUFFERS 4
 #define MAX_ATTRIBUTES 32
 
 struct SWR_STREAMOUT_BUFFER
 {
     bool enable;
+    bool soWriteEnable;
 
     // Pointers to streamout buffers.
     uint32_t* pBuffer;
diff --git a/src/gallium/drivers/swr/rasterizer/core/threads.cpp b/src/gallium/drivers/swr/rasterizer/core/threads.cpp
index 0800d17..e87e732 100644
--- a/src/gallium/drivers/swr/rasterizer/core/threads.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/threads.cpp
@@ -320,6 +320,28 @@ bool CheckDependency(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t lastReti
     return pDC->dependent && IDComparesLess(lastRetiredDraw, pDC->drawId - 1);
 }
 
+INLINE void ExecuteCallbacks(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC)
+{
+    if (pContext->pfnUpdateSoWriteOffset)
+    {
+        for (uint32_t i = 0; i < MAX_SO_BUFFERS; ++i)
+        {
+            if ((pDC->dynState.SoWriteOffsetDirty[i]) &&
+                (pDC->pState->state.soBuffer[i].soWriteEnable))
+            {
+                pContext->pfnUpdateSoWriteOffset(GetPrivateState(pDC), i, pDC->dynState.SoWriteOffset[i]);
+            }
+        }
+    }
+
+    if (pDC->retireCallback.pfnCallbackFunc)
+    {
+        pDC->retireCallback.pfnCallbackFunc(pDC->retireCallback.userData,
+            pDC->retireCallback.userData2,
+            pDC->retireCallback.userData3);
+    }
+}
+
 // inlined-only version
 INLINE int64_t CompleteDrawContextInl(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC)
 {
@@ -328,12 +350,7 @@ INLINE int64_t CompleteDrawContextInl(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC)
 
     if (result == 0)
     {
-        if (pDC->retireCallback.pfnCallbackFunc)
-        {
-            pDC->retireCallback.pfnCallbackFunc(pDC->retireCallback.userData,
-                                                pDC->retireCallback.userData2,
-                                                pDC->retireCallback.userData3);
-        }
+        ExecuteCallbacks(pContext, pDC);
 
         // Cleanup memory allocations
         pDC->pArena->Reset(true);
diff --git a/src/gallium/drivers/swr/swr_context.cpp b/src/gallium/drivers/swr/swr_context.cpp
index 835c353..c8d5cd6 100644
--- a/src/gallium/drivers/swr/swr_context.cpp
+++ b/src/gallium/drivers/swr/swr_context.cpp
@@ -352,6 +352,7 @@ swr_create_context(struct pipe_screen *p_screen, void *priv, unsigned flags)
       new std::unordered_map<BLEND_COMPILE_STATE, PFN_BLEND_JIT_FUNC>;
 
    SWR_CREATECONTEXT_INFO createInfo;
+   memset(&createInfo, 0, sizeof(createInfo));
    createInfo.driver = GL;
    createInfo.privateStateSize = sizeof(swr_draw_context);
    createInfo.pfnLoadTile = swr_LoadHotTile;




More information about the mesa-commit mailing list