[Mesa-dev] [PATCH v2 06/11] swr: [rasterizer core] TemplateArgUnroller

Tim Rowley timothy.o.rowley at intel.com
Fri Apr 22 00:37:44 UTC 2016


Switch boolean template arguments to typename template arguments of type
std::integral_constant<bool, VALUE>.

This allows the template argument unroller to easily be extended to enums.
---
 src/gallium/drivers/swr/rasterizer/core/api.cpp    |  41 +------
 .../drivers/swr/rasterizer/core/frontend.cpp       | 120 ++++++++++-----------
 src/gallium/drivers/swr/rasterizer/core/frontend.h |  12 ++-
 src/gallium/drivers/swr/rasterizer/core/pa.h       |   6 +-
 src/gallium/drivers/swr/rasterizer/core/utils.h    |  31 ++++++
 5 files changed, 101 insertions(+), 109 deletions(-)

diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp
index 06cbf7f..e950e92 100644
--- a/src/gallium/drivers/swr/rasterizer/core/api.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp
@@ -41,6 +41,7 @@
 #include "core/threads.h"
 #include "core/tilemgr.h"
 #include "core/clip.h"
+#include "core/utils.h"
 
 #include "common/simdintrin.h"
 #include "common/os.h"
@@ -1029,42 +1030,6 @@ uint32_t MaxVertsPerDraw(
     return vertsPerDraw;
 }
 
-// Recursive template used to auto-nest conditionals.  Converts dynamic boolean function
-// arguments to static template arguments.
-template <bool... ArgsB>
-struct FEDrawChooser
-{
-    // Last Arg Terminator
-    static PFN_FE_WORK_FUNC GetFunc(bool bArg)
-    {
-        if (bArg)
-        {
-            return ProcessDraw<ArgsB..., true>;
-        }
-
-        return ProcessDraw<ArgsB..., false>;
-    }
-
-    // Recursively parse args
-    template <typename... TArgsT>
-    static PFN_FE_WORK_FUNC GetFunc(bool bArg, TArgsT... remainingArgs)
-    {
-        if (bArg)
-        {
-            return FEDrawChooser<ArgsB..., true>::GetFunc(remainingArgs...);
-        }
-
-        return FEDrawChooser<ArgsB..., false>::GetFunc(remainingArgs...);
-    }
-};
-
-// Selector for correct templated Draw front-end function
-INLINE
-static PFN_FE_WORK_FUNC GetFEDrawFunc(bool IsIndexed, bool HasTessellation, bool HasGeometryShader, bool HasStreamOut, bool RasterizerEnabled)
-{
-    return FEDrawChooser<>::GetFunc(IsIndexed, HasTessellation, HasGeometryShader, HasStreamOut, RasterizerEnabled);
-}
-
 
 //////////////////////////////////////////////////////////////////////////
 /// @brief DrawInstanced
@@ -1119,7 +1084,7 @@ void DrawInstanced(
         InitDraw(pDC, isSplitDraw);
 
         pDC->FeWork.type = DRAW;
-        pDC->FeWork.pfnWork = GetFEDrawFunc(
+        pDC->FeWork.pfnWork = GetProcessDrawFunc(
             false,  // IsIndexed
             pState->tsState.tsEnable,
             pState->gsState.gsEnable,
@@ -1252,7 +1217,7 @@ void DrawIndexedInstance(
         InitDraw(pDC, isSplitDraw);
 
         pDC->FeWork.type = DRAW;
-        pDC->FeWork.pfnWork = GetFEDrawFunc(
+        pDC->FeWork.pfnWork = GetProcessDrawFunc(
             true,   // IsIndexed
             pState->tsState.tsEnable,
             pState->gsState.gsEnable,
diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
index 9386961..5dcd05b 100644
--- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
@@ -675,8 +675,8 @@ THREAD SWR_GS_CONTEXT tlsGsContext;
 /// @param pa - The primitive assembly object.
 /// @param pGsOut - output stream for GS
 template <
-    bool HasStreamOutT,
-    bool HasRastT>
+    typename HasStreamOutT,
+    typename HasRastT>
 static void GeometryShaderStage(
     DRAW_CONTEXT *pDC,
     uint32_t workerId,
@@ -759,7 +759,7 @@ static void GeometryShaderStage(
 
     // set up new binner and state for the GS output topology
     PFN_PROCESS_PRIMS pfnClipFunc = nullptr;
-    if (HasRastT)
+    if (HasRastT::value)
     {
         switch (pState->outputTopology)
         {
@@ -819,7 +819,7 @@ static void GeometryShaderStage(
                 else
                 {
                     // early exit if this stream is not enabled for streamout
-                    if (HasStreamOutT && !state.soState.streamEnable[stream])
+                    if (HasStreamOutT::value && !state.soState.streamEnable[stream])
                     {
                         continue;
                     }
@@ -842,12 +842,12 @@ static void GeometryShaderStage(
                         {
                             totalPrimsGenerated += gsPa.NumPrims();
 
-                            if (HasStreamOutT)
+                            if (HasStreamOutT::value)
                             {
                                 StreamOut(pDC, gsPa, workerId, pSoPrimData, stream);
                             }
 
-                            if (HasRastT && state.soState.streamToRasterizer == stream)
+                            if (HasRastT::value && state.soState.streamToRasterizer == stream)
                             {
                                 simdscalari vPrimId;
                                 // pull primitiveID from the GS output if available
@@ -957,9 +957,9 @@ static void AllocateTessellationData(SWR_CONTEXT* pContext)
 /// @param pa - The primitive assembly object.
 /// @param pGsOut - output stream for GS
 template <
-    bool HasGeometryShaderT,
-    bool HasStreamOutT,
-    bool HasRastT>
+    typename HasGeometryShaderT,
+    typename HasStreamOutT,
+    typename HasRastT>
 static void TessellationStages(
     DRAW_CONTEXT *pDC,
     uint32_t workerId,
@@ -995,7 +995,7 @@ static void TessellationStages(
     SWR_ASSERT(tsCtx);
 
     PFN_PROCESS_PRIMS pfnClipFunc = nullptr;
-    if (HasRastT)
+    if (HasRastT::value)
     {
         switch (tsState.postDSTopology)
         {
@@ -1107,7 +1107,7 @@ static void TessellationStages(
 
         while (tessPa.HasWork())
         {
-            if (HasGeometryShaderT)
+            if (HasGeometryShaderT::value)
             {
                 GeometryShaderStage<HasStreamOutT, HasRastT>(
                     pDC, workerId, tessPa, pGsOut, pCutBuffer, pCutStreamBuffer, pSoPrimData,
@@ -1115,12 +1115,12 @@ static void TessellationStages(
             }
             else
             {
-                if (HasStreamOutT)
+                if (HasStreamOutT::value)
                 {
                     StreamOut(pDC, tessPa, workerId, pSoPrimData, 0);
                 }
 
-                if (HasRastT)
+                if (HasRastT::value)
                 {
                     simdvector prim[3]; // Only deal with triangles, lines, or points
                     RDTSC_START(FEPAAssemble);
@@ -1149,7 +1149,7 @@ static void TessellationStages(
 /// @brief FE handler for SwrDraw.
 /// @tparam IsIndexedT - Is indexed drawing enabled
 /// @tparam HasTessellationT - Is tessellation enabled
-/// @tparam HasGeometryShaderT - Is the geometry shader stage enabled
+/// @tparam HasGeometryShaderT - Is the geometry shader stage enabled (queried via HasGeometryShaderT::value)
 /// @tparam HasStreamOutT - Is stream-out enabled
 /// @tparam HasRastT - Is rasterization enabled
 /// @param pContext - pointer to SWR context.
@@ -1157,11 +1157,11 @@ static void TessellationStages(
 /// @param workerId - thread's worker id.
 /// @param pUserData - Pointer to DRAW_WORK
 template <
-    bool IsIndexedT,
-    bool HasTessellationT,
-    bool HasGeometryShaderT,
-    bool HasStreamOutT,
-    bool HasRastT>
+    typename IsIndexedT,
+    typename HasTessellationT,
+    typename HasGeometryShaderT,
+    typename HasStreamOutT,
+    typename HasRastT>
 void ProcessDraw(
     SWR_CONTEXT *pContext,
     DRAW_CONTEXT *pDC,
@@ -1188,7 +1188,7 @@ void ProcessDraw(
     uint32_t endVertex = work.numVerts; 
 
     const int32_t* pLastRequestedIndex = nullptr;
-    if (IsIndexedT)
+    if (IsIndexedT::value)
     {
         switch (work.type)
         {
@@ -1223,7 +1223,7 @@ void ProcessDraw(
 
     vsContext.pVin = &vin;
 
-    if (IsIndexedT)
+    if (IsIndexedT::value)
     {
         fetchInfo.BaseVertex = work.baseVertex;
 
@@ -1247,12 +1247,12 @@ void ProcessDraw(
     void* pGsOut = nullptr;
     void* pCutBuffer = nullptr;
     void* pStreamCutBuffer = nullptr;
-    if (HasGeometryShaderT)
+    if (HasGeometryShaderT::value)
     {
         AllocateGsBuffers(pDC, state, &pGsOut, &pCutBuffer, &pStreamCutBuffer);
     }
 
-    if (HasTessellationT)
+    if (HasTessellationT::value)
     {
         SWR_ASSERT(state.tsState.tsEnable == true);
         SWR_ASSERT(state.pfnHsFunc != nullptr);
@@ -1269,7 +1269,7 @@ void ProcessDraw(
 
     // allocate space for streamout input prim data
     uint32_t* pSoPrimData = nullptr;
-    if (HasStreamOutT)
+    if (HasStreamOutT::value)
     {
         pSoPrimData = (uint32_t*)pDC->pArena->AllocAligned(4096, 16);
 
@@ -1291,7 +1291,7 @@ void ProcessDraw(
         simdscalari vIndex;
         uint32_t  i = 0;
 
-        if (IsIndexedT)
+        if (IsIndexedT::value)
         {
             fetchInfo.pIndices = work.pIB;
         }
@@ -1309,7 +1309,7 @@ void ProcessDraw(
             // PaGetNextVsOutput currently has the side effect of updating some PA state machine state.
             // So we need to keep this outside of (i < endVertex) check.
             simdmask* pvCutIndices = nullptr;
-            if (IsIndexedT)
+            if (IsIndexedT::value)
             {
                 pvCutIndices = &pa.GetNextVsIndices();
             }
@@ -1332,7 +1332,7 @@ void ProcessDraw(
                 vsContext.mask = GenerateMask(endVertex - i);
 
                 // forward cut mask to the PA
-                if (IsIndexedT)
+                if (IsIndexedT::value)
                 {
                     *pvCutIndices = _simd_movemask_ps(_simd_castsi_ps(fetchInfo.CutMask));
                 }
@@ -1372,12 +1372,12 @@ void ProcessDraw(
                         {
                             UPDATE_STAT(IaPrimitives, pa.NumPrims());
 
-                            if (HasTessellationT)
+                            if (HasTessellationT::value)
                             {
                                 TessellationStages<HasGeometryShaderT, HasStreamOutT, HasRastT>(
                                     pDC, workerId, pa, pGsOut, pCutBuffer, pStreamCutBuffer, pSoPrimData, pa.GetPrimID(work.startPrimID));
                             }
-                            else if (HasGeometryShaderT)
+                            else if (HasGeometryShaderT::value)
                             {
                                 GeometryShaderStage<HasStreamOutT, HasRastT>(
                                     pDC, workerId, pa, pGsOut, pCutBuffer, pStreamCutBuffer, pSoPrimData, pa.GetPrimID(work.startPrimID));
@@ -1385,12 +1385,12 @@ void ProcessDraw(
                             else
                             {
                                 // If streamout is enabled then stream vertices out to memory.
-                                if (HasStreamOutT)
+                                if (HasStreamOutT::value)
                                 {
                                     StreamOut(pDC, pa, workerId, pSoPrimData, 0);
                                 }
 
-                                if (HasRastT)
+                                if (HasRastT::value)
                                 {
                                     SWR_ASSERT(pDC->pState->pfnProcessPrims);
                                     pDC->pState->pfnProcessPrims(pDC, pa, workerId, prim,
@@ -1403,7 +1403,7 @@ void ProcessDraw(
             } while (pa.NextPrim());
 
             i += KNOB_SIMD_WIDTH;
-            if (IsIndexedT)
+            if (IsIndexedT::value)
             {
                 fetchInfo.pIndices = (int*)((uint8_t*)fetchInfo.pIndices + KNOB_SIMD_WIDTH * indexSize);
             }
@@ -1417,39 +1417,29 @@ void ProcessDraw(
 
     RDTSC_STOP(FEProcessDraw, numPrims * work.numInstances, pDC->drawId);
 }
-// Explicit Instantiation of all combinations
-template void ProcessDraw<false, false, false, false, false>(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-template void ProcessDraw<false, false, false, false, true >(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-template void ProcessDraw<false, false, false, true,  false>(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-template void ProcessDraw<false, false, false, true,  true >(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-template void ProcessDraw<false, false, true,  false, false>(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-template void ProcessDraw<false, false, true,  false, true >(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-template void ProcessDraw<false, false, true,  true,  false>(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-template void ProcessDraw<false, false, true,  true,  true >(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-template void ProcessDraw<false, true,  false, false, false>(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-template void ProcessDraw<false, true,  false, false, true >(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-template void ProcessDraw<false, true,  false, true,  false>(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-template void ProcessDraw<false, true,  false, true,  true >(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-template void ProcessDraw<false, true,  true,  false, false>(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-template void ProcessDraw<false, true,  true,  false, true >(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-template void ProcessDraw<false, true,  true,  true,  false>(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-template void ProcessDraw<false, true,  true,  true,  true >(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-template void ProcessDraw<true,  false, false, false, false>(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-template void ProcessDraw<true,  false, false, false, true >(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-template void ProcessDraw<true,  false, false, true,  false>(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-template void ProcessDraw<true,  false, false, true,  true >(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-template void ProcessDraw<true,  false, true,  false, false>(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-template void ProcessDraw<true,  false, true,  false, true >(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-template void ProcessDraw<true,  false, true,  true,  false>(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-template void ProcessDraw<true,  false, true,  true,  true >(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-template void ProcessDraw<true,  true,  false, false, false>(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-template void ProcessDraw<true,  true,  false, false, true >(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-template void ProcessDraw<true,  true,  false, true,  false>(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-template void ProcessDraw<true,  true,  false, true,  true >(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-template void ProcessDraw<true,  true,  true,  false, false>(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-template void ProcessDraw<true,  true,  true,  false, true >(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-template void ProcessDraw<true,  true,  true,  true,  false>(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-template void ProcessDraw<true,  true,  true,  true,  true >(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
+
+struct FEDrawChooser
+{
+    typedef PFN_FE_WORK_FUNC FuncType;
+    // Called by TemplateArgUnroller once every flag is a type: returns that ProcessDraw instantiation
+    template <typename... ArgsB>
+    static FuncType GetFunc()
+    {
+        return ProcessDraw<ArgsB...>;
+    }
+};
+
+
+// Selector for correct templated Draw front-end function: unrolls the runtime flags, in order, into ProcessDraw's template arguments
+PFN_FE_WORK_FUNC GetProcessDrawFunc(
+    bool IsIndexed,
+    bool HasTessellation,
+    bool HasGeometryShader,
+    bool HasStreamOut,
+    bool HasRasterization)
+{
+    return TemplateArgUnroller<FEDrawChooser>::GetFunc(IsIndexed, HasTessellation, HasGeometryShader, HasStreamOut, HasRasterization);
+}
 
 
 //////////////////////////////////////////////////////////////////////////
diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.h b/src/gallium/drivers/swr/rasterizer/core/frontend.h
index f92f88c..8307c0b 100644
--- a/src/gallium/drivers/swr/rasterizer/core/frontend.h
+++ b/src/gallium/drivers/swr/rasterizer/core/frontend.h
@@ -28,6 +28,7 @@
 ******************************************************************************/
 #pragma once
 #include "context.h"
+#include <type_traits>
 
 INLINE
 __m128i fpToFixedPoint(const __m128 vIn)
@@ -309,9 +310,14 @@ bool CanUseSimplePoints(DRAW_CONTEXT *pDC)
 uint32_t GetNumPrims(PRIMITIVE_TOPOLOGY mode, uint32_t numElements);
 uint32_t NumVertsPerPrim(PRIMITIVE_TOPOLOGY topology, bool includeAdjVerts);
 
-// Templated Draw front-end function.  All combinations of template parameter values are available
-template <bool IsIndexedT, bool HasTessellationT, bool HasGeometryShaderT, bool HasStreamOutT, bool HasRastT>
-void ProcessDraw(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
+
+// Returns the ProcessDraw front-end function matching the given flags.  All combinations of parameter values are available
+PFN_FE_WORK_FUNC GetProcessDrawFunc(
+    bool IsIndexed,
+    bool HasTessellation,
+    bool HasGeometryShader,
+    bool HasStreamOut,
+    bool HasRasterization);
 
 void ProcessClear(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
 void ProcessStoreTiles(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
diff --git a/src/gallium/drivers/swr/rasterizer/core/pa.h b/src/gallium/drivers/swr/rasterizer/core/pa.h
index 17f4885..067deab 100644
--- a/src/gallium/drivers/swr/rasterizer/core/pa.h
+++ b/src/gallium/drivers/swr/rasterizer/core/pa.h
@@ -1146,14 +1146,14 @@ private:
 
 // Primitive Assembler factory class, responsible for creating and initializing the correct assembler
 // based on state.
-template <bool IsIndexedT>
+template <typename IsIndexedT>
 struct PA_FACTORY
 {
     PA_FACTORY(DRAW_CONTEXT* pDC, PRIMITIVE_TOPOLOGY in_topo, uint32_t numVerts) : topo(in_topo)
     {
 #if KNOB_ENABLE_CUT_AWARE_PA == TRUE
         const API_STATE& state = GetApiState(pDC);
-        if ((IsIndexedT && (
+        if ((IsIndexedT::value && (
             topo == TOP_TRIANGLE_STRIP || topo == TOP_POINT_LIST ||
             topo == TOP_LINE_LIST || topo == TOP_LINE_STRIP ||
             topo == TOP_TRIANGLE_LIST || topo == TOP_LINE_LIST_ADJ ||
@@ -1162,7 +1162,7 @@ struct PA_FACTORY
 
             // non-indexed draws with adjacency topologies must use cut-aware PA until we add support
             // for them in the optimized PA
-            (!IsIndexedT && (
+            (!IsIndexedT::value && (
             topo == TOP_LINE_LIST_ADJ || topo == TOP_LISTSTRIP_ADJ || topo == TOP_TRI_LIST_ADJ || topo == TOP_TRI_STRIP_ADJ)))
         {
             memset(&indexStore, 0, sizeof(indexStore));
diff --git a/src/gallium/drivers/swr/rasterizer/core/utils.h b/src/gallium/drivers/swr/rasterizer/core/utils.h
index 60a3a6a..63ecd5c 100644
--- a/src/gallium/drivers/swr/rasterizer/core/utils.h
+++ b/src/gallium/drivers/swr/rasterizer/core/utils.h
@@ -28,6 +28,7 @@
 #pragma once
 
 #include <string.h>
+#include <type_traits>
 #include "common/os.h"
 #include "common/simdintrin.h"
 #include "common/swr_assert.h"
@@ -834,3 +835,33 @@ public:
         return T(word & ELEMENT_MASK);
     }
 };
+
+// Recursive template used to auto-nest conditionals.  Converts dynamic boolean function arguments to
+// static std::true_type / std::false_type template arguments, then calls TermT::GetFunc<...>().
+template <typename TermT, typename... ArgsB>
+struct TemplateArgUnroller
+{
+    // Last Arg Terminator: append the final bool as a type and ask TermT for the function
+    static typename TermT::FuncType GetFunc(bool bArg)
+    {
+        if (bArg)
+        {
+            return TermT::template GetFunc<ArgsB..., std::true_type>();
+        }
+
+        return TermT::template GetFunc<ArgsB..., std::false_type>();
+    }
+
+    // Recursively parse args: fix bArg as a type in ArgsB, then unroll the remaining runtime args
+    template <typename... TArgsT>
+    static typename TermT::FuncType GetFunc(bool bArg, TArgsT... remainingArgs)
+    {
+        if (bArg)
+        {
+            return TemplateArgUnroller<TermT, ArgsB..., std::true_type>::GetFunc(remainingArgs...);
+        }
+
+        return TemplateArgUnroller<TermT, ArgsB..., std::false_type>::GetFunc(remainingArgs...);
+    }
+};
+
-- 
1.9.1



More information about the mesa-dev mailing list