[Mesa-dev] [PATCH 06/11] swr: [rasterizer core] TemplateArgUnroller
Tim Rowley
timothy.o.rowley at intel.com
Thu Apr 14 19:53:13 UTC 2016
Switch boolean template arguments to typename template arguments of type
std::integral_constant<bool, VALUE>.
This allows the template argument unroller to easily be extended to enums.
---
src/gallium/drivers/swr/rasterizer/core/api.cpp | 41 +------
.../drivers/swr/rasterizer/core/frontend.cpp | 120 ++++++++++-----------
src/gallium/drivers/swr/rasterizer/core/frontend.h | 12 ++-
src/gallium/drivers/swr/rasterizer/core/pa.h | 6 +-
src/gallium/drivers/swr/rasterizer/core/utils.h | 31 ++++++
5 files changed, 101 insertions(+), 109 deletions(-)
diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp
index 06cbf7f..e950e92 100644
--- a/src/gallium/drivers/swr/rasterizer/core/api.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp
@@ -41,6 +41,7 @@
#include "core/threads.h"
#include "core/tilemgr.h"
#include "core/clip.h"
+#include "core/utils.h"
#include "common/simdintrin.h"
#include "common/os.h"
@@ -1029,42 +1030,6 @@ uint32_t MaxVertsPerDraw(
return vertsPerDraw;
}
-// Recursive template used to auto-nest conditionals. Converts dynamic boolean function
-// arguments to static template arguments.
-template <bool... ArgsB>
-struct FEDrawChooser
-{
- // Last Arg Terminator
- static PFN_FE_WORK_FUNC GetFunc(bool bArg)
- {
- if (bArg)
- {
- return ProcessDraw<ArgsB..., true>;
- }
-
- return ProcessDraw<ArgsB..., false>;
- }
-
- // Recursively parse args
- template <typename... TArgsT>
- static PFN_FE_WORK_FUNC GetFunc(bool bArg, TArgsT... remainingArgs)
- {
- if (bArg)
- {
- return FEDrawChooser<ArgsB..., true>::GetFunc(remainingArgs...);
- }
-
- return FEDrawChooser<ArgsB..., false>::GetFunc(remainingArgs...);
- }
-};
-
-// Selector for correct templated Draw front-end function
-INLINE
-static PFN_FE_WORK_FUNC GetFEDrawFunc(bool IsIndexed, bool HasTessellation, bool HasGeometryShader, bool HasStreamOut, bool RasterizerEnabled)
-{
- return FEDrawChooser<>::GetFunc(IsIndexed, HasTessellation, HasGeometryShader, HasStreamOut, RasterizerEnabled);
-}
-
//////////////////////////////////////////////////////////////////////////
/// @brief DrawInstanced
@@ -1119,7 +1084,7 @@ void DrawInstanced(
InitDraw(pDC, isSplitDraw);
pDC->FeWork.type = DRAW;
- pDC->FeWork.pfnWork = GetFEDrawFunc(
+ pDC->FeWork.pfnWork = GetProcessDrawFunc(
false, // IsIndexed
pState->tsState.tsEnable,
pState->gsState.gsEnable,
@@ -1252,7 +1217,7 @@ void DrawIndexedInstance(
InitDraw(pDC, isSplitDraw);
pDC->FeWork.type = DRAW;
- pDC->FeWork.pfnWork = GetFEDrawFunc(
+ pDC->FeWork.pfnWork = GetProcessDrawFunc(
true, // IsIndexed
pState->tsState.tsEnable,
pState->gsState.gsEnable,
diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
index 9386961..5dcd05b 100644
--- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
@@ -675,8 +675,8 @@ THREAD SWR_GS_CONTEXT tlsGsContext;
/// @param pa - The primitive assembly object.
/// @param pGsOut - output stream for GS
template <
- bool HasStreamOutT,
- bool HasRastT>
+ typename HasStreamOutT,
+ typename HasRastT>
static void GeometryShaderStage(
DRAW_CONTEXT *pDC,
uint32_t workerId,
@@ -759,7 +759,7 @@ static void GeometryShaderStage(
// set up new binner and state for the GS output topology
PFN_PROCESS_PRIMS pfnClipFunc = nullptr;
- if (HasRastT)
+ if (HasRastT::value)
{
switch (pState->outputTopology)
{
@@ -819,7 +819,7 @@ static void GeometryShaderStage(
else
{
// early exit if this stream is not enabled for streamout
- if (HasStreamOutT && !state.soState.streamEnable[stream])
+ if (HasStreamOutT::value && !state.soState.streamEnable[stream])
{
continue;
}
@@ -842,12 +842,12 @@ static void GeometryShaderStage(
{
totalPrimsGenerated += gsPa.NumPrims();
- if (HasStreamOutT)
+ if (HasStreamOutT::value)
{
StreamOut(pDC, gsPa, workerId, pSoPrimData, stream);
}
- if (HasRastT && state.soState.streamToRasterizer == stream)
+ if (HasRastT::value && state.soState.streamToRasterizer == stream)
{
simdscalari vPrimId;
// pull primitiveID from the GS output if available
@@ -957,9 +957,9 @@ static void AllocateTessellationData(SWR_CONTEXT* pContext)
/// @param pa - The primitive assembly object.
/// @param pGsOut - output stream for GS
template <
- bool HasGeometryShaderT,
- bool HasStreamOutT,
- bool HasRastT>
+ typename HasGeometryShaderT,
+ typename HasStreamOutT,
+ typename HasRastT>
static void TessellationStages(
DRAW_CONTEXT *pDC,
uint32_t workerId,
@@ -995,7 +995,7 @@ static void TessellationStages(
SWR_ASSERT(tsCtx);
PFN_PROCESS_PRIMS pfnClipFunc = nullptr;
- if (HasRastT)
+ if (HasRastT::value)
{
switch (tsState.postDSTopology)
{
@@ -1107,7 +1107,7 @@ static void TessellationStages(
while (tessPa.HasWork())
{
- if (HasGeometryShaderT)
+ if (HasGeometryShaderT::value)
{
GeometryShaderStage<HasStreamOutT, HasRastT>(
pDC, workerId, tessPa, pGsOut, pCutBuffer, pCutStreamBuffer, pSoPrimData,
@@ -1115,12 +1115,12 @@ static void TessellationStages(
}
else
{
- if (HasStreamOutT)
+ if (HasStreamOutT::value)
{
StreamOut(pDC, tessPa, workerId, pSoPrimData, 0);
}
- if (HasRastT)
+ if (HasRastT::value)
{
simdvector prim[3]; // Only deal with triangles, lines, or points
RDTSC_START(FEPAAssemble);
@@ -1149,7 +1149,7 @@ static void TessellationStages(
/// @brief FE handler for SwrDraw.
/// @tparam IsIndexedT - Is indexed drawing enabled
/// @tparam HasTessellationT - Is tessellation enabled
-/// @tparam HasGeometryShaderT - Is the geometry shader stage enabled
+/// @tparam HasGeometryShaderT - Is the geometry shader stage enabled
/// @tparam HasStreamOutT - Is stream-out enabled
/// @tparam HasRastT - Is rasterization enabled
/// @param pContext - pointer to SWR context.
@@ -1157,11 +1157,11 @@ static void TessellationStages(
/// @param workerId - thread's worker id.
/// @param pUserData - Pointer to DRAW_WORK
template <
- bool IsIndexedT,
- bool HasTessellationT,
- bool HasGeometryShaderT,
- bool HasStreamOutT,
- bool HasRastT>
+ typename IsIndexedT,
+ typename HasTessellationT,
+ typename HasGeometryShaderT,
+ typename HasStreamOutT,
+ typename HasRastT>
void ProcessDraw(
SWR_CONTEXT *pContext,
DRAW_CONTEXT *pDC,
@@ -1188,7 +1188,7 @@ void ProcessDraw(
uint32_t endVertex = work.numVerts;
const int32_t* pLastRequestedIndex = nullptr;
- if (IsIndexedT)
+ if (IsIndexedT::value)
{
switch (work.type)
{
@@ -1223,7 +1223,7 @@ void ProcessDraw(
vsContext.pVin = &vin;
- if (IsIndexedT)
+ if (IsIndexedT::value)
{
fetchInfo.BaseVertex = work.baseVertex;
@@ -1247,12 +1247,12 @@ void ProcessDraw(
void* pGsOut = nullptr;
void* pCutBuffer = nullptr;
void* pStreamCutBuffer = nullptr;
- if (HasGeometryShaderT)
+ if (HasGeometryShaderT::value)
{
AllocateGsBuffers(pDC, state, &pGsOut, &pCutBuffer, &pStreamCutBuffer);
}
- if (HasTessellationT)
+ if (HasTessellationT::value)
{
SWR_ASSERT(state.tsState.tsEnable == true);
SWR_ASSERT(state.pfnHsFunc != nullptr);
@@ -1269,7 +1269,7 @@ void ProcessDraw(
// allocate space for streamout input prim data
uint32_t* pSoPrimData = nullptr;
- if (HasStreamOutT)
+ if (HasStreamOutT::value)
{
pSoPrimData = (uint32_t*)pDC->pArena->AllocAligned(4096, 16);
@@ -1291,7 +1291,7 @@ void ProcessDraw(
simdscalari vIndex;
uint32_t i = 0;
- if (IsIndexedT)
+ if (IsIndexedT::value)
{
fetchInfo.pIndices = work.pIB;
}
@@ -1309,7 +1309,7 @@ void ProcessDraw(
// PaGetNextVsOutput currently has the side effect of updating some PA state machine state.
// So we need to keep this outside of (i < endVertex) check.
simdmask* pvCutIndices = nullptr;
- if (IsIndexedT)
+ if (IsIndexedT::value)
{
pvCutIndices = &pa.GetNextVsIndices();
}
@@ -1332,7 +1332,7 @@ void ProcessDraw(
vsContext.mask = GenerateMask(endVertex - i);
// forward cut mask to the PA
- if (IsIndexedT)
+ if (IsIndexedT::value)
{
*pvCutIndices = _simd_movemask_ps(_simd_castsi_ps(fetchInfo.CutMask));
}
@@ -1372,12 +1372,12 @@ void ProcessDraw(
{
UPDATE_STAT(IaPrimitives, pa.NumPrims());
- if (HasTessellationT)
+ if (HasTessellationT::value)
{
TessellationStages<HasGeometryShaderT, HasStreamOutT, HasRastT>(
pDC, workerId, pa, pGsOut, pCutBuffer, pStreamCutBuffer, pSoPrimData, pa.GetPrimID(work.startPrimID));
}
- else if (HasGeometryShaderT)
+ else if (HasGeometryShaderT::value)
{
GeometryShaderStage<HasStreamOutT, HasRastT>(
pDC, workerId, pa, pGsOut, pCutBuffer, pStreamCutBuffer, pSoPrimData, pa.GetPrimID(work.startPrimID));
@@ -1385,12 +1385,12 @@ void ProcessDraw(
else
{
// If streamout is enabled then stream vertices out to memory.
- if (HasStreamOutT)
+ if (HasStreamOutT::value)
{
StreamOut(pDC, pa, workerId, pSoPrimData, 0);
}
- if (HasRastT)
+ if (HasRastT::value)
{
SWR_ASSERT(pDC->pState->pfnProcessPrims);
pDC->pState->pfnProcessPrims(pDC, pa, workerId, prim,
@@ -1403,7 +1403,7 @@ void ProcessDraw(
} while (pa.NextPrim());
i += KNOB_SIMD_WIDTH;
- if (IsIndexedT)
+ if (IsIndexedT::value)
{
fetchInfo.pIndices = (int*)((uint8_t*)fetchInfo.pIndices + KNOB_SIMD_WIDTH * indexSize);
}
@@ -1417,39 +1417,29 @@ void ProcessDraw(
RDTSC_STOP(FEProcessDraw, numPrims * work.numInstances, pDC->drawId);
}
-// Explicit Instantiation of all combinations
-template void ProcessDraw<false, false, false, false, false>(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-template void ProcessDraw<false, false, false, false, true >(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-template void ProcessDraw<false, false, false, true, false>(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-template void ProcessDraw<false, false, false, true, true >(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-template void ProcessDraw<false, false, true, false, false>(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-template void ProcessDraw<false, false, true, false, true >(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-template void ProcessDraw<false, false, true, true, false>(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-template void ProcessDraw<false, false, true, true, true >(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-template void ProcessDraw<false, true, false, false, false>(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-template void ProcessDraw<false, true, false, false, true >(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-template void ProcessDraw<false, true, false, true, false>(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-template void ProcessDraw<false, true, false, true, true >(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-template void ProcessDraw<false, true, true, false, false>(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-template void ProcessDraw<false, true, true, false, true >(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-template void ProcessDraw<false, true, true, true, false>(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-template void ProcessDraw<false, true, true, true, true >(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-template void ProcessDraw<true, false, false, false, false>(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-template void ProcessDraw<true, false, false, false, true >(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-template void ProcessDraw<true, false, false, true, false>(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-template void ProcessDraw<true, false, false, true, true >(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-template void ProcessDraw<true, false, true, false, false>(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-template void ProcessDraw<true, false, true, false, true >(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-template void ProcessDraw<true, false, true, true, false>(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-template void ProcessDraw<true, false, true, true, true >(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-template void ProcessDraw<true, true, false, false, false>(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-template void ProcessDraw<true, true, false, false, true >(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-template void ProcessDraw<true, true, false, true, false>(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-template void ProcessDraw<true, true, false, true, true >(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-template void ProcessDraw<true, true, true, false, false>(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-template void ProcessDraw<true, true, true, false, true >(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-template void ProcessDraw<true, true, true, true, false>(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-template void ProcessDraw<true, true, true, true, true >(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
+
+struct FEDrawChooser // Terminator policy passed to TemplateArgUnroller by GetProcessDrawFunc.
+{
+ typedef PFN_FE_WORK_FUNC FuncType; // Return type that TemplateArgUnroller::GetFunc reads as TermT::FuncType.
+
+ template <typename... ArgsB> // ArgsB: type arguments forwarded unchanged to ProcessDraw.
+ static FuncType GetFunc()
+ {
+ return ProcessDraw<ArgsB...>; // The ProcessDraw instantiation selected by ArgsB.
+ }
+};
+
+
+// Selector for correct templated Draw front-end function: returns the ProcessDraw instantiation matching the runtime flags.
+PFN_FE_WORK_FUNC GetProcessDrawFunc(
+ bool IsIndexed, // selects ProcessDraw's IsIndexedT
+ bool HasTessellation, // selects ProcessDraw's HasTessellationT
+ bool HasGeometryShader, // selects ProcessDraw's HasGeometryShaderT
+ bool HasStreamOut, // selects ProcessDraw's HasStreamOutT
+ bool HasRasterization) // selects ProcessDraw's HasRastT
+{
+ return TemplateArgUnroller<FEDrawChooser>::GetFunc(IsIndexed, HasTessellation, HasGeometryShader, HasStreamOut, HasRasterization); // argument order must match ProcessDraw's template parameter order
+}
//////////////////////////////////////////////////////////////////////////
diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.h b/src/gallium/drivers/swr/rasterizer/core/frontend.h
index f92f88c..8307c0b 100644
--- a/src/gallium/drivers/swr/rasterizer/core/frontend.h
+++ b/src/gallium/drivers/swr/rasterizer/core/frontend.h
@@ -28,6 +28,7 @@
******************************************************************************/
#pragma once
#include "context.h"
+#include <type_traits>
INLINE
__m128i fpToFixedPoint(const __m128 vIn)
@@ -309,9 +310,14 @@ bool CanUseSimplePoints(DRAW_CONTEXT *pDC)
uint32_t GetNumPrims(PRIMITIVE_TOPOLOGY mode, uint32_t numElements);
uint32_t NumVertsPerPrim(PRIMITIVE_TOPOLOGY topology, bool includeAdjVerts);
-// Templated Draw front-end function. All combinations of template parameter values are available
-template <bool IsIndexedT, bool HasTessellationT, bool HasGeometryShaderT, bool HasStreamOutT, bool HasRastT>
-void ProcessDraw(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
+
+// Returns the ProcessDraw front-end function matching the given flags. All combinations of parameter values are available.
+PFN_FE_WORK_FUNC GetProcessDrawFunc(
+ bool IsIndexed,
+ bool HasTessellation,
+ bool HasGeometryShader,
+ bool HasStreamOut,
+ bool HasRasterization);
void ProcessClear(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
void ProcessStoreTiles(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
diff --git a/src/gallium/drivers/swr/rasterizer/core/pa.h b/src/gallium/drivers/swr/rasterizer/core/pa.h
index 17f4885..067deab 100644
--- a/src/gallium/drivers/swr/rasterizer/core/pa.h
+++ b/src/gallium/drivers/swr/rasterizer/core/pa.h
@@ -1146,14 +1146,14 @@ private:
// Primitive Assembler factory class, responsible for creating and initializing the correct assembler
// based on state.
-template <bool IsIndexedT>
+template <typename IsIndexedT>
struct PA_FACTORY
{
PA_FACTORY(DRAW_CONTEXT* pDC, PRIMITIVE_TOPOLOGY in_topo, uint32_t numVerts) : topo(in_topo)
{
#if KNOB_ENABLE_CUT_AWARE_PA == TRUE
const API_STATE& state = GetApiState(pDC);
- if ((IsIndexedT && (
+ if ((IsIndexedT::value && (
topo == TOP_TRIANGLE_STRIP || topo == TOP_POINT_LIST ||
topo == TOP_LINE_LIST || topo == TOP_LINE_STRIP ||
topo == TOP_TRIANGLE_LIST || topo == TOP_LINE_LIST_ADJ ||
@@ -1162,7 +1162,7 @@ struct PA_FACTORY
// non-indexed draws with adjacency topologies must use cut-aware PA until we add support
// for them in the optimized PA
- (!IsIndexedT && (
+ (!IsIndexedT::value && (
topo == TOP_LINE_LIST_ADJ || topo == TOP_LISTSTRIP_ADJ || topo == TOP_TRI_LIST_ADJ || topo == TOP_TRI_STRIP_ADJ)))
{
memset(&indexStore, 0, sizeof(indexStore));
diff --git a/src/gallium/drivers/swr/rasterizer/core/utils.h b/src/gallium/drivers/swr/rasterizer/core/utils.h
index 60a3a6a..63ecd5c 100644
--- a/src/gallium/drivers/swr/rasterizer/core/utils.h
+++ b/src/gallium/drivers/swr/rasterizer/core/utils.h
@@ -28,6 +28,7 @@
#pragma once
#include <string.h>
+#include <type_traits>
#include "common/os.h"
#include "common/simdintrin.h"
#include "common/swr_assert.h"
@@ -834,3 +835,33 @@ public:
return T(word & ELEMENT_MASK);
}
};
+
+// Recursive template used to auto-nest conditionals. Converts dynamic boolean function
+// arguments to static template arguments (std::true_type / std::false_type). TermT must provide FuncType and a static template GetFunc<...>().
+template <typename TermT, typename... ArgsB>
+struct TemplateArgUnroller
+{
+ // Last Arg Terminator: appends the final flag's type to ArgsB and asks TermT for the resulting function.
+ static typename TermT::FuncType GetFunc(bool bArg)
+ {
+ if (bArg)
+ {
+ return TermT::template GetFunc<ArgsB..., std::true_type>();
+ }
+
+ return TermT::template GetFunc<ArgsB..., std::false_type>();
+ }
+
+ // Recursively parse args: appends the type chosen by bArg to ArgsB, then unrolls remainingArgs.
+ template <typename... TArgsT>
+ static typename TermT::FuncType GetFunc(bool bArg, TArgsT... remainingArgs)
+ {
+ if (bArg)
+ {
+ return TemplateArgUnroller<TermT, ArgsB..., std::true_type>::GetFunc(remainingArgs...);
+ }
+
+ return TemplateArgUnroller<TermT, ArgsB..., std::false_type>::GetFunc(remainingArgs...);
+ }
+};
+
--
1.9.1
More information about the mesa-dev
mailing list