[Mesa-dev] [PATCH 09/24] swr/rast: SIMD16 FE - interleaved simdvertex output in GS
Tim Rowley
timothy.o.rowley at intel.com
Sat May 27 21:25:04 UTC 2017
Have the geometry shader write its output directly in the interleaved simd16vertex layout, eliminating the conversion copies from simdvertex to simd16vertex on GS output.
---
.../drivers/swr/rasterizer/core/frontend.cpp | 22 ++++------------
src/gallium/drivers/swr/swr_shader.cpp | 29 +++++++++++++++++++---
2 files changed, 31 insertions(+), 20 deletions(-)
diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
index 3886c64..e88246f 100644
--- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
@@ -717,10 +717,6 @@ void ProcessStreamIdBuffer(uint32_t stream, uint8_t* pStreamIdBase, uint32_t num
THREAD SWR_GS_CONTEXT tlsGsContext;
-#if USE_SIMD16_FRONTEND
-THREAD simd16vertex tempVertex_simd16[128];
-
-#endif
template<typename SIMDVERTEX, uint32_t SIMD_WIDTH>
struct GsBufferInfo
{
@@ -819,7 +815,11 @@ static void GeometryShaderStage(
tlsGsContext.vert[i].attrib[VERTEX_POSITION_SLOT] = attrib[i];
}
+#if USE_SIMD16_FRONTEND
+ const GsBufferInfo<simd16vertex, KNOB_SIMD16_WIDTH> bufferInfo(state.gsState);
+#else
const GsBufferInfo<simdvertex, KNOB_SIMD_WIDTH> bufferInfo(state.gsState);
+#endif
// record valid prims from the frontend to avoid over binning the newly generated
// prims from the GS
@@ -923,19 +923,7 @@ static void GeometryShaderStage(
}
#if USE_SIMD16_FRONTEND
- // TEMPORARY: GS outputs simdvertex, PA inputs simd16vertex, so convert simdvertex to simd16vertex
-
- SWR_ASSERT(numEmittedVerts <= 256);
-
- PackPairsOfSimdVertexIntoSimd16Vertex(
- tempVertex_simd16,
- reinterpret_cast<const simdvertex *>(pBase),
- numEmittedVerts,
- SWR_VTX_NUM_SLOTS);
-
-#endif
-#if USE_SIMD16_FRONTEND
- PA_STATE_CUT gsPa(pDC, reinterpret_cast<uint8_t *>(tempVertex_simd16), numEmittedVerts, reinterpret_cast<simd16mask *>(pCutBuffer), numEmittedVerts, numAttribs, pState->outputTopology, processCutVerts);
+ PA_STATE_CUT gsPa(pDC, pBase, numEmittedVerts, reinterpret_cast<simd16mask *>(pCutBuffer), numEmittedVerts, numAttribs, pState->outputTopology, processCutVerts);
#else
PA_STATE_CUT gsPa(pDC, pBase, numEmittedVerts, pCutBuffer, numEmittedVerts, numAttribs, pState->outputTopology, processCutVerts);
diff --git a/src/gallium/drivers/swr/swr_shader.cpp b/src/gallium/drivers/swr/swr_shader.cpp
index d55820e..2f495f5 100644
--- a/src/gallium/drivers/swr/swr_shader.cpp
+++ b/src/gallium/drivers/swr/swr_shader.cpp
@@ -370,8 +370,13 @@ BuilderSWR::swr_gs_llvm_emit_vertex(const struct lp_build_tgsi_gs_iface *gs_base
IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
+#if USE_SIMD16_FRONTEND
+ const uint32_t simdVertexStride = sizeof(simdvertex) * 2;
+ const uint32_t numSimdBatches = (pGS->maxNumVerts + (mVWidth * 2) - 1) / (mVWidth * 2);
+#else
const uint32_t simdVertexStride = sizeof(simdvertex);
- const uint32_t numSimdBatches = (pGS->maxNumVerts + 7) / 8;
+ const uint32_t numSimdBatches = (pGS->maxNumVerts + mVWidth - 1) / mVWidth;
+#endif
const uint32_t inputPrimStride = numSimdBatches * simdVertexStride;
Value *pStream = LOAD(iface->pGsCtx, { 0, SWR_GS_CONTEXT_pStream });
@@ -388,8 +393,14 @@ BuilderSWR::swr_gs_llvm_emit_vertex(const struct lp_build_tgsi_gs_iface *gs_base
inputPrimStride * 6,
inputPrimStride * 7 } );
- Value *vVertexSlot = ASHR(unwrap(emitted_vertices_vec), 3);
- Value *vSimdSlot = AND(unwrap(emitted_vertices_vec), 7);
+#if USE_SIMD16_FRONTEND
+ const uint32_t simdShift = log2(mVWidth * 2);
+ Value *vSimdSlot = AND(unwrap(emitted_vertices_vec), (mVWidth * 2) - 1);
+#else
+ const uint32_t simdShift = log2(mVWidth);
+ Value *vSimdSlot = AND(unwrap(emitted_vertices_vec), mVWidth - 1);
+#endif
+ Value *vVertexSlot = ASHR(unwrap(emitted_vertices_vec), simdShift);
for (uint32_t attrib = 0; attrib < iface->num_outputs; ++attrib) {
uint32_t attribSlot = attrib;
@@ -400,10 +411,17 @@ BuilderSWR::swr_gs_llvm_emit_vertex(const struct lp_build_tgsi_gs_iface *gs_base
else if (iface->info->output_semantic_name[attrib] == TGSI_SEMANTIC_LAYER)
attribSlot = VERTEX_RTAI_SLOT;
+#if USE_SIMD16_FRONTEND
+ Value *vOffsetsAttrib =
+ ADD(vOffsets, MUL(vVertexSlot, VIMMED1((uint32_t)sizeof(simdvertex) * 2)));
+ vOffsetsAttrib =
+ ADD(vOffsetsAttrib, VIMMED1((uint32_t)(attribSlot*sizeof(simdvector) * 2)));
+#else
Value *vOffsetsAttrib =
ADD(vOffsets, MUL(vVertexSlot, VIMMED1((uint32_t)sizeof(simdvertex))));
vOffsetsAttrib =
ADD(vOffsetsAttrib, VIMMED1((uint32_t)(attribSlot*sizeof(simdvector))));
+#endif
vOffsetsAttrib =
ADD(vOffsetsAttrib, MUL(vSimdSlot, VIMMED1((uint32_t)sizeof(float))));
@@ -416,8 +434,13 @@ BuilderSWR::swr_gs_llvm_emit_vertex(const struct lp_build_tgsi_gs_iface *gs_base
MASKED_SCATTER(vData, vPtrs, 32, vMask1);
+#if USE_SIMD16_FRONTEND
+ vOffsetsAttrib =
+ ADD(vOffsetsAttrib, VIMMED1((uint32_t)sizeof(simdscalar) * 2));
+#else
vOffsetsAttrib =
ADD(vOffsetsAttrib, VIMMED1((uint32_t)sizeof(simdscalar)));
+#endif
}
}
}
--
2.7.4
More information about the mesa-dev mailing list