[Mesa-dev] [PATCH 2/7] swr/rast: Miscellaneous viewport array code changes
Tim Rowley
timothy.o.rowley at intel.com
Thu Oct 19 13:12:23 UTC 2017
---
src/gallium/drivers/swr/rasterizer/core/binner.cpp | 45 ++++++++++++++++------
src/gallium/drivers/swr/rasterizer/core/clip.h | 14 +++++--
.../drivers/swr/rasterizer/core/frontend.cpp | 22 ++++++-----
src/gallium/drivers/swr/rasterizer/core/pa.h | 24 ++++++------
src/gallium/drivers/swr/rasterizer/core/pa_avx.cpp | 4 +-
5 files changed, 71 insertions(+), 38 deletions(-)
diff --git a/src/gallium/drivers/swr/rasterizer/core/binner.cpp b/src/gallium/drivers/swr/rasterizer/core/binner.cpp
index e08e489..b624ae6 100644
--- a/src/gallium/drivers/swr/rasterizer/core/binner.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/binner.cpp
@@ -450,16 +450,22 @@ void SIMDCALL BinTrianglesImpl(
typename SIMD_T::Float vRecipW1 = SIMD_T::set1_ps(1.0f);
typename SIMD_T::Float vRecipW2 = SIMD_T::set1_ps(1.0f);
- typename SIMD_T::Integer viewportIdx = SIMD_T::set1_epi32(0);
+ typename SIMD_T::Integer viewportIdx = SIMD_T::setzero_si();
+ typename SIMD_T::Vec4 vpiAttrib[3];
+ typename SIMD_T::Integer vpai = SIMD_T::setzero_si();
if (state.backendState.readViewportArrayIndex)
{
- typename SIMD_T::Vec4 vpiAttrib[3];
pa.Assemble(VERTEX_SGV_SLOT, vpiAttrib);
+ vpai = SIMD_T::castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
+ }
+
+
+ if (state.backendState.readViewportArrayIndex) // VPAIOffsets are guaranteed 0-15 -- no OOB issues if they are offsets from 0
+ {
// OOB indices => forced to zero.
- typename SIMD_T::Integer vpai = SIMD_T::castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
- vpai = SIMD_T::max_epi32(SIMD_T::setzero_si(), vpai);
+ vpai = SIMD_T::max_epi32(vpai, SIMD_T::setzero_si());
typename SIMD_T::Integer vNumViewports = SIMD_T::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
typename SIMD_T::Integer vClearMask = SIMD_T::cmplt_epi32(vpai, vNumViewports);
viewportIdx = SIMD_T::and_si(vClearMask, vpai);
@@ -815,6 +821,7 @@ endBinTriangles:
SIMD_T::store_si(reinterpret_cast<typename SIMD_T::Integer *>(aRTAI), SIMD_T::setzero_si());
}
+
// scan remaining valid triangles and bin each separately
while (_BitScanForward(&triIndex, triMask))
{
@@ -1299,15 +1306,22 @@ void BinPointsImpl(
const SWR_RASTSTATE& rastState = state.rastState;
// Read back viewport index if required
- typename SIMD_T::Integer viewportIdx = SIMD_T::set1_epi32(0);
+ typename SIMD_T::Integer viewportIdx = SIMD_T::setzero_si();
+ typename SIMD_T::Vec4 vpiAttrib[1];
+ typename SIMD_T::Integer vpai = SIMD_T::setzero_si();
+
if (state.backendState.readViewportArrayIndex)
{
- typename SIMD_T::Vec4 vpiAttrib[1];
pa.Assemble(VERTEX_SGV_SLOT, vpiAttrib);
+ vpai = SIMD_T::castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
+ }
+
+
+ if (state.backendState.readViewportArrayIndex) // VPAIOffsets are guaranteed 0-15 -- no OOB issues if they are offsets from 0
+ {
// OOB indices => forced to zero.
- typename SIMD_T::Integer vpai = SIMD_T::castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
- vpai = SIMD_T::max_epi32(SIMD_T::setzero_si(), vpai);
+ vpai = SIMD_T::max_epi32(vpai, SIMD_T::setzero_si());
typename SIMD_T::Integer vNumViewports = SIMD_T::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
typename SIMD_T::Integer vClearMask = SIMD_T::cmplt_epi32(vpai, vNumViewports);
viewportIdx = SIMD_T::and_si(vClearMask, vpai);
@@ -1626,15 +1640,22 @@ void SIMDCALL BinLinesImpl(
typename SIMD_T::Float vRecipW[2] = { SIMD_T::set1_ps(1.0f), SIMD_T::set1_ps(1.0f) };
- typename SIMD_T::Integer viewportIdx = SIMD_T::set1_epi32(0);
+ typename SIMD_T::Integer viewportIdx = SIMD_T::setzero_si();
+ typename SIMD_T::Vec4 vpiAttrib[2];
+ typename SIMD_T::Integer vpai = SIMD_T::setzero_si();
+
if (state.backendState.readViewportArrayIndex)
{
- typename SIMD_T::Vec4 vpiAttrib[2];
pa.Assemble(VERTEX_SGV_SLOT, vpiAttrib);
+ vpai = SIMD_T::castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
+ }
+
+
+ if (state.backendState.readViewportArrayIndex) // VPAIOffsets are guaranteed 0-15 -- no OOB issues if they are offsets from 0
+ {
// OOB indices => forced to zero.
- typename SIMD_T::Integer vpai = SIMD_T::castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
- vpai = SIMD_T::max_epi32(SIMD_T::setzero_si(), vpai);
+ vpai = SIMD_T::max_epi32(vpai, SIMD_T::setzero_si());
typename SIMD_T::Integer vNumViewports = SIMD_T::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
typename SIMD_T::Integer vClearMask = SIMD_T::cmplt_epi32(vpai, vNumViewports);
viewportIdx = SIMD_T::and_si(vClearMask, vpai);
diff --git a/src/gallium/drivers/swr/rasterizer/core/clip.h b/src/gallium/drivers/swr/rasterizer/core/clip.h
index e9a410d..0d3d780 100644
--- a/src/gallium/drivers/swr/rasterizer/core/clip.h
+++ b/src/gallium/drivers/swr/rasterizer/core/clip.h
@@ -641,7 +641,7 @@ public:
}
}
- PA_STATE_OPT clipPA(pDC, numEmittedPrims, reinterpret_cast<uint8_t *>(&transposedPrims[0]), numEmittedVerts, SWR_VTX_NUM_SLOTS, true, clipTopology);
+ PA_STATE_OPT clipPA(pDC, numEmittedPrims, reinterpret_cast<uint8_t *>(&transposedPrims[0]), numEmittedVerts, SWR_VTX_NUM_SLOTS, true, NumVertsPerPrim, clipTopology);
static const uint32_t primMaskMap[] = { 0x0, 0x1, 0x3, 0x7, 0xf, 0x1f, 0x3f, 0x7f };
@@ -687,15 +687,21 @@ public:
UPDATE_STAT_FE(CInvocations, numInvoc);
// Read back viewport index if required
- typename SIMD_T::Integer viewportIdx = SIMD_T::set1_epi32(0);
+ typename SIMD_T::Integer viewportIdx = SIMD_T::setzero_si();
+ typename SIMD_T::Vec4 vpiAttrib[NumVertsPerPrim];
+ typename SIMD_T::Integer vpai = SIMD_T::setzero_si();
if (state.backendState.readViewportArrayIndex)
{
- typename SIMD_T::Vec4 vpiAttrib[NumVertsPerPrim];
pa.Assemble(VERTEX_SGV_SLOT, vpiAttrib);
+ vpai = SIMD_T::castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
+ }
+
+
+ if (state.backendState.readViewportArrayIndex) // VPAIOffsets are guaranteed 0-15 -- no OOB issues if they are offsets from 0
+ {
// OOB indices => forced to zero.
- typename SIMD_T::Integer vpai = SIMD_T::castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
vpai = SIMD_T::max_epi32(vpai, SIMD_T::setzero_si());
typename SIMD_T::Integer vNumViewports = SIMD_T::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
typename SIMD_T::Integer vClearMask = SIMD_T::cmplt_epi32(vpai, vNumViewports);
diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
index aea8e88..a803512 100644
--- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
@@ -951,7 +951,7 @@ static void GeometryShaderStage(
}
#if USE_SIMD16_FRONTEND
- PA_STATE_CUT gsPa(pDC, (uint8_t*)pGsBuffers->pGsTransposed, numEmittedVerts, pState->outputVertexSize, reinterpret_cast<simd16mask *>(pCutBuffer), numEmittedVerts, numAttribs, pState->outputTopology, processCutVerts);
+ PA_STATE_CUT gsPa(pDC, (uint8_t*)pGsBuffers->pGsTransposed, numEmittedVerts, pState->outputVertexSize, reinterpret_cast<simd16mask *>(pCutBuffer), numEmittedVerts, numAttribs, pState->outputTopology, processCutVerts, pa.numVertsPerPrim);
#else
PA_STATE_CUT gsPa(pDC, (uint8_t*)pGsBuffers->pGsTransposed, numEmittedVerts, pState->outputVertexSize, pCutBuffer, numEmittedVerts, numAttribs, pState->outputTopology, processCutVerts);
@@ -986,9 +986,10 @@ static void GeometryShaderStage(
{
#if USE_SIMD16_FRONTEND
simd16scalari vPrimId = _simd16_set1_epi32(pPrimitiveId[inputPrim]);
-
- gsPa.useAlternateOffset = false;
- pfnClipFunc(pDC, gsPa, workerId, attrib_simd16, GenMask(gsPa.NumPrims()), vPrimId);
+ {
+ gsPa.useAlternateOffset = false;
+ pfnClipFunc(pDC, gsPa, workerId, attrib_simd16, GenMask(gsPa.NumPrims()), vPrimId);
+ }
#else
simdscalari vPrimId = _simd_set1_epi32(pPrimitiveId[inputPrim]);
pfnClipFunc(pDC, gsPa, workerId, attrib, GenMask(gsPa.NumPrims()), vPrimId);
@@ -1273,7 +1274,8 @@ static void TessellationStages(
tsState.numDsOutputAttribs,
tsData.ppIndices,
tsData.NumPrimitives,
- tsState.postDSTopology);
+ tsState.postDSTopology,
+ numVertsPerPrim);
while (tessPa.HasWork())
{
@@ -1498,7 +1500,8 @@ void ProcessDraw(
}
// choose primitive assembler
- PA_FACTORY<IsIndexedT, IsCutIndexEnabledT> paFactory(pDC, state.topology, work.numVerts, gpVertexStore, numVerts, state.frontendState.vsVertexSize);
+
+ PA_FACTORY<IsIndexedT, IsCutIndexEnabledT> paFactory(pDC, state.topology, work.numVerts, gpVertexStore, numVerts, state.frontendState.vsVertexSize, GetNumVerts(state.topology, 1));
PA_STATE& pa = paFactory.GetPA();
#if USE_SIMD16_FRONTEND
@@ -1727,9 +1730,10 @@ void ProcessDraw(
if (HasRastT::value)
{
SWR_ASSERT(pDC->pState->pfnProcessPrims_simd16);
-
- pa.useAlternateOffset = false;
- pDC->pState->pfnProcessPrims_simd16(pDC, pa, workerId, prim_simd16, GenMask(numPrims), primID);
+ {
+ pa.useAlternateOffset = false;
+ pDC->pState->pfnProcessPrims_simd16(pDC, pa, workerId, prim_simd16, GenMask(numPrims), primID);
+ }
}
}
}
diff --git a/src/gallium/drivers/swr/rasterizer/core/pa.h b/src/gallium/drivers/swr/rasterizer/core/pa.h
index e76dc04..13f99cb 100644
--- a/src/gallium/drivers/swr/rasterizer/core/pa.h
+++ b/src/gallium/drivers/swr/rasterizer/core/pa.h
@@ -77,11 +77,12 @@ struct PA_STATE
#if ENABLE_AVX512_SIMD16
bool useAlternateOffset{ false };
+ uint32_t numVertsPerPrim{ 0 };
#endif
- PA_STATE() {}
- PA_STATE(DRAW_CONTEXT *in_pDC, uint8_t* in_pStreamBase, uint32_t in_streamSizeInVerts, uint32_t in_vertexStride) :
- pDC(in_pDC), pStreamBase(in_pStreamBase), streamSizeInVerts(in_streamSizeInVerts), vertexStride(in_vertexStride) {}
+ PA_STATE(){}
+ PA_STATE(DRAW_CONTEXT *in_pDC, uint8_t* in_pStreamBase, uint32_t in_streamSizeInVerts, uint32_t in_vertexStride, uint32_t in_numVertsPerPrim) :
+ pDC(in_pDC), pStreamBase(in_pStreamBase), streamSizeInVerts(in_streamSizeInVerts), vertexStride(in_vertexStride), numVertsPerPrim(in_numVertsPerPrim) {}
virtual bool HasWork() = 0;
virtual simdvector& GetSimdVector(uint32_t index, uint32_t slot) = 0;
@@ -165,7 +166,7 @@ struct PA_STATE_OPT : public PA_STATE
PA_STATE_OPT() {}
PA_STATE_OPT(DRAW_CONTEXT* pDC, uint32_t numPrims, uint8_t* pStream, uint32_t streamSizeInVerts,
- uint32_t vertexStride, bool in_isStreaming, PRIMITIVE_TOPOLOGY topo = TOP_UNKNOWN);
+ uint32_t vertexStride, bool in_isStreaming, uint32_t numVertsPerPrim, PRIMITIVE_TOPOLOGY topo = TOP_UNKNOWN);
bool HasWork()
{
@@ -430,8 +431,8 @@ struct PA_STATE_CUT : public PA_STATE
PA_STATE_CUT() {}
PA_STATE_CUT(DRAW_CONTEXT* pDC, uint8_t* in_pStream, uint32_t in_streamSizeInVerts, uint32_t in_vertexStride, SIMDMASK* in_pIndices, uint32_t in_numVerts,
- uint32_t in_numAttribs, PRIMITIVE_TOPOLOGY topo, bool in_processCutVerts)
- : PA_STATE(pDC, in_pStream, in_streamSizeInVerts, in_vertexStride)
+ uint32_t in_numAttribs, PRIMITIVE_TOPOLOGY topo, bool in_processCutVerts, uint32_t in_numVertsPerPrim)
+ : PA_STATE(pDC, in_pStream, in_streamSizeInVerts, in_vertexStride, in_numVertsPerPrim)
{
numVerts = in_streamSizeInVerts;
numAttribs = in_numAttribs;
@@ -1144,9 +1145,10 @@ struct PA_TESS : PA_STATE
uint32_t in_numAttributes,
uint32_t* (&in_ppIndices)[3],
uint32_t in_numPrims,
- PRIMITIVE_TOPOLOGY in_binTopology) :
+ PRIMITIVE_TOPOLOGY in_binTopology,
+ uint32_t numVertsPerPrim) :
- PA_STATE(in_pDC, nullptr, 0, in_vertexStride),
+ PA_STATE(in_pDC, nullptr, 0, in_vertexStride, numVertsPerPrim),
m_pVertexData(in_pVertData),
m_attributeStrideInVectors(in_attributeStrideInVectors),
m_numAttributes(in_numAttributes),
@@ -1416,7 +1418,7 @@ private:
template <typename IsIndexedT, typename IsCutIndexEnabledT>
struct PA_FACTORY
{
- PA_FACTORY(DRAW_CONTEXT* pDC, PRIMITIVE_TOPOLOGY in_topo, uint32_t numVerts, PA_STATE::SIMDVERTEX *pVertexStore, uint32_t vertexStoreSize, uint32_t vertexStride) : topo(in_topo)
+ PA_FACTORY(DRAW_CONTEXT* pDC, PRIMITIVE_TOPOLOGY in_topo, uint32_t numVerts, PA_STATE::SIMDVERTEX *pVertexStore, uint32_t vertexStoreSize, uint32_t vertexStride, uint32_t numVertsPerPrim) : topo(in_topo)
{
#if KNOB_ENABLE_CUT_AWARE_PA == TRUE
const API_STATE& state = GetApiState(pDC);
@@ -1433,14 +1435,14 @@ struct PA_FACTORY
uint32_t numAttribs = state.feNumAttributes;
new (&this->paCut) PA_STATE_CUT(pDC, reinterpret_cast<uint8_t *>(pVertexStore), vertexStoreSize * PA_STATE::SIMD_WIDTH,
- vertexStride, &this->indexStore[0], numVerts, numAttribs, state.topology, false);
+ vertexStride, &this->indexStore[0], numVerts, numAttribs, state.topology, false, numVertsPerPrim);
cutPA = true;
}
else
#endif
{
uint32_t numPrims = GetNumPrims(in_topo, numVerts);
- new (&this->paOpt) PA_STATE_OPT(pDC, numPrims, reinterpret_cast<uint8_t *>(pVertexStore), vertexStoreSize * PA_STATE::SIMD_WIDTH, vertexStride, false);
+ new (&this->paOpt) PA_STATE_OPT(pDC, numPrims, reinterpret_cast<uint8_t *>(pVertexStore), vertexStoreSize * PA_STATE::SIMD_WIDTH, vertexStride, false, numVertsPerPrim);
cutPA = false;
}
diff --git a/src/gallium/drivers/swr/rasterizer/core/pa_avx.cpp b/src/gallium/drivers/swr/rasterizer/core/pa_avx.cpp
index e53389b..3bf66b3 100644
--- a/src/gallium/drivers/swr/rasterizer/core/pa_avx.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/pa_avx.cpp
@@ -2588,8 +2588,8 @@ void PaRectListSingle0(
}
PA_STATE_OPT::PA_STATE_OPT(DRAW_CONTEXT *in_pDC, uint32_t in_numPrims, uint8_t* pStream, uint32_t in_streamSizeInVerts,
- uint32_t in_vertexStride, bool in_isStreaming, PRIMITIVE_TOPOLOGY topo) :
- PA_STATE(in_pDC, pStream, in_streamSizeInVerts, in_vertexStride), numPrims(in_numPrims), numPrimsComplete(0), numSimdPrims(0),
+ uint32_t in_vertexStride, bool in_isStreaming, uint32_t numVertsPerPrim, PRIMITIVE_TOPOLOGY topo) :
+ PA_STATE(in_pDC, pStream, in_streamSizeInVerts, in_vertexStride, numVertsPerPrim), numPrims(in_numPrims), numPrimsComplete(0), numSimdPrims(0),
cur(0), prev(0), first(0), counter(0), reset(false), pfnPaFunc(nullptr), isStreaming(in_isStreaming)
{
const API_STATE& state = GetApiState(pDC);
--
2.7.4
More information about the mesa-dev mailing list