[Mesa-dev] [PATCH 08/20] swr/rast: Pull most of the VPAI manipulation out of the binner/clipper

Tim Rowley timothy.o.rowley at intel.com
Thu Dec 14 21:34:48 UTC 2017


Move the viewport array index (VPAI) read-back and out-of-bounds handling out of the binner/clipper; the frontend code now computes the indices and hands them down instead.
---
 src/gallium/drivers/swr/rasterizer/core/binner.cpp | 124 ++++++---------------
 src/gallium/drivers/swr/rasterizer/core/clip.cpp   |  25 ++---
 src/gallium/drivers/swr/rasterizer/core/clip.h     |  58 +++-------
 src/gallium/drivers/swr/rasterizer/core/context.h  |   4 +-
 .../drivers/swr/rasterizer/core/frontend.cpp       | 112 ++++++++++++++++++-
 src/gallium/drivers/swr/rasterizer/core/frontend.h |   8 +-
 src/gallium/drivers/swr/rasterizer/core/pa.h       |   4 +-
 7 files changed, 177 insertions(+), 158 deletions(-)

diff --git a/src/gallium/drivers/swr/rasterizer/core/binner.cpp b/src/gallium/drivers/swr/rasterizer/core/binner.cpp
index 22996c5a5d..a664ed812f 100644
--- a/src/gallium/drivers/swr/rasterizer/core/binner.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/binner.cpp
@@ -307,7 +307,8 @@ void SIMDCALL BinTrianglesImpl(
     uint32_t workerId,
     typename SIMD_T::Vec4 tri[3],
     uint32_t triMask,
-    typename SIMD_T::Integer const &primID)
+    typename SIMD_T::Integer const &primID,
+    typename SIMD_T::Integer const &viewportIdx)
 {
     SWR_CONTEXT *pContext = pDC->pContext;
 
@@ -323,31 +324,6 @@ void SIMDCALL BinTrianglesImpl(
     typename SIMD_T::Float vRecipW1 = SIMD_T::set1_ps(1.0f);
     typename SIMD_T::Float vRecipW2 = SIMD_T::set1_ps(1.0f);
 
-    typename SIMD_T::Integer viewportIdx = SIMD_T::setzero_si();
-    typename SIMD_T::Vec4 vpiAttrib[3];
-    typename SIMD_T::Integer vpai = SIMD_T::setzero_si();
-
-    if (state.backendState.readViewportArrayIndex)
-    {
-        pa.Assemble(VERTEX_SGV_SLOT, vpiAttrib);
-
-        vpai = SIMD_T::castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
-    }
-
-
-    if (state.backendState.readViewportArrayIndex) // VPAIOffsets are guaranteed 0-15 -- no OOB issues if they are offsets from 0 
-    {
-        // OOB indices => forced to zero.
-        vpai = SIMD_T::max_epi32(vpai, SIMD_T::setzero_si());
-        typename SIMD_T::Integer vNumViewports = SIMD_T::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
-        typename SIMD_T::Integer vClearMask = SIMD_T::cmplt_epi32(vpai, vNumViewports);
-        viewportIdx = SIMD_T::and_si(vClearMask, vpai);
-    }
-    else
-    {
-        viewportIdx = vpai;
-    }
-
     if (feState.vpTransformDisable)
     {
         // RHW is passed in directly when VP transform is disabled
@@ -375,7 +351,7 @@ void SIMDCALL BinTrianglesImpl(
         tri[2].v[2] = SIMD_T::mul_ps(tri[2].v[2], vRecipW2);
 
         // Viewport transform to screen space coords
-        if (state.backendState.readViewportArrayIndex)
+        if (pa.viewportArrayActive)
         {
             viewportTransform<3>(tri, state.vpMatrices, viewportIdx);
         }
@@ -568,8 +544,8 @@ void SIMDCALL BinTrianglesImpl(
     /// @todo:  Look at speeding this up -- weigh against corresponding costs in rasterizer.
     {
         typename SIMD_T::Integer scisXmin, scisYmin, scisXmax, scisYmax;
+        if (pa.viewportArrayActive)
 
-        if (state.backendState.readViewportArrayIndex)
         {
             GatherScissors(&state.scissorsInFixedPoint[0], pViewportIndex, scisXmin, scisYmin, scisXmax, scisYmax);
         }
@@ -786,9 +762,10 @@ void BinTriangles(
     uint32_t workerId,
     simdvector tri[3],
     uint32_t triMask,
-    simdscalari const &primID)
+    simdscalari const &primID,
+    simdscalari const &viewportIdx)
 {
-    BinTrianglesImpl<SIMD256, KNOB_SIMD_WIDTH, CT>(pDC, pa, workerId, tri, triMask, primID);
+    BinTrianglesImpl<SIMD256, KNOB_SIMD_WIDTH, CT>(pDC, pa, workerId, tri, triMask, primID, viewportIdx);
 }
 
 #if USE_SIMD16_FRONTEND
@@ -799,9 +776,10 @@ void SIMDCALL BinTriangles_simd16(
     uint32_t workerId,
     simd16vector tri[3],
     uint32_t triMask,
-    simd16scalari const &primID)
+    simd16scalari const &primID,
+    simd16scalari const &viewportIdx)
 {
-    BinTrianglesImpl<SIMD512, KNOB_SIMD16_WIDTH, CT>(pDC, pa, workerId, tri, triMask, primID);
+    BinTrianglesImpl<SIMD512, KNOB_SIMD16_WIDTH, CT>(pDC, pa, workerId, tri, triMask, primID, viewportIdx);
 }
 
 #endif
@@ -1026,7 +1004,7 @@ void BinPostSetupPointsImpl(
         {
             typename SIMD_T::Integer scisXmin, scisYmin, scisXmax, scisYmax;
 
-            if (state.backendState.readViewportArrayIndex)
+            if (pa.viewportArrayActive)
             {
                 GatherScissors(&state.scissorsInFixedPoint[0], pViewportIndex, scisXmin, scisYmin, scisXmax, scisYmax);
             }
@@ -1176,38 +1154,13 @@ void BinPointsImpl(
     uint32_t workerId,
     typename SIMD_T::Vec4 prim[3],
     uint32_t primMask,
-    typename SIMD_T::Integer const &primID)
+    typename SIMD_T::Integer const &primID,
+    typename SIMD_T::Integer const &viewportIdx)
 {
     const API_STATE& state = GetApiState(pDC);
     const SWR_FRONTEND_STATE& feState = state.frontendState;
     const SWR_RASTSTATE& rastState = state.rastState;
 
-    // Read back viewport index if required
-    typename SIMD_T::Integer viewportIdx = SIMD_T::setzero_si();
-    typename SIMD_T::Vec4 vpiAttrib[1];
-    typename SIMD_T::Integer vpai = SIMD_T::setzero_si();
-
-    if (state.backendState.readViewportArrayIndex)
-    {
-        pa.Assemble(VERTEX_SGV_SLOT, vpiAttrib);
-
-        vpai = SIMD_T::castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
-    }
-
-
-    if (state.backendState.readViewportArrayIndex) // VPAIOffsets are guaranteed 0-15 -- no OOB issues if they are offsets from 0 
-    {
-        // OOB indices => forced to zero.
-        vpai = SIMD_T::max_epi32(vpai, SIMD_T::setzero_si());
-        typename SIMD_T::Integer vNumViewports = SIMD_T::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
-        typename SIMD_T::Integer vClearMask = SIMD_T::cmplt_epi32(vpai, vNumViewports);
-        viewportIdx = SIMD_T::and_si(vClearMask, vpai);
-    }
-    else
-    {
-        viewportIdx = vpai;
-    }
-
     if (!feState.vpTransformDisable)
     {
         // perspective divide
@@ -1218,7 +1171,7 @@ void BinPointsImpl(
         prim[0].z = SIMD_T::mul_ps(prim[0].z, vRecipW0);
 
         // viewport transform to screen coords
-        if (state.backendState.readViewportArrayIndex)
+        if (pa.viewportArrayActive)
         {
             viewportTransform<1>(prim, state.vpMatrices, viewportIdx);
         }
@@ -1249,7 +1202,8 @@ void BinPoints(
     uint32_t workerId,
     simdvector prim[3],
     uint32_t primMask,
-    simdscalari const &primID)
+    simdscalari const &primID,
+    simdscalari const &viewportIdx)
 {
     BinPointsImpl<SIMD256, KNOB_SIMD_WIDTH>(
         pDC,
@@ -1257,7 +1211,8 @@ void BinPoints(
         workerId,
         prim,
         primMask,
-        primID);
+        primID,
+        viewportIdx);
 }
 
 #if USE_SIMD16_FRONTEND
@@ -1267,7 +1222,8 @@ void SIMDCALL BinPoints_simd16(
     uint32_t workerId,
     simd16vector prim[3],
     uint32_t primMask,
-    simd16scalari const &primID)
+    simd16scalari const &primID,
+    simd16scalari const &viewportIdx)
 {
     BinPointsImpl<SIMD512, KNOB_SIMD16_WIDTH>(
         pDC,
@@ -1275,7 +1231,8 @@ void SIMDCALL BinPoints_simd16(
         workerId,
         prim,
         primMask,
-        primID);
+        primID,
+        viewportIdx);
 }
 
 #endif
@@ -1362,7 +1319,7 @@ void BinPostSetupLinesImpl(
     {
         typename SIMD_T::Integer scisXmin, scisYmin, scisXmax, scisYmax;
 
-        if (state.backendState.readViewportArrayIndex)
+        if (pa.viewportArrayActive)
         {
             GatherScissors(&state.scissorsInFixedPoint[0], pViewportIndex, scisXmin, scisYmin, scisXmax, scisYmax);
         }
@@ -1513,7 +1470,8 @@ void SIMDCALL BinLinesImpl(
     uint32_t workerId,
     typename SIMD_T::Vec4 prim[3],
     uint32_t primMask,
-    typename SIMD_T::Integer const &primID)
+    typename SIMD_T::Integer const &primID,
+    typename SIMD_T::Integer const &viewportIdx)
 {
     const API_STATE& state = GetApiState(pDC);
     const SWR_RASTSTATE& rastState = state.rastState;
@@ -1521,26 +1479,6 @@ void SIMDCALL BinLinesImpl(
 
     typename SIMD_T::Float vRecipW[2] = { SIMD_T::set1_ps(1.0f), SIMD_T::set1_ps(1.0f) };
 
-    typename SIMD_T::Integer viewportIdx = SIMD_T::setzero_si();
-    typename SIMD_T::Vec4 vpiAttrib[2];
-    typename SIMD_T::Integer vpai = SIMD_T::setzero_si();
-
-    if (state.backendState.readViewportArrayIndex)
-    {
-        pa.Assemble(VERTEX_SGV_SLOT, vpiAttrib);
-        vpai = SIMD_T::castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
-    }
-
-
-    if (state.backendState.readViewportArrayIndex) // VPAIOffsets are guaranteed 0-15 -- no OOB issues if they are offsets from 0 
-    {
-        // OOB indices => forced to zero.
-        vpai = SIMD_T::max_epi32(vpai, SIMD_T::setzero_si());
-        typename SIMD_T::Integer vNumViewports = SIMD_T::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
-        typename SIMD_T::Integer vClearMask = SIMD_T::cmplt_epi32(vpai, vNumViewports);
-        viewportIdx = SIMD_T::and_si(vClearMask, vpai);
-    }
-
     if (!feState.vpTransformDisable)
     {
         // perspective divide
@@ -1557,7 +1495,7 @@ void SIMDCALL BinLinesImpl(
         prim[1].v[2] = SIMD_T::mul_ps(prim[1].v[2], vRecipW[1]);
 
         // viewport transform to screen coords
-        if (state.backendState.readViewportArrayIndex)
+        if (pa.viewportArrayActive)
         {
             viewportTransform<2>(prim, state.vpMatrices, viewportIdx);
         }
@@ -1593,9 +1531,10 @@ void BinLines(
     uint32_t workerId,
     simdvector prim[],
     uint32_t primMask,
-    simdscalari const &primID)
+    simdscalari const &primID,
+    simdscalari const &viewportIdx)
 {
-    BinLinesImpl<SIMD256, KNOB_SIMD_WIDTH>(pDC, pa, workerId, prim, primMask, primID);
+    BinLinesImpl<SIMD256, KNOB_SIMD_WIDTH>(pDC, pa, workerId, prim, primMask, primID, viewportIdx);
 }
 
 #if USE_SIMD16_FRONTEND
@@ -1605,9 +1544,10 @@ void SIMDCALL BinLines_simd16(
     uint32_t workerId,
     simd16vector prim[3],
     uint32_t primMask,
-    simd16scalari const &primID)
+    simd16scalari const &primID,
+    simd16scalari const &viewportIdx)
 {
-    BinLinesImpl<SIMD512, KNOB_SIMD16_WIDTH>(pDC, pa, workerId, prim, primMask, primID);
+    BinLinesImpl<SIMD512, KNOB_SIMD16_WIDTH>(pDC, pa, workerId, prim, primMask, primID, viewportIdx);
 }
 
 #endif
diff --git a/src/gallium/drivers/swr/rasterizer/core/clip.cpp b/src/gallium/drivers/swr/rasterizer/core/clip.cpp
index a40f077bea..d4da2c3bad 100644
--- a/src/gallium/drivers/swr/rasterizer/core/clip.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/clip.cpp
@@ -160,35 +160,35 @@ int ClipTriToPlane( const float *pInPts, int numInPts,
     return i;
 }
 
-void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId)
+void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx)
 {
     SWR_CONTEXT *pContext = pDC->pContext;
     AR_BEGIN(FEClipTriangles, pDC->drawId);
     Clipper<SIMD256, 3> clipper(workerId, pDC);
-    clipper.ExecuteStage(pa, prims, primMask, primId);
+    clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx);
     AR_END(FEClipTriangles, 1);
 }
 
-void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId)
+void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx)
 {
     SWR_CONTEXT *pContext = pDC->pContext;
     AR_BEGIN(FEClipLines, pDC->drawId);
     Clipper<SIMD256, 2> clipper(workerId, pDC);
-    clipper.ExecuteStage(pa, prims, primMask, primId);
+    clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx);
     AR_END(FEClipLines, 1);
 }
 
-void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId)
+void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx)
 {
     SWR_CONTEXT *pContext = pDC->pContext;
     AR_BEGIN(FEClipPoints, pDC->drawId);
     Clipper<SIMD256, 1> clipper(workerId, pDC);
-    clipper.ExecuteStage(pa, prims, primMask, primId);
+    clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx);
     AR_END(FEClipPoints, 1);
 }
 
 #if USE_SIMD16_FRONTEND
-void SIMDCALL ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId)
+void SIMDCALL ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx)
 {
     SWR_CONTEXT *pContext = pDC->pContext;
     AR_BEGIN(FEClipTriangles, pDC->drawId);
@@ -198,12 +198,12 @@ void SIMDCALL ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t wor
     Clipper<SIMD512, VERTS_PER_PRIM> clipper(workerId, pDC);
 
     pa.useAlternateOffset = false;
-    clipper.ExecuteStage(pa, prims, primMask, primId);
+    clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx);
 
     AR_END(FEClipTriangles, 1);
 }
 
-void SIMDCALL ClipLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId)
+void SIMDCALL ClipLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx)
 {
     SWR_CONTEXT *pContext = pDC->pContext;
     AR_BEGIN(FEClipLines, pDC->drawId);
@@ -213,12 +213,12 @@ void SIMDCALL ClipLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerI
     Clipper<SIMD512, VERTS_PER_PRIM> clipper(workerId, pDC);
 
     pa.useAlternateOffset = false;
-    clipper.ExecuteStage(pa, prims, primMask, primId);
+    clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx);
 
     AR_END(FEClipLines, 1);
 }
 
-void SIMDCALL ClipPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId)
+void SIMDCALL ClipPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx)
 {
     SWR_CONTEXT *pContext = pDC->pContext;
     AR_BEGIN(FEClipPoints, pDC->drawId);
@@ -228,10 +228,9 @@ void SIMDCALL ClipPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t worker
     Clipper<SIMD512, VERTS_PER_PRIM> clipper(workerId, pDC);
 
     pa.useAlternateOffset = false;
-    clipper.ExecuteStage(pa, prims, primMask, primId);
+    clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx);
 
     AR_END(FEClipPoints, 1);
 }
 
 #endif
-
diff --git a/src/gallium/drivers/swr/rasterizer/core/clip.h b/src/gallium/drivers/swr/rasterizer/core/clip.h
index 9d8bbc19e6..148f661ab4 100644
--- a/src/gallium/drivers/swr/rasterizer/core/clip.h
+++ b/src/gallium/drivers/swr/rasterizer/core/clip.h
@@ -178,11 +178,11 @@ struct BinnerChooser<SIMD256>
         };
     }
 
-    void BinFunc(DRAW_CONTEXT *pDC, PA_STATE &pa, uint32_t workerId, SIMD256::Vec4 prims[], uint32_t primMask, SIMD256::Integer const &primID)
+    void BinFunc(DRAW_CONTEXT *pDC, PA_STATE &pa, uint32_t workerId, SIMD256::Vec4 prims[], uint32_t primMask, SIMD256::Integer const &primID, SIMD256::Integer &viewportIdx)
     {
         SWR_ASSERT(pfnBinFunc != nullptr);
 
-        pfnBinFunc(pDC, pa, workerId, prims, primMask, primID);
+        pfnBinFunc(pDC, pa, workerId, prims, primMask, primID, viewportIdx);
     }
 };
 
@@ -231,11 +231,11 @@ struct BinnerChooser<SIMD512>
         };
     }
 
-    void BinFunc(DRAW_CONTEXT *pDC, PA_STATE &pa, uint32_t workerId, SIMD512::Vec4 prims[], uint32_t primMask, SIMD512::Integer const &primID)
+    void BinFunc(DRAW_CONTEXT *pDC, PA_STATE &pa, uint32_t workerId, SIMD512::Vec4 prims[], uint32_t primMask, SIMD512::Integer const &primID, SIMD512::Integer &viewportIdx)
     {
         SWR_ASSERT(pfnBinFunc != nullptr);
 
-        pfnBinFunc(pDC, pa, workerId, prims, primMask, primID);
+        pfnBinFunc(pDC, pa, workerId, prims, primMask, primID, viewportIdx);
     }
 };
 
@@ -437,7 +437,7 @@ public:
         return SIMD_T::movemask_ps(vClipCullMask);
     }
 
-    void ClipSimd(const typename SIMD_T::Float &vPrimMask, const typename SIMD_T::Float &vClipMask, PA_STATE &pa, const typename SIMD_T::Integer &vPrimId)
+    void ClipSimd(const typename SIMD_T::Float &vPrimMask, const typename SIMD_T::Float &vClipMask, PA_STATE &pa, const typename SIMD_T::Integer &vPrimId, const typename SIMD_T::Integer &vViewportIdx)
     {
         // input/output vertex store for clipper
         SIMDVERTEX_T<SIMD_T> vertices[7]; // maximum 7 verts generated per triangle
@@ -538,6 +538,7 @@ public:
 
         const uint32_t *pVertexCount = reinterpret_cast<const uint32_t *>(&vNumClippedVerts);
         const uint32_t *pPrimitiveId = reinterpret_cast<const uint32_t *>(&vPrimId);
+        const uint32_t *pViewportIdx = reinterpret_cast<const uint32_t *>(&vViewportIdx);
 
         const SIMD256::Integer vOffsets = SIMD256::set_epi32(
             0 * sizeof(SIMDVERTEX_T<SIMD_T>), // unused lane
@@ -642,12 +643,14 @@ public:
             }
 
             PA_STATE_OPT clipPA(pDC, numEmittedPrims, reinterpret_cast<uint8_t *>(&transposedPrims[0]), numEmittedVerts, SWR_VTX_NUM_SLOTS, true, NumVertsPerPrim, clipTopology);
+            clipPA.viewportArrayActive = pa.viewportArrayActive;
 
             static const uint32_t primMaskMap[] = { 0x0, 0x1, 0x3, 0x7, 0xf, 0x1f, 0x3f, 0x7f };
 
             const uint32_t primMask = primMaskMap[numEmittedPrims];
 
             const typename SIMD_T::Integer primID = SIMD_T::set1_epi32(pPrimitiveId[inputPrim]);
+            const typename SIMD_T::Integer viewportIdx = SIMD_T::set1_epi32(pViewportIdx[inputPrim]);
 
             while (clipPA.GetNextStreamOutput())
             {
@@ -659,7 +662,7 @@ public:
 
                     if (assemble)
                     {
-                        binner.pfnBinFunc(pDC, clipPA, workerId, attrib, primMask, primID);
+                        binner.pfnBinFunc(pDC, clipPA, workerId, attrib, primMask, primID, viewportIdx);
                     }
 
                 } while (clipPA.NextPrim());
@@ -674,7 +677,7 @@ public:
         UPDATE_STAT_FE(CPrimitives, numClippedPrims);
     }
 
-    void ExecuteStage(PA_STATE &pa, typename SIMD_T::Vec4 prim[], uint32_t primMask, typename SIMD_T::Integer const &primId)
+    void ExecuteStage(PA_STATE &pa, typename SIMD_T::Vec4 prim[], uint32_t primMask, typename SIMD_T::Integer const &primId, typename SIMD_T::Integer const &viewportIdx)
     {
         SWR_ASSERT(pa.pDC != nullptr);
 
@@ -686,31 +689,6 @@ public:
         uint32_t numInvoc = _mm_popcnt_u32(primMask);
         UPDATE_STAT_FE(CInvocations, numInvoc);
 
-        // Read back viewport index if required
-        typename SIMD_T::Integer viewportIdx = SIMD_T::setzero_si();
-        typename SIMD_T::Vec4 vpiAttrib[NumVertsPerPrim];
-        typename SIMD_T::Integer vpai = SIMD_T::setzero_si();
-
-        if (state.backendState.readViewportArrayIndex)
-        {
-            pa.Assemble(VERTEX_SGV_SLOT, vpiAttrib);
-            vpai = SIMD_T::castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
-        }
-
-
-        if (state.backendState.readViewportArrayIndex) // VPAIOffsets are guaranteed 0-15 -- no OOB issues if they are offsets from 0 
-        {
-            // OOB indices => forced to zero.
-            vpai = SIMD_T::max_epi32(vpai, SIMD_T::setzero_si());
-            typename SIMD_T::Integer vNumViewports = SIMD_T::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
-            typename SIMD_T::Integer vClearMask = SIMD_T::cmplt_epi32(vpai, vNumViewports);
-            viewportIdx = SIMD_T::and_si(vClearMask, vpai);
-        }
-        else
-        {
-            viewportIdx = vpai;
-        }
-
         ComputeClipCodes(prim, viewportIdx);
 
         // cull prims with NAN coords
@@ -738,7 +716,7 @@ public:
             AR_BEGIN(FEGuardbandClip, pa.pDC->drawId);
             // we have to clip tris, execute the clipper, which will also
             // call the binner
-            ClipSimd(SIMD_T::vmask_ps(primMask), SIMD_T::vmask_ps(clipMask), pa, primId);
+            ClipSimd(SIMD_T::vmask_ps(primMask), SIMD_T::vmask_ps(clipMask), pa, primId, viewportIdx);
             AR_END(FEGuardbandClip, 1);
         }
         else if (validMask)
@@ -747,7 +725,7 @@ public:
             UPDATE_STAT_FE(CPrimitives, _mm_popcnt_u32(validMask));
 
             // forward valid prims directly to binner
-            binner.pfnBinFunc(this->pDC, pa, this->workerId, prim, validMask, primId);
+            binner.pfnBinFunc(this->pDC, pa, this->workerId, prim, validMask, primId, viewportIdx);
         }
     }
 
@@ -1157,12 +1135,12 @@ private:
 
 
 // pipeline stage functions
-void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId);
-void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId);
-void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId);
+void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx);
+void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx);
+void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx);
 #if USE_SIMD16_FRONTEND
-void SIMDCALL ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId);
-void SIMDCALL ClipLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId);
-void SIMDCALL ClipPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId);
+void SIMDCALL ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx);
+void SIMDCALL ClipLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx);
+void SIMDCALL ClipPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx);
 #endif
 
diff --git a/src/gallium/drivers/swr/rasterizer/core/context.h b/src/gallium/drivers/swr/rasterizer/core/context.h
index ae942f182d..ef6719384f 100644
--- a/src/gallium/drivers/swr/rasterizer/core/context.h
+++ b/src/gallium/drivers/swr/rasterizer/core/context.h
@@ -214,12 +214,12 @@ struct PA_STATE;
 
 // function signature for pipeline stages that execute after primitive assembly
 typedef void(*PFN_PROCESS_PRIMS)(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], 
-    uint32_t primMask, simdscalari const &primID);
+    uint32_t primMask, simdscalari const &primID, simdscalari const &viewportIdx);
 
 #if ENABLE_AVX512_SIMD16
 // function signature for pipeline stages that execute after primitive assembly
 typedef void(SIMDCALL *PFN_PROCESS_PRIMS_SIMD16)(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[],
-    uint32_t primMask, simd16scalari const &primID);
+    uint32_t primMask, simd16scalari const &primID, simd16scalari const &viewportIdx);
 
 #endif
 OSALIGNLINE(struct) API_STATE
diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
index 5a61dc33a0..3de79d600f 100644
--- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
@@ -988,13 +988,48 @@ static void GeometryShaderStage(
                             {
 #if USE_SIMD16_FRONTEND
                                 simd16scalari vPrimId = _simd16_set1_epi32(pPrimitiveId[inputPrim]);
+
+                                // Gather the VPAI from the SGV if provided.
+                                SIMD16::Vec4 vpiAttrib[3];
+                                SIMD16::Integer vViewportIdx = SIMD16::setzero_si();
+                                if (state.backendState.readViewportArrayIndex)
+                                {
+                                    gsPa.Assemble(VERTEX_SGV_SLOT, vpiAttrib);
+                                    vViewportIdx = SIMD16::castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
+                                    gsPa.viewportArrayActive = true;
+                                }
+
                                 {
+                                    // OOB VPAI indices => forced to zero.
+                                    vViewportIdx = SIMD16::max_epi32(vViewportIdx, SIMD16::setzero_si());
+                                    simd16scalari vNumViewports = SIMD16::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
+                                    simd16scalari vClearMask = SIMD16::cmplt_epi32(vViewportIdx, vNumViewports);
+                                    vViewportIdx = SIMD16::and_si(vClearMask, vViewportIdx);
+
                                     gsPa.useAlternateOffset = false;
-                                    pfnClipFunc(pDC, gsPa, workerId, attrib_simd16, GenMask(gsPa.NumPrims()), vPrimId);
+                                    pfnClipFunc(pDC, gsPa, workerId, attrib_simd16, GenMask(gsPa.NumPrims()), vPrimId, vViewportIdx);
                                 }
 #else
                                 simdscalari vPrimId = _simd_set1_epi32(pPrimitiveId[inputPrim]);
-                                pfnClipFunc(pDC, gsPa, workerId, attrib, GenMask(gsPa.NumPrims()), vPrimId);
+
+                                // Gather the VPAI from the SGV if provided.
+                                SIMD8::Vec4 vpiAttrib[3];
+                                SIMD8::Integer vViewportIdx = SIMD8::setzero_si();
+                                if (state.backendState.readViewportArrayIndex)
+                                {
+                                    gsPa.Assemble(VERTEX_SGV_SLOT, vpiAttrib);
+                                    vViewportIdx = SIMD8::castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
+
+                                    // OOB VPAI indices => forced to zero.
+                                    vViewportIdx = SIMD8::max_epi32(vViewportIdx, SIMD8::setzero_si());
+                                    simd16scalari vNumViewports = SIMD8::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
+                                    simd16scalari vClearMask = SIMD8::cmplt_epi32(vViewportIdx, vNumViewports);
+                                    vViewportIdx = SIMD8::and_si(vClearMask, vViewportIdx);
+
+                                    gsPa.viewportArrayActive = true;
+                                }
+
+                                pfnClipFunc(pDC, gsPa, workerId, attrib, GenMask(gsPa.NumPrims()), vPrimId, vViewportIdx);
 #endif
                             }
                         }
@@ -1337,14 +1372,46 @@ static void TessellationStages(
 
                     SWR_ASSERT(pfnClipFunc);
 #if USE_SIMD16_FRONTEND
+                    // Gather the VPAI from the SGV if provided.
+                    simd16scalari vpai = SIMD16::setzero_si();
+                    if (state.backendState.readViewportArrayIndex)
+                    {
+                        simd16vector vpiAttrib[4];
+                        tessPa.Assemble(VERTEX_SGV_SLOT, vpiAttrib);
+                        vpai = SIMD16::castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
+                        tessPa.viewportArrayActive = true;
+                    }
+
 
                     {
+                        // OOB VPAI indices => forced to zero.
+                        vpai = SIMD16::max_epi32(vpai, SIMD16::setzero_si());
+                        simd16scalari vNumViewports = SIMD16::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
+                        simd16scalari vClearMask = SIMD16::cmplt_epi32(vpai, vNumViewports);
+                        vpai = SIMD16::and_si(vClearMask, vpai);
+
                         tessPa.useAlternateOffset = false;
-                        pfnClipFunc(pDC, tessPa, workerId, prim_simd16, GenMask(numPrims), primID);
+                        pfnClipFunc(pDC, tessPa, workerId, prim_simd16, GenMask(numPrims), primID, vpai);
                     }
 #else
+                    // Gather the VPAI from the SGV if provided.
+                    SIMD8::Vec4 vpiAttrib[3];
+                    SIMD8::Integer vViewportIdx = SIMD8::setzero_si();
+                    if (state.backendState.readViewportArrayIndex)
+                    {
+                        tessPa.Assemble(VERTEX_SGV_SLOT, vpiAttrib);
+                        vViewportIdx = SIMD8::castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
+
+                        // OOB VPAI indices => forced to zero.
+                        vViewportIdx = SIMD8::max_epi32(vViewportIdx, SIMD8::setzero_si());
+                        simd16scalari vNumViewports = SIMD8::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
+                        simd16scalari vClearMask = SIMD8::cmplt_epi32(vViewportIdx, vNumViewports);
+                        vViewportIdx = SIMD8::and_si(vClearMask, vViewportIdx);
+
+                        tessPa.viewportArrayActive = true;
+                    }
                     pfnClipFunc(pDC, tessPa, workerId, prim,
-                        GenMask(tessPa.NumPrims()), _simd_set1_epi32(dsContext.PrimitiveID));
+                        GenMask(tessPa.NumPrims()), _simd_set1_epi32(dsContext.PrimitiveID), vViewportIdx);
 #endif
                 }
             }
@@ -1736,9 +1803,25 @@ void ProcessDraw(
                                 if (HasRastT::value)
                                 {
                                     SWR_ASSERT(pDC->pState->pfnProcessPrims_simd16);
+                                    // Gather the VPAI from the SGV if provided.
+                                    simd16scalari vpai = SIMD16::setzero_si();
+                                    if (state.backendState.readViewportArrayIndex)
+                                    {
+                                        simd16vector vpiAttrib[4];
+                                        pa.Assemble(VERTEX_SGV_SLOT, vpiAttrib);
+                                        vpai = SIMD16::castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
+                                        pa.viewportArrayActive = true;
+                                    }
+
                                     {
+                                        // OOB VPAI indices => forced to zero.
+                                        vpai = SIMD16::max_epi32(vpai, SIMD16::setzero_si());
+                                        simd16scalari vNumViewports = SIMD16::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
+                                        simd16scalari vClearMask = SIMD16::cmplt_epi32(vpai, vNumViewports);
+                                        vpai = SIMD16::and_si(vClearMask, vpai);
+
                                         pa.useAlternateOffset = false;
-                                        pDC->pState->pfnProcessPrims_simd16(pDC, pa, workerId, prim_simd16, GenMask(numPrims), primID);
+                                        pDC->pState->pfnProcessPrims_simd16(pDC, pa, workerId, prim_simd16, GenMask(numPrims), primID, vpai);
                                     }
                                 }
                             }
@@ -1900,8 +1983,25 @@ void ProcessDraw(
                                 {
                                     SWR_ASSERT(pDC->pState->pfnProcessPrims);
 
+                                    // Gather the VPAI from the SGV if provided.
+                                    SIMD8::Vec4 vpiAttrib[3];
+                                    SIMD8::Integer vViewportIdx = SIMD8::setzero_si();
+                                    if (state.backendState.readViewportArrayIndex)
+                                    {
+                                        pa.Assemble(VERTEX_SGV_SLOT, vpiAttrib);
+                                        vViewportIdx = SIMD8::castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
+
+                                        // OOB VPAI indices => forced to zero.
+                                        vViewportIdx = SIMD8::max_epi32(vViewportIdx, SIMD8::setzero_si());
+                                        simd16scalari vNumViewports = SIMD8::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
+                                        simd16scalari vClearMask = SIMD8::cmplt_epi32(vViewportIdx, vNumViewports);
+                                        vViewportIdx = SIMD8::and_si(vClearMask, vViewportIdx);
+
+                                        pa.viewportArrayActive = true;
+                                    }
+
                                     pDC->pState->pfnProcessPrims(pDC, pa, workerId, prim,
-                                        GenMask(pa.NumPrims()), pa.GetPrimID(work.startPrimID));
+                                        GenMask(pa.NumPrims()), pa.GetPrimID(work.startPrimID), vViewportIdx);
                                 }
                             }
                         }
diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.h b/src/gallium/drivers/swr/rasterizer/core/frontend.h
index 11099d6449..e2ca1274c5 100644
--- a/src/gallium/drivers/swr/rasterizer/core/frontend.h
+++ b/src/gallium/drivers/swr/rasterizer/core/frontend.h
@@ -389,10 +389,10 @@ PFN_PROCESS_PRIMS_SIMD16 GetBinTrianglesFunc_simd16(bool IsConservative);
 #endif
 
 struct PA_STATE_BASE;  // forward decl
-void BinPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari const &primID);
-void BinLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari const &primID);
+void BinPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari const &primID, simdscalari const &viewportIdx);
+void BinLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari const &primID, simdscalari const &viewportIdx);
 #if USE_SIMD16_FRONTEND
-void SIMDCALL BinPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[3], uint32_t primMask, simd16scalari const &primID);
-void SIMDCALL BinLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[3], uint32_t primMask, simd16scalari const &primID);
+void SIMDCALL BinPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[3], uint32_t primMask, simd16scalari const &primID, simd16scalari const &viewportIdx);
+void SIMDCALL BinLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[3], uint32_t primMask, simd16scalari const &primID, simd16scalari const &viewportIdx);
 #endif
 
diff --git a/src/gallium/drivers/swr/rasterizer/core/pa.h b/src/gallium/drivers/swr/rasterizer/core/pa.h
index 13f99cb546..a0160d4a40 100644
--- a/src/gallium/drivers/swr/rasterizer/core/pa.h
+++ b/src/gallium/drivers/swr/rasterizer/core/pa.h
@@ -77,9 +77,11 @@ struct PA_STATE
 
 #if ENABLE_AVX512_SIMD16
     bool useAlternateOffset{ false };
+#endif
+
+    bool viewportArrayActive{ false };
     uint32_t numVertsPerPrim{ 0 };
 
-#endif
     PA_STATE(){}
     PA_STATE(DRAW_CONTEXT *in_pDC, uint8_t* in_pStreamBase, uint32_t in_streamSizeInVerts, uint32_t in_vertexStride, uint32_t in_numVertsPerPrim) :
         pDC(in_pDC), pStreamBase(in_pStreamBase), streamSizeInVerts(in_streamSizeInVerts), vertexStride(in_vertexStride), numVertsPerPrim(in_numVertsPerPrim) {}
-- 
2.14.1



More information about the mesa-dev mailing list