[Mesa-dev] [PATCH 05/11] swr: [rasterizer] attribute swizzling and linkage

Tim Rowley timothy.o.rowley at intel.com
Mon Jul 18 17:10:14 UTC 2016


Add support for enhanced attribute swizzling. Currently this supports
constant source overrides, which are used to handle PrimitiveID. Input
select swizzling and wrap-shortest are not yet supported. Also remove
the obsolete linkageMask and its associated code.
---
 src/gallium/drivers/swr/rasterizer/core/api.cpp    |  56 +++---
 src/gallium/drivers/swr/rasterizer/core/api.h      |  13 --
 src/gallium/drivers/swr/rasterizer/core/clip.h     |   9 +-
 src/gallium/drivers/swr/rasterizer/core/context.h  |  11 +-
 .../drivers/swr/rasterizer/core/frontend.cpp       | 222 ++++++++++++++-------
 src/gallium/drivers/swr/rasterizer/core/pa.h       |  11 +-
 src/gallium/drivers/swr/rasterizer/core/state.h    |  29 ++-
 .../drivers/swr/rasterizer/jitter/fetch_jit.h      |  12 +-
 src/gallium/drivers/swr/swr_shader.cpp             |  12 --
 src/gallium/drivers/swr/swr_state.cpp              |  13 +-
 src/gallium/drivers/swr/swr_state.h                |   1 -
 11 files changed, 218 insertions(+), 171 deletions(-)

diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp
index aface7a..c3a1539 100644
--- a/src/gallium/drivers/swr/rasterizer/core/api.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp
@@ -606,31 +606,6 @@ void SwrSetBlendFunc(
     pState->pfnBlendFunc[renderTarget] = pfnBlendFunc;
 }
 
-void SwrSetLinkage(
-    HANDLE hContext,
-    uint32_t mask,
-    const uint8_t* pMap)
-{
-    API_STATE* pState = GetDrawState(GetContext(hContext));
-
-    static const uint8_t IDENTITY_MAP[] =
-    {
-         0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
-        16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
-    };
-    static_assert(sizeof(IDENTITY_MAP) == sizeof(pState->linkageMap),
-        "Update for new value of MAX_ATTRIBUTES");
-
-    pState->linkageMask = mask;
-    pState->linkageCount = _mm_popcnt_u32(mask);
-
-    if (!pMap)
-    {
-        pMap = IDENTITY_MAP;
-    }
-    memcpy(pState->linkageMap, pMap, pState->linkageCount);
-}
-
 // update guardband multipliers for the viewport
 void updateGuardband(API_STATE *pState)
 {
@@ -847,25 +822,44 @@ void SetupPipeline(DRAW_CONTEXT *pDC)
         (pState->state.depthStencilState.depthWriteEnable == FALSE) &&
         (pState->state.depthStencilState.stencilTestEnable == FALSE) &&
         (pState->state.depthStencilState.stencilWriteEnable == FALSE) &&
-        (pState->state.linkageCount == 0))
+        (pState->state.backendState.numAttributes == 0))
     {
         pState->pfnProcessPrims = nullptr;
-        pState->state.linkageMask = 0;
     }
 
     if (pState->state.soState.rasterizerDisable == true)
     {
         pState->pfnProcessPrims = nullptr;
-        pState->state.linkageMask = 0;
     }
 
-    // set up the frontend attrib mask
-    pState->state.feAttribMask = pState->state.linkageMask;
+    // set up the frontend attribute count
+    pState->state.feNumAttributes = 0;
+    const SWR_BACKEND_STATE& backendState = pState->state.backendState;
+    if (backendState.swizzleEnable)
+    {
+        // attribute swizzling is enabled, iterate over the map and record the max attribute used
+        for (uint32_t i = 0; i < backendState.numAttributes; ++i)
+        {
+            pState->state.feNumAttributes = std::max(pState->state.feNumAttributes, (uint32_t)backendState.swizzleMap[i].sourceAttrib + 1);
+        }
+    }
+    else
+    {
+        pState->state.feNumAttributes = pState->state.backendState.numAttributes;
+    }
+
     if (pState->state.soState.soEnable)
     {
+        uint32_t streamMasks = 0;
         for (uint32_t i = 0; i < 4; ++i)
         {
-            pState->state.feAttribMask |= pState->state.soState.streamMasks[i];
+            streamMasks |= pState->state.soState.streamMasks[i];
+        }
+
+        DWORD maxAttrib;
+        if (_BitScanReverse(&maxAttrib, streamMasks))
+        {
+            pState->state.feNumAttributes = std::max(pState->state.feNumAttributes, (uint32_t)(maxAttrib + 1));
         }
     }
 
diff --git a/src/gallium/drivers/swr/rasterizer/core/api.h b/src/gallium/drivers/swr/rasterizer/core/api.h
index 04cdb9e..ab56cab 100644
--- a/src/gallium/drivers/swr/rasterizer/core/api.h
+++ b/src/gallium/drivers/swr/rasterizer/core/api.h
@@ -330,19 +330,6 @@ void SWR_API SwrSetBlendFunc(
     PFN_BLEND_JIT_FUNC pfnBlendFunc);
 
 //////////////////////////////////////////////////////////////////////////
-/// @brief Set linkage mask
-/// @param hContext - Handle passed back from SwrCreateContext
-/// @param mask - Specifies which vertex outputs are are needed by PS.
-/// @param pMap - (Optional)Linkage map to specify where FE attributes are
-///               gathered from to supply PS attribute values.  The length
-///               of the map buffer needs to match the number of set bits
-///               in "mask".
-void SWR_API SwrSetLinkage(
-    HANDLE hContext,
-    uint32_t mask,
-    const uint8_t* pMap);
-
-//////////////////////////////////////////////////////////////////////////
 /// @brief SwrDraw
 /// @param hContext - Handle passed back from SwrCreateContext
 /// @param topology - Specifies topology for draw.
diff --git a/src/gallium/drivers/swr/rasterizer/core/clip.h b/src/gallium/drivers/swr/rasterizer/core/clip.h
index 1a6fc6d..b2b3bb4 100644
--- a/src/gallium/drivers/swr/rasterizer/core/clip.h
+++ b/src/gallium/drivers/swr/rasterizer/core/clip.h
@@ -324,16 +324,13 @@ public:
         }
 
         // assemble attribs
-        DWORD slot = 0;
-        uint32_t mapIdx = 0;
-        uint32_t tmpLinkage = this->state.linkageMask;
+        const SWR_BACKEND_STATE& backendState = this->state.backendState;
 
         int32_t maxSlot = -1;
-        while (_BitScanForward(&slot, tmpLinkage))
+        for (uint32_t slot = 0; slot < backendState.numAttributes; ++slot)
         {
-            tmpLinkage &= ~(1 << slot);
             // Compute absolute attrib slot in vertex array
-            uint32_t mapSlot = this->state.linkageMap[mapIdx++];
+            uint32_t mapSlot = backendState.swizzleEnable ? backendState.swizzleMap[slot].sourceAttrib : slot;
             maxSlot = std::max<int32_t>(maxSlot, mapSlot);
             uint32_t inputSlot = VERTEX_ATTRIB_START_SLOT + mapSlot;
 
diff --git a/src/gallium/drivers/swr/rasterizer/core/context.h b/src/gallium/drivers/swr/rasterizer/core/context.h
index be4c2e9..13dcdfc 100644
--- a/src/gallium/drivers/swr/rasterizer/core/context.h
+++ b/src/gallium/drivers/swr/rasterizer/core/context.h
@@ -264,15 +264,8 @@ OSALIGNLINE(struct) API_STATE
     PFN_DS_FUNC             pfnDsFunc;
     SWR_TS_STATE            tsState;
 
-    // Specifies which VS outputs are sent to PS.
-    // Does not include position
-    uint32_t                linkageMask; 
-    uint32_t                linkageCount;
-    uint8_t                 linkageMap[MAX_ATTRIBUTES];
-
-    // attrib mask, specifies the total set of attributes used
-    // by the frontend (vs, so, gs)
-    uint32_t                feAttribMask;
+    // Number of attributes used by the frontend (vs, so, gs)
+    uint32_t                feNumAttributes;
 
     PRIMITIVE_TOPOLOGY      topology;
     bool                    forceFront;
diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
index cc8ebda..8537c59 100644
--- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
@@ -794,15 +794,7 @@ static void GeometryShaderStage(
             uint8_t* pBase = pInstanceBase + instance * instanceStride;
             uint8_t* pCutBase = pCutBufferBase + instance * cutInstanceStride;
             
-            DWORD numAttribs;
-            if (_BitScanReverse(&numAttribs, state.feAttribMask))
-            {
-                numAttribs++;
-            }
-            else
-            {
-                numAttribs = 0;
-            }
+            uint32_t numAttribs = state.feNumAttributes;
 
             for (uint32_t stream = 0; stream < MAX_SO_STREAMS; ++stream)
             {
@@ -1445,7 +1437,6 @@ PFN_FE_WORK_FUNC GetProcessDrawFunc(
     return TemplateArgUnroller<FEDrawChooser>::GetFunc(IsIndexed, IsCutIndexEnabled, HasTessellation, HasGeometryShader, HasStreamOut, HasRasterization);
 }
 
-
 //////////////////////////////////////////////////////////////////////////
 /// @brief Processes attributes for the backend based on linkage mask and
 ///        linkage map.  Essentially just doing an SOA->AOS conversion and pack.
@@ -1455,75 +1446,101 @@ PFN_FE_WORK_FUNC GetProcessDrawFunc(
 /// @param pLinkageMap - maps VS attribute slot to PS slot
 /// @param triIndex - Triangle to process attributes for
 /// @param pBuffer - Output result
-template<uint32_t NumVerts>
+template<typename NumVertsT, typename IsSwizzledT, typename HasConstantInterpT>
 INLINE void ProcessAttributes(
     DRAW_CONTEXT *pDC,
     PA_STATE&pa,
-    uint32_t linkageMask,
-    const uint8_t* pLinkageMap,
     uint32_t triIndex,
+    uint32_t primId,
     float *pBuffer)
 {
-    DWORD slot = 0;
-    uint32_t mapIdx = 0;
-    LONG constantInterpMask = pDC->pState->state.backendState.constantInterpolationMask;
+    static_assert(NumVertsT::value > 0 && NumVertsT::value <= 3, "Invalid value for NumVertsT");
+    const SWR_BACKEND_STATE& backendState = pDC->pState->state.backendState;
+    LONG constantInterpMask = backendState.constantInterpolationMask;
     const uint32_t provokingVertex = pDC->pState->state.frontendState.topologyProvokingVertex;
     const PRIMITIVE_TOPOLOGY topo = pDC->pState->state.topology;
 
-    while (_BitScanForward(&slot, linkageMask))
+    static const float constTable[3][4] = {
+        {0.0f, 0.0f, 0.0f, 0.0f},
+        {0.0f, 0.0f, 0.0f, 1.0f},
+        {1.0f, 1.0f, 1.0f, 1.0f}
+    };
+
+    for (uint32_t i = 0; i < backendState.numAttributes; ++i)
     {
-        linkageMask &= ~(1 << slot); // done with this bit.
+        uint32_t inputSlot;
+        if (IsSwizzledT::value)
+        {
+            SWR_ATTRIB_SWIZZLE attribSwizzle = backendState.swizzleMap[i];
+            inputSlot = VERTEX_ATTRIB_START_SLOT + attribSwizzle.sourceAttrib;
 
-        // compute absolute slot in vertex attrib array
-        uint32_t inputSlot = VERTEX_ATTRIB_START_SLOT + pLinkageMap[mapIdx];
+        }
+        else
+        {
+            inputSlot = VERTEX_ATTRIB_START_SLOT + i;
+        }
 
         __m128 attrib[3];    // triangle attribs (always 4 wide)
+        static const uint32_t numVerts = NumVertsT::value < 3 ? NumVertsT::value : 3;
+        float* pAttribStart = pBuffer;
 
-        if (_bittest(&constantInterpMask, mapIdx))
+        if (HasConstantInterpT::value)
         {
-            uint32_t vid;
-            static const uint32_t tristripProvokingVertex[] = {0, 2, 1};
-            static const int32_t quadProvokingTri[2][4] = {{0, 0, 0, 1}, {0, -1, 0, 0}};
-            static const uint32_t quadProvokingVertex[2][4] = {{0, 1, 2, 2}, {0, 1, 1, 2}};
-            static const int32_t qstripProvokingTri[2][4] = {{0, 0, 0, 1}, {-1, 0, 0, 0}};
-            static const uint32_t qstripProvokingVertex[2][4] = {{0, 1, 2, 1}, {0, 0, 2, 1}};
-
-            switch (topo) {
-            case TOP_QUAD_LIST:
-                pa.AssembleSingle(inputSlot,
-                                  triIndex + quadProvokingTri[triIndex & 1][provokingVertex],
-                                  attrib);
-                vid = quadProvokingVertex[triIndex & 1][provokingVertex];
-                break;
-            case TOP_QUAD_STRIP:
-                pa.AssembleSingle(inputSlot,
-                                  triIndex + qstripProvokingTri[triIndex & 1][provokingVertex],
-                                  attrib);
-                vid = qstripProvokingVertex[triIndex & 1][provokingVertex];
-                break;
-            case TOP_TRIANGLE_STRIP:
-               pa.AssembleSingle(inputSlot, triIndex, attrib);
-               vid = (triIndex & 1)
-                   ? tristripProvokingVertex[provokingVertex]
-                   : provokingVertex;
-               break;
-            default:
-                pa.AssembleSingle(inputSlot, triIndex, attrib);
-                vid = provokingVertex;
-                break;
-            }
+            if (_bittest(&constantInterpMask, i))
+            {
+                uint32_t vid;
+                uint32_t adjustedTriIndex;
+                static const uint32_t tristripProvokingVertex[] = { 0, 2, 1 };
+                static const int32_t quadProvokingTri[2][4] = { {0, 0, 0, 1}, {0, -1, 0, 0} };
+                static const uint32_t quadProvokingVertex[2][4] = { {0, 1, 2, 2}, {0, 1, 1, 2} };
+                static const int32_t qstripProvokingTri[2][4] = { {0, 0, 0, 1}, {-1, 0, 0, 0} };
+                static const uint32_t qstripProvokingVertex[2][4] = { {0, 1, 2, 1}, {0, 0, 2, 1} };
+
+                switch (topo) {
+                case TOP_QUAD_LIST:
+                    adjustedTriIndex = triIndex + quadProvokingTri[triIndex & 1][provokingVertex];
+                    vid = quadProvokingVertex[triIndex & 1][provokingVertex];
+                    break;
+                case TOP_QUAD_STRIP:
+                    adjustedTriIndex = triIndex + qstripProvokingTri[triIndex & 1][provokingVertex];
+                    vid = qstripProvokingVertex[triIndex & 1][provokingVertex];
+                    break;
+                case TOP_TRIANGLE_STRIP:
+                    adjustedTriIndex = triIndex;
+                    vid = (triIndex & 1)
+                        ? tristripProvokingVertex[provokingVertex]
+                        : provokingVertex;
+                    break;
+                default:
+                    adjustedTriIndex = triIndex;
+                    vid = provokingVertex;
+                    break;
+                }
+
+                pa.AssembleSingle(inputSlot, adjustedTriIndex, attrib);
 
-            for (uint32_t i = 0; i < NumVerts; ++i)
+                for (uint32_t i = 0; i < numVerts; ++i)
+                {
+                    _mm_store_ps(pBuffer, attrib[vid]);
+                    pBuffer += 4;
+                }
+            }
+            else
             {
-                _mm_store_ps(pBuffer, attrib[vid]);
-                pBuffer += 4;
+                pa.AssembleSingle(inputSlot, triIndex, attrib);
+
+                for (uint32_t i = 0; i < numVerts; ++i)
+                {
+                    _mm_store_ps(pBuffer, attrib[i]);
+                    pBuffer += 4;
+                }
             }
         }
         else
         {
             pa.AssembleSingle(inputSlot, triIndex, attrib);
 
-            for (uint32_t i = 0; i < NumVerts; ++i)
+            for (uint32_t i = 0; i < numVerts; ++i)
             {
                 _mm_store_ps(pBuffer, attrib[i]);
                 pBuffer += 4;
@@ -1534,16 +1551,66 @@ INLINE void ProcessAttributes(
         // interpolation code in the pixel shader works correctly for the
         // 3 topologies - point, line, tri.  This effectively zeros out the
         // effect of the missing vertices in the triangle interpolation.
-        for (uint32_t i = NumVerts; i < 3; ++i)
+        for (uint32_t v = numVerts; v < 3; ++v)
         {
-            _mm_store_ps(pBuffer, attrib[NumVerts - 1]);
+            _mm_store_ps(pBuffer, attrib[numVerts - 1]);
             pBuffer += 4;
         }
 
-        mapIdx++;
+        // check for constant source overrides
+        if (IsSwizzledT::value)
+        {
+            uint32_t mask = backendState.swizzleMap[i].componentOverrideMask;
+            if (mask)
+            {
+                DWORD comp;
+                while (_BitScanForward(&comp, mask))
+                {
+                    mask &= ~(1 << comp);
+
+                    float constantValue = 0.0f;
+                    switch ((SWR_CONSTANT_SOURCE)backendState.swizzleMap[i].constantSource)
+                    {
+                    case SWR_CONSTANT_SOURCE_CONST_0000:
+                    case SWR_CONSTANT_SOURCE_CONST_0001_FLOAT:
+                    case SWR_CONSTANT_SOURCE_CONST_1111_FLOAT:
+                        constantValue = constTable[backendState.swizzleMap[i].constantSource][comp];
+                        break;
+                    case SWR_CONSTANT_SOURCE_PRIM_ID:
+                        constantValue = *(float*)&primId;
+                        break;
+                    }
+
+                    // apply constant value to all 3 vertices
+                    for (uint32_t v = 0; v < 3; ++v)
+                    {
+                        pAttribStart[comp + v * 4] = constantValue;
+                    }
+                }
+            }
+        }
     }
 }
 
+
+typedef void(*PFN_PROCESS_ATTRIBUTES)(DRAW_CONTEXT*, PA_STATE&, uint32_t, uint32_t, float*);
+
+struct ProcessAttributesChooser
+{
+    typedef PFN_PROCESS_ATTRIBUTES FuncType;
+
+    template <typename... ArgsB>
+    static FuncType GetFunc()
+    {
+        return ProcessAttributes<ArgsB...>;
+    }
+};
+
+PFN_PROCESS_ATTRIBUTES GetProcessAttributesFunc(uint32_t NumVerts, bool IsSwizzled, bool HasConstantInterp)
+{
+    return TemplateArgUnroller<ProcessAttributesChooser>::GetFunc(NumVerts, IsSwizzled, HasConstantInterp);
+}
+
 //////////////////////////////////////////////////////////////////////////
 /// @brief Processes enabled user clip distances. Loads the active clip
 ///        distances from the PA, sets up barycentric equations, and
@@ -1742,6 +1809,10 @@ void BinTriangles(
     const SWR_GS_STATE& gsState = state.gsState;
     MacroTileMgr *pTileMgr = pDC->pTileMgr;
 
+    // Select attribute processor
+    PFN_PROCESS_ATTRIBUTES pfnProcessAttribs = GetProcessAttributesFunc(3,
+        state.backendState.swizzleEnable, state.backendState.constantInterpolationMask);
+
 
     simdscalar vRecipW0 = _simd_set1_ps(1.0f);
     simdscalar vRecipW1 = _simd_set1_ps(1.0f);
@@ -1951,8 +2022,7 @@ void BinTriangles(
     // scan remaining valid triangles and bin each separately
     while (_BitScanForward(&triIndex, triMask))
     {
-        uint32_t linkageCount = state.linkageCount;
-        uint32_t linkageMask  = state.linkageMask;
+        uint32_t linkageCount = state.backendState.numAttributes;
         uint32_t numScalarAttribs = linkageCount * 4;
 
         BE_WORK work;
@@ -1972,7 +2042,7 @@ void BinTriangles(
         float *pAttribs = (float*)pArena->AllocAligned(numScalarAttribs * 3 * sizeof(float), 16);
         desc.pAttribs = pAttribs;
         desc.numAttribs = linkageCount;
-        ProcessAttributes<3>(pDC, pa, linkageMask, state.linkageMap, triIndex, desc.pAttribs);
+        pfnProcessAttribs(pDC, pa, triIndex, pPrimID[triIndex], desc.pAttribs);
 
         // store triangle vertex data
         desc.pTriBuffer = (float*)pArena->AllocAligned(4 * 4 * sizeof(float), 16);
@@ -2050,6 +2120,10 @@ void BinPoints(
     const SWR_GS_STATE& gsState = state.gsState;
     const SWR_RASTSTATE& rastState = state.rastState;
 
+    // Select attribute processor
+    PFN_PROCESS_ATTRIBUTES pfnProcessAttribs = GetProcessAttributesFunc(1,
+        state.backendState.swizzleEnable, state.backendState.constantInterpolationMask);
+
     if (!feState.vpTransformDisable)
     {
         // perspective divide
@@ -2130,12 +2204,13 @@ void BinPoints(
 
         uint32_t *pPrimID = (uint32_t *)&primID;
         DWORD primIndex = 0;
+
+        const SWR_BACKEND_STATE& backendState = pDC->pState->state.backendState;
+
         // scan remaining valid triangles and bin each separately
         while (_BitScanForward(&primIndex, primMask))
         {
-            uint32_t linkageCount = state.linkageCount;
-            uint32_t linkageMask = state.linkageMask;
-
+            uint32_t linkageCount = backendState.numAttributes;
             uint32_t numScalarAttribs = linkageCount * 4;
 
             BE_WORK work;
@@ -2158,7 +2233,7 @@ void BinPoints(
             desc.pAttribs = pAttribs;
             desc.numAttribs = linkageCount;
 
-            ProcessAttributes<1>(pDC, pa, linkageMask, state.linkageMap, primIndex, pAttribs);
+            pfnProcessAttribs(pDC, pa, primIndex, pPrimID[primIndex], pAttribs);
 
             // store raster tile aligned x, y, perspective correct z
             float *pTriBuffer = (float*)pArena->AllocAligned(4 * sizeof(float), 16);
@@ -2265,11 +2340,11 @@ void BinPoints(
         _simd_store_ps((float*)aPrimVertsZ, primVerts.z);
 
         // scan remaining valid prims and bin each separately
+        const SWR_BACKEND_STATE& backendState = state.backendState;
         DWORD primIndex;
         while (_BitScanForward(&primIndex, primMask))
         {
-            uint32_t linkageCount = state.linkageCount;
-            uint32_t linkageMask = state.linkageMask;
+            uint32_t linkageCount = backendState.numAttributes;
             uint32_t numScalarAttribs = linkageCount * 4;
 
             BE_WORK work;
@@ -2290,7 +2365,7 @@ void BinPoints(
             // store active attribs
             desc.pAttribs = (float*)pArena->AllocAligned(numScalarAttribs * 3 * sizeof(float), 16);
             desc.numAttribs = linkageCount;
-            ProcessAttributes<1>(pDC, pa, linkageMask, state.linkageMap, primIndex, desc.pAttribs);
+            pfnProcessAttribs(pDC, pa, primIndex, pPrimID[primIndex], desc.pAttribs);
 
             // store point vertex data
             float *pTriBuffer = (float*)pArena->AllocAligned(4 * sizeof(float), 16);
@@ -2353,6 +2428,10 @@ void BinLines(
     const SWR_FRONTEND_STATE& feState = state.frontendState;
     const SWR_GS_STATE& gsState = state.gsState;
 
+    // Select attribute processor
+    PFN_PROCESS_ATTRIBUTES pfnProcessAttribs = GetProcessAttributesFunc(2,
+    state.backendState.swizzleEnable, state.backendState.constantInterpolationMask);
+
     simdscalar vRecipW0 = _simd_set1_ps(1.0f);
     simdscalar vRecipW1 = _simd_set1_ps(1.0f);
 
@@ -2485,8 +2564,7 @@ void BinLines(
     DWORD primIndex;
     while (_BitScanForward(&primIndex, primMask))
     {
-        uint32_t linkageCount = state.linkageCount;
-        uint32_t linkageMask = state.linkageMask;
+        uint32_t linkageCount = state.backendState.numAttributes;
         uint32_t numScalarAttribs = linkageCount * 4;
 
         BE_WORK work;
@@ -2507,7 +2585,7 @@ void BinLines(
         // store active attribs
         desc.pAttribs = (float*)pArena->AllocAligned(numScalarAttribs * 3 * sizeof(float), 16);
         desc.numAttribs = linkageCount;
-        ProcessAttributes<2>(pDC, pa, linkageMask, state.linkageMap, primIndex, desc.pAttribs);
+        pfnProcessAttribs(pDC, pa, primIndex, pPrimID[primIndex], desc.pAttribs);
 
         // store line vertex data
         desc.pTriBuffer = (float*)pArena->AllocAligned(4 * 4 * sizeof(float), 16);
diff --git a/src/gallium/drivers/swr/rasterizer/core/pa.h b/src/gallium/drivers/swr/rasterizer/core/pa.h
index 6aa73c1..64932af 100644
--- a/src/gallium/drivers/swr/rasterizer/core/pa.h
+++ b/src/gallium/drivers/swr/rasterizer/core/pa.h
@@ -1169,15 +1169,8 @@ struct PA_FACTORY
             topo == TOP_LINE_LIST_ADJ || topo == TOP_LISTSTRIP_ADJ || topo == TOP_TRI_LIST_ADJ || topo == TOP_TRI_STRIP_ADJ)))
         {
             memset(&indexStore, 0, sizeof(indexStore));
-            DWORD numAttribs;
-            if (_BitScanReverse(&numAttribs, state.feAttribMask))
-            {
-                numAttribs++;
-            }
-            else
-            {
-                numAttribs = 0;
-            }
+            uint32_t numAttribs = state.feNumAttributes;
+
             new (&this->paCut) PA_STATE_CUT(pDC, (uint8_t*)&this->vertexStore[0], MAX_NUM_VERTS_PER_PRIM * KNOB_SIMD_WIDTH, 
                 &this->indexStore[0], numVerts, numAttribs, state.topology, false);
             cutPA = true;
diff --git a/src/gallium/drivers/swr/rasterizer/core/state.h b/src/gallium/drivers/swr/rasterizer/core/state.h
index 9fc304a..0931c82 100644
--- a/src/gallium/drivers/swr/rasterizer/core/state.h
+++ b/src/gallium/drivers/swr/rasterizer/core/state.h
@@ -938,13 +938,34 @@ struct SWR_RASTSTATE
     uint8_t clipDistanceMask;
 };
 
+enum SWR_CONSTANT_SOURCE
+{
+    SWR_CONSTANT_SOURCE_CONST_0000,
+    SWR_CONSTANT_SOURCE_CONST_0001_FLOAT,
+    SWR_CONSTANT_SOURCE_CONST_1111_FLOAT,
+    SWR_CONSTANT_SOURCE_PRIM_ID
+};
+
+struct SWR_ATTRIB_SWIZZLE
+{
+    uint16_t sourceAttrib : 5;          // source attribute 
+    uint16_t constantSource : 2;        // constant source to apply
+    uint16_t componentOverrideMask : 4; // override component with constant source
+};
+
 // backend state
 struct SWR_BACKEND_STATE
 {
-    uint32_t constantInterpolationMask;
-    uint32_t pointSpriteTexCoordMask;
-    uint8_t numAttributes;
-    uint8_t numComponents[KNOB_NUM_ATTRIBUTES];
+    uint32_t constantInterpolationMask;     // bitmask indicating which attributes have constant interpolation
+    uint32_t pointSpriteTexCoordMask;       // bitmask indicating the attribute(s) which should be interpreted as tex coordinates
+
+    uint8_t numAttributes;                  // total number of attributes to send to backend (up to 32)
+    uint8_t numComponents[32];              // number of components to setup per attribute, this reduces some calculations for unneeded components
+
+    bool swizzleEnable;                 // when enabled, core will parse the swizzle map when 
+                                        // setting up attributes for the backend, otherwise
+                                        // all attributes up to numAttributes will be sent
+    SWR_ATTRIB_SWIZZLE swizzleMap[32];
 };
 
 
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.h b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.h
index 1d8e9a1..d3181cd 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.h
+++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.h
@@ -80,12 +80,12 @@ enum ComponentEnable
 
 enum ComponentControl
 {
-    NoStore     = 0,
-    StoreSrc    = 1,
-    Store0      = 2,
-    Store1Fp    = 3,
-    Store1Int   = 4,
-    StoreVertexId = 5,
+    NoStore         = 0,
+    StoreSrc        = 1,
+    Store0          = 2,
+    Store1Fp        = 3,
+    Store1Int       = 4,
+    StoreVertexId   = 5,
     StoreInstanceId = 6
 };
 
diff --git a/src/gallium/drivers/swr/swr_shader.cpp b/src/gallium/drivers/swr/swr_shader.cpp
index 4d1b604..ecb4545 100644
--- a/src/gallium/drivers/swr/swr_shader.cpp
+++ b/src/gallium/drivers/swr/swr_shader.cpp
@@ -157,18 +157,6 @@ BuilderSWR::CompileVS(struct swr_context *ctx, swr_jit_vs_key &key)
 {
    struct swr_vertex_shader *swr_vs = ctx->vs;
 
-   swr_vs->linkageMask = 0;
-
-   for (unsigned i = 0; i < swr_vs->info.base.num_outputs; i++) {
-      switch (swr_vs->info.base.output_semantic_name[i]) {
-      case TGSI_SEMANTIC_POSITION:
-         break;
-      default:
-         swr_vs->linkageMask |= (1 << i);
-         break;
-      }
-   }
-
    LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
    LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
 
diff --git a/src/gallium/drivers/swr/swr_state.cpp b/src/gallium/drivers/swr/swr_state.cpp
index 5caaa5c..dac95ce 100644
--- a/src/gallium/drivers/swr/swr_state.cpp
+++ b/src/gallium/drivers/swr/swr_state.cpp
@@ -1373,16 +1373,13 @@ swr_update_derived(struct pipe_context *pipe,
       }
    }
 
-   uint32_t linkage = ctx->vs->linkageMask;
-   if (ctx->rasterizer->sprite_coord_enable)
-      linkage |= (1 << ctx->vs->info.base.num_outputs);
-
-   SwrSetLinkage(ctx->swrContext, linkage, NULL);
-
    // set up backend state
    SWR_BACKEND_STATE backendState = {0};
-   backendState.numAttributes = 1;
-   backendState.numComponents[0] = 4;
+   backendState.numAttributes =
+      ctx->vs->info.base.num_outputs - 1 +
+      (ctx->rasterizer->sprite_coord_enable ? 1 : 0);
+   for (unsigned i = 0; i < backendState.numAttributes; i++)
+      backendState.numComponents[i] = 4;
    backendState.constantInterpolationMask =
       ctx->rasterizer->flatshade ?
       ctx->fs->flatConstantMask :
diff --git a/src/gallium/drivers/swr/swr_state.h b/src/gallium/drivers/swr/swr_state.h
index cb69964..dcb1145 100644
--- a/src/gallium/drivers/swr/swr_state.h
+++ b/src/gallium/drivers/swr/swr_state.h
@@ -53,7 +53,6 @@ typedef ShaderVariant<PFN_PIXEL_KERNEL> VariantFS;
 struct swr_vertex_shader {
    struct pipe_shader_state pipe;
    struct lp_tgsi_info info;
-   unsigned linkageMask;
    std::unordered_map<swr_jit_vs_key, std::unique_ptr<VariantVS>> map;
    SWR_STREAMOUT_STATE soState;
    PFN_SO_FUNC soFunc[PIPE_PRIM_MAX] {0};
-- 
1.9.1



More information about the mesa-dev mailing list