[Mesa-dev] [PATCH v2 09/14] swr: [rasterizer core] conservative rasterization frontend support

Tim Rowley timothy.o.rowley at intel.com
Mon Jun 20 21:37:03 UTC 2016


---
 src/gallium/drivers/swr/Makefile.sources           |   1 +
 src/gallium/drivers/swr/rasterizer/core/api.cpp    |  13 +-
 src/gallium/drivers/swr/rasterizer/core/clip.h     |   4 +-
 .../drivers/swr/rasterizer/core/conservativeRast.h | 120 +++++++++++++++
 src/gallium/drivers/swr/rasterizer/core/context.h  |   2 +
 .../drivers/swr/rasterizer/core/frontend.cpp       | 163 +++++++++++++++++++--
 src/gallium/drivers/swr/rasterizer/core/frontend.h |  43 +-----
 .../drivers/swr/rasterizer/core/rasterizer.h       |   8 +
 src/gallium/drivers/swr/rasterizer/core/state.h    |   4 +-
 src/gallium/drivers/swr/rasterizer/core/utils.h    |  30 ++++
 10 files changed, 325 insertions(+), 63 deletions(-)
 create mode 100644 src/gallium/drivers/swr/rasterizer/core/conservativeRast.h

diff --git a/src/gallium/drivers/swr/Makefile.sources b/src/gallium/drivers/swr/Makefile.sources
index f9448ee..8d97a75 100644
--- a/src/gallium/drivers/swr/Makefile.sources
+++ b/src/gallium/drivers/swr/Makefile.sources
@@ -67,6 +67,7 @@ CORE_CXX_SOURCES := \
 	rasterizer/core/blend.h \
 	rasterizer/core/clip.cpp \
 	rasterizer/core/clip.h \
+	rasterizer/core/conservativeRast.h \
 	rasterizer/core/context.h \
 	rasterizer/core/depthstencil.h \
 	rasterizer/core/fifo.hpp \
diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp
index 22a94fb..cec4519 100644
--- a/src/gallium/drivers/swr/rasterizer/core/api.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp
@@ -780,10 +780,7 @@ void SetupPipeline(DRAW_CONTEXT *pDC)
         const bool bMultisampleEnable = ((rastState.sampleCount > SWR_MULTISAMPLE_1X) || rastState.forcedSampleCount) ? 1 : 0;
         const uint32_t centroid = ((psState.barycentricsMask & SWR_BARYCENTRIC_CENTROID_MASK) > 0) ? 1 : 0;
         const uint32_t canEarlyZ = (psState.forceEarlyZ || (!psState.writesODepth && !psState.usesSourceDepth && !psState.usesUAV)) ? 1 : 0;
-
-        // currently only support 'normal' input coverage
-        SWR_ASSERT(psState.inputCoverage == SWR_INPUT_COVERAGE_NORMAL ||
-                   psState.inputCoverage == SWR_INPUT_COVERAGE_NONE);
+        const uint32_t inputCoverage = (psState.inputCoverage != SWR_INPUT_COVERAGE_NONE);
      
         SWR_BARYCENTRICS_MASK barycentricsMask = (SWR_BARYCENTRICS_MASK)psState.barycentricsMask;
         
@@ -795,20 +792,20 @@ void SetupPipeline(DRAW_CONTEXT *pDC)
             {
                 // always need to generate I & J per sample for Z interpolation
                 barycentricsMask = (SWR_BARYCENTRICS_MASK)(barycentricsMask | SWR_BARYCENTRIC_PER_SAMPLE_MASK);
-                backendFuncs.pfnBackend = gBackendPixelRateTable[rastState.sampleCount][rastState.samplePattern][psState.inputCoverage][centroid][forcedSampleCount][canEarlyZ];
+                backendFuncs.pfnBackend = gBackendPixelRateTable[rastState.sampleCount][rastState.samplePattern][inputCoverage][centroid][forcedSampleCount][canEarlyZ];
             }
             else
             {
                 // always need to generate I & J per pixel for Z interpolation
                 barycentricsMask = (SWR_BARYCENTRICS_MASK)(barycentricsMask | SWR_BARYCENTRIC_PER_PIXEL_MASK);
-                backendFuncs.pfnBackend = gBackendSingleSample[psState.inputCoverage][centroid][canEarlyZ];
+                backendFuncs.pfnBackend = gBackendSingleSample[inputCoverage][centroid][canEarlyZ];
             }
             break;
         case SWR_SHADING_RATE_SAMPLE:
             SWR_ASSERT(rastState.samplePattern == SWR_MSAA_STANDARD_PATTERN);
             // always need to generate I & J per sample for Z interpolation
             barycentricsMask = (SWR_BARYCENTRICS_MASK)(barycentricsMask | SWR_BARYCENTRIC_PER_SAMPLE_MASK);
-            backendFuncs.pfnBackend = gBackendSampleRateTable[rastState.sampleCount][psState.inputCoverage][centroid][canEarlyZ];
+            backendFuncs.pfnBackend = gBackendSampleRateTable[rastState.sampleCount][inputCoverage][centroid][canEarlyZ];
             break;
         default:
             SWR_ASSERT(0 && "Invalid shading rate");
@@ -833,7 +830,7 @@ void SetupPipeline(DRAW_CONTEXT *pDC)
         break;
     default:
         pState->pfnProcessPrims = ClipTriangles;
-        pfnBinner = BinTriangles;
+        pfnBinner = GetBinTrianglesFunc((rastState.conservativeRast > 0));
         break;
     };
 
diff --git a/src/gallium/drivers/swr/rasterizer/core/clip.h b/src/gallium/drivers/swr/rasterizer/core/clip.h
index 67a4c4f..1a6fc6d 100644
--- a/src/gallium/drivers/swr/rasterizer/core/clip.h
+++ b/src/gallium/drivers/swr/rasterizer/core/clip.h
@@ -385,7 +385,7 @@ public:
         PRIMITIVE_TOPOLOGY clipTopology = TOP_UNKNOWN;
         if (NumVertsPerPrim == 3)
         {
-            pfnBinFunc = BinTriangles;
+            pfnBinFunc = GetBinTrianglesFunc((pa.pDC->pState->state.rastState.conservativeRast > 0));
             clipTopology = TOP_TRIANGLE_FAN;
 
             // so that the binner knows to bloat wide points later
@@ -519,7 +519,7 @@ public:
             pfnBinner = BinLines;
             break;
         default:
-            pfnBinner = BinTriangles;
+            pfnBinner = GetBinTrianglesFunc((pa.pDC->pState->state.rastState.conservativeRast > 0));
             break;
         };
 
diff --git a/src/gallium/drivers/swr/rasterizer/core/conservativeRast.h b/src/gallium/drivers/swr/rasterizer/core/conservativeRast.h
new file mode 100644
index 0000000..f8aa8df
--- /dev/null
+++ b/src/gallium/drivers/swr/rasterizer/core/conservativeRast.h
@@ -0,0 +1,120 @@
+/****************************************************************************
+* Copyright (C) 2014-2016 Intel Corporation.   All Rights Reserved.
+*
+* Permission is hereby granted, free of charge, to any person obtaining a
+* copy of this software and associated documentation files (the "Software"),
+* to deal in the Software without restriction, including without limitation
+* the rights to use, copy, modify, merge, publish, distribute, sublicense,
+* and/or sell copies of the Software, and to permit persons to whom the
+* Software is furnished to do so, subject to the following conditions:
+*
+* The above copyright notice and this permission notice (including the next
+* paragraph) shall be included in all copies or substantial portions of the
+* Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+* IN THE SOFTWARE.
+*
+* @file conservativeRast.h
+*
+******************************************************************************/
+#pragma once
+#include <type_traits>
+#include "common/simdintrin.h"
+
+enum FixedPointFmt
+{
+    FP_UNINIT,  ///< sentinel: no fixed point precision selected
+    _16_8,      ///< 16.8 fixed point (8 fractional bits, scale 256)
+    _16_9       ///< 16.9 fixed point (9 fractional bits, scale 512)
+};
+
+//////////////////////////////////////////////////////////////////////////
+/// @brief tag types wrapping each FixedPointFmt value; used as the FixedPointTraits template argument
+typedef std::integral_constant<uint32_t, FP_UNINIT> Fixed_Uninit;
+typedef std::integral_constant<uint32_t, _16_8> Fixed_16_8;
+typedef std::integral_constant<uint32_t, _16_9> Fixed_16_9;
+
+//////////////////////////////////////////////////////////////////////////
+/// @struct FixedPointTraits
+/// @brief constants for converting between FP32 and fixed point; this primary template is empty
+/// @tparam FT: fixed precision tag type (Fixed_16_8 and Fixed_16_9 are specialized)
+template<typename FT>
+struct FixedPointTraits{};
+
+//////////////////////////////////////////////////////////////////////////
+/// @brief Fixed_16_8 specialization of FixedPointTraits
+template<>
+struct FixedPointTraits<Fixed_16_8>
+{
+    /// multiplier to go from FP32 to Fixed Point 16.8 (2^8)
+    typedef std::integral_constant<uint32_t, 256> FixedPointScaleT;
+    /// number of bits to shift to go from 16.8 fixed => int32
+    typedef std::integral_constant<uint32_t, 8> FixedPointShiftT;
+    typedef Fixed_16_8 TypeT;  ///< precision tag described by this specialization
+};
+
+//////////////////////////////////////////////////////////////////////////
+/// @brief Fixed_16_9 specialization of FixedPointTraits
+template<>
+struct FixedPointTraits<Fixed_16_9>
+{
+    /// multiplier to go from FP32 to Fixed Point 16.9 (2^9)
+    typedef std::integral_constant<uint32_t, 512> FixedPointScaleT;
+    /// number of bits to shift to go from 16.9 fixed => int32
+    typedef std::integral_constant<uint32_t, 9> FixedPointShiftT;
+    typedef Fixed_16_9 TypeT;  ///< precision tag described by this specialization
+};
+
+//////////////////////////////////////////////////////////////////////////
+/// @brief tag types for the rasterization mode: false_type = standard, true_type = conservative
+typedef std::false_type StandardRastT;
+typedef std::true_type ConservativeRastT;
+
+//////////////////////////////////////////////////////////////////////////
+/// @brief tag types for SWR_INPUT_COVERAGE modes; OuterConservativeCoverageT wraps SWR_INPUT_COVERAGE_NORMAL. NOTE(review): needs the SWR_INPUT_COVERAGE enum declared before this header - confirm include order
+typedef std::integral_constant<uint32_t, SWR_INPUT_COVERAGE_NONE> NoInputCoverageT;
+typedef std::integral_constant<uint32_t, SWR_INPUT_COVERAGE_NORMAL> OuterConservativeCoverageT;
+typedef std::integral_constant<uint32_t, SWR_INPUT_COVERAGE_INNER_CONSERVATIVE> InnerConservativeCoverageT;
+
+//////////////////////////////////////////////////////////////////////////
+/// @struct ConservativeRastFETraits
+/// @brief primary ConservativeRastFETraits template. Shouldn't be instantiated
+/// @tparam ConservativeT: type of conservative rasterization (StandardRastT or ConservativeRastT)
+template <typename ConservativeT>
+struct ConservativeRastFETraits {};
+
+//////////////////////////////////////////////////////////////////////////
+/// @brief StandardRastT specialization of ConservativeRastFETraits
+template <>
+struct ConservativeRastFETraits<StandardRastT>
+{
+    typedef std::false_type IsConservativeT;                  ///< conservative rast disabled
+    typedef FixedPointTraits<Fixed_16_8> BBoxPrecisionT;      ///< bounding box computed in 16.8
+    typedef FixedPointTraits<Fixed_16_8> ZeroAreaPrecisionT;  ///< degenerate (zero area) check done in 16.8
+};
+
+//////////////////////////////////////////////////////////////////////////
+/// @brief ConservativeRastT specialization of ConservativeRastFETraits
+template <>
+struct ConservativeRastFETraits<ConservativeRastT>
+{
+    typedef std::true_type IsConservativeT;                   ///< conservative rast enabled
+    typedef FixedPointTraits<Fixed_16_8> ZeroAreaPrecisionT;  ///< degenerate (zero area) check done in 16.8
+
+    /// Conservative bounding box needs to expand the area around each vertex by 1/512, which
+    /// is the potential snapping error when going from FP-> 16.8 fixed
+    typedef FixedPointTraits<Fixed_16_9> BBoxPrecisionT;
+    typedef std::integral_constant<uint32_t, 1> BoundingBoxOffsetT;  ///< bbox expansion in 16.9 units (1 == 1/512)
+    typedef std::integral_constant<uint32_t, 1> BoundingBoxShiftT;   ///< right shift taking 16.9 back to 16.8
+};
+
+//////////////////////////////////////////////////////////////////////////
+/// @brief convenience typedefs for the two ConservativeRastFETraits specializations (standard / conservative)
+typedef ConservativeRastFETraits<StandardRastT> FEStandardRastT;
+typedef ConservativeRastFETraits<ConservativeRastT> FEConservativeRastT;
diff --git a/src/gallium/drivers/swr/rasterizer/core/context.h b/src/gallium/drivers/swr/rasterizer/core/context.h
index 03e5837..3204352 100644
--- a/src/gallium/drivers/swr/rasterizer/core/context.h
+++ b/src/gallium/drivers/swr/rasterizer/core/context.h
@@ -83,6 +83,8 @@ struct SWR_TRIANGLE_DESC
     float *pUserClipBuffer;
 
     uint64_t coverageMask[SWR_MAX_NUM_MULTISAMPLES];
+    uint64_t conservativeCoverageMask;
+    uint64_t innerConservativeCoverageMask;
     uint64_t anyCoveredSamples;
 
     TRI_FLAGS triFlags;
diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
index f86f8fa..7f3e33e 100644
--- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
@@ -33,6 +33,7 @@
 #include "context.h"
 #include "rdtsc_core.h"
 #include "rasterizer.h"
+#include "conservativeRast.h"
 #include "utils.h"
 #include "threads.h"
 #include "pa.h"
@@ -1590,6 +1591,132 @@ void ProcessUserClipDist(PA_STATE& pa, uint32_t primIndex, uint8_t clipDistMask,
 }
 
 //////////////////////////////////////////////////////////////////////////
+/// @brief Convert one SIMD register of FP32 coords to fixed point: multiply by
+/// PT::FixedPointScaleT, then convert to int32. PT defaults to 16.8 precision.
+template <typename PT = FixedPointTraits<Fixed_16_8>>
+INLINE simdscalari fpToFixedPointVertical(const simdscalar vIn)
+{
+    simdscalar vFixed = _simd_mul_ps(vIn, _simd_set1_ps(PT::FixedPointScaleT::value));
+    return _simd_cvtps_epi32(vFixed);
+}
+
+//////////////////////////////////////////////////////////////////////////
+/// @brief Helper function to set the X,Y coords of a triangle to the
+/// requested Fixed Point precision from FP32. If the RequestedT
+/// FixedPointTraits precision is the same as the CurrentT, no extra
+/// conversions will be done. If they are different, convert from FP32
+/// to the Requested precision and set vXi, vYi
+/// @tparam RequestedT: requested FixedPointTraits type
+/// @tparam CurrentT: FixedPointTraits type vXi, vYi already hold (defaults to uninitialized)
+template<typename RequestedT, typename CurrentT = FixedPointTraits<Fixed_Uninit>>
+struct FPToFixedPoint
+{
+    //////////////////////////////////////////////////////////////////////////
+    /// @param tri: simdvector[3] of FP triangle verts
+    /// @param vXi: [out] fixed point X coords of tri verts, overwritten
+    /// @param vYi: [out] fixed point Y coords of tri verts, overwritten
+    INLINE static void Set(const simdvector * const tri, simdscalari (&vXi)[3], simdscalari (&vYi)[3])
+    {
+        vXi[0] = fpToFixedPointVertical<RequestedT>(tri[0].x);
+        vYi[0] = fpToFixedPointVertical<RequestedT>(tri[0].y);
+        vXi[1] = fpToFixedPointVertical<RequestedT>(tri[1].x);
+        vYi[1] = fpToFixedPointVertical<RequestedT>(tri[1].y);
+        vXi[2] = fpToFixedPointVertical<RequestedT>(tri[2].x);
+        vYi[2] = fpToFixedPointVertical<RequestedT>(tri[2].y);
+    };
+};
+
+//////////////////////////////////////////////////////////////////////////
+/// @brief Partial specialization for RequestedT == CurrentT: vXi, vYi already
+/// hold the requested precision, so Set() does nothing and leaves them untouched.
+template<typename RequestedT>
+struct FPToFixedPoint<RequestedT, RequestedT>
+{
+    INLINE static void Set(const simdvector * const tri, simdscalari (&vXi)[3], simdscalari (&vYi)[3]){};
+};
+
+//////////////////////////////////////////////////////////////////////////
+/// @brief Calculate bounding box for current triangle. This primary template is an
+/// empty no-op; the FEStandardRastT / FEConservativeRastT specializations do the work.
+/// @tparam CT: ConservativeRastFETraits type
+/// @param tri: FP32 triangle verts, used when vX, vY must be reconverted
+/// @param vX, vY: fixed point X, Y positions for triangle verts, expected in
+/// CT::ZeroAreaPrecisionT; may be overwritten with CT::BBoxPrecisionT values
+/// @param bbox: fixed point bbox
+template <typename CT>
+INLINE void calcBoundingBoxIntVertical(const simdvector * const tri, simdscalari (&vX)[3], simdscalari (&vY)[3], simdBBox &bbox){}
+
+//////////////////////////////////////////////////////////////////////////
+/// @brief FEStandardRastT specialization: bbox is the min/max of the three 16.8 verts
+template <>
+INLINE void calcBoundingBoxIntVertical<FEStandardRastT>(const simdvector * const tri, simdscalari (&vX)[3], simdscalari (&vY)[3], simdBBox &bbox)
+{
+    // FE standard rast traits
+    typedef FEStandardRastT CT;
+
+    static_assert(std::is_same<CT::BBoxPrecisionT, FixedPointTraits<Fixed_16_8>>::value, "Standard rast BBox calculation needs to be in 16.8 precision");
+    // BBox and zero-area precisions are both 16.8 here, so this resolves to the no-op Set()
+    FPToFixedPoint<CT::BBoxPrecisionT, CT::ZeroAreaPrecisionT>::Set(tri, vX, vY);
+
+    simdscalari vMinX = vX[0];
+    vMinX = _simd_min_epi32(vMinX, vX[1]);
+    vMinX = _simd_min_epi32(vMinX, vX[2]);
+
+    simdscalari vMaxX = vX[0];
+    vMaxX = _simd_max_epi32(vMaxX, vX[1]);
+    vMaxX = _simd_max_epi32(vMaxX, vX[2]);
+
+    simdscalari vMinY = vY[0];
+    vMinY = _simd_min_epi32(vMinY, vY[1]);
+    vMinY = _simd_min_epi32(vMinY, vY[2]);
+
+    simdscalari vMaxY = vY[0];
+    vMaxY = _simd_max_epi32(vMaxY, vY[1]);
+    vMaxY = _simd_max_epi32(vMaxY, vY[2]);
+
+    bbox.left = vMinX;
+    bbox.right = vMaxX;
+    bbox.top = vMinY;
+    bbox.bottom = vMaxY;
+}
+
+//////////////////////////////////////////////////////////////////////////
+/// @brief FEConservativeRastT specialization: reconverts vX, vY from tri to 16.9 (overwriting
+/// them), then writes bbox as the vertex min/max expanded by 1/512 and snapped back to 16.8
+template <>
+INLINE void calcBoundingBoxIntVertical<FEConservativeRastT>(const simdvector * const tri, simdscalari (&vX)[3], simdscalari (&vY)[3], simdBBox &bbox)
+{
+    // FE conservative rast traits
+    typedef FEConservativeRastT CT;
+
+    static_assert(std::is_same<CT::BBoxPrecisionT, FixedPointTraits<Fixed_16_9>>::value, "Conservative rast BBox calculation needs to be in 16.9 precision");
+    // vX, vY arrive in ZeroAreaPrecisionT (16.8); recompute them from FP32 in 16.9 for the bbox
+    FPToFixedPoint<CT::BBoxPrecisionT, CT::ZeroAreaPrecisionT>::Set(tri, vX, vY);
+
+    simdscalari vMinX = vX[0];
+    vMinX = _simd_min_epi32(vMinX, vX[1]);
+    vMinX = _simd_min_epi32(vMinX, vX[2]);
+
+    simdscalari vMaxX = vX[0];
+    vMaxX = _simd_max_epi32(vMaxX, vX[1]);
+    vMaxX = _simd_max_epi32(vMaxX, vX[2]);
+
+    simdscalari vMinY = vY[0];
+    vMinY = _simd_min_epi32(vMinY, vY[1]);
+    vMinY = _simd_min_epi32(vMinY, vY[2]);
+
+    simdscalari vMaxY = vY[0];
+    vMaxY = _simd_max_epi32(vMaxY, vY[1]);
+    vMaxY = _simd_max_epi32(vMaxY, vY[2]);
+    
+    /// Expand by 1/512, then snap 16.9 -> 16.8; the shift must be arithmetic since coords are signed and may be negative
+    bbox.left = _simd_srai_epi32(_simd_sub_epi32(vMinX, _simd_set1_epi32(CT::BoundingBoxOffsetT::value)), CT::BoundingBoxShiftT::value);
+    bbox.right = _simd_srai_epi32(_simd_add_epi32(vMaxX, _simd_set1_epi32(CT::BoundingBoxOffsetT::value)), CT::BoundingBoxShiftT::value);
+    bbox.top = _simd_srai_epi32(_simd_sub_epi32(vMinY, _simd_set1_epi32(CT::BoundingBoxOffsetT::value)), CT::BoundingBoxShiftT::value);
+    bbox.bottom = _simd_srai_epi32(_simd_add_epi32(vMaxY, _simd_set1_epi32(CT::BoundingBoxOffsetT::value)), CT::BoundingBoxShiftT::value);
+}
+
+//////////////////////////////////////////////////////////////////////////
 /// @brief Bin triangle primitives to macro tiles. Performs setup, clipping
 ///        culling, viewport transform, etc.
 /// @param pDC - pointer to draw context.
@@ -1597,6 +1724,8 @@ void ProcessUserClipDist(PA_STATE& pa, uint32_t primIndex, uint8_t clipDistMask,
 /// @param workerId - thread's worker id. Even thread has a unique id.
 /// @param tri - Contains triangle position data for SIMDs worth of triangles.
 /// @param primID - Primitive ID for each triangle.
+/// @tparam CT - ConservativeRastFETraits
+template <typename CT>
 void BinTriangles(
     DRAW_CONTEXT *pDC,
     PA_STATE& pa,
@@ -1652,14 +1781,9 @@ void BinTriangles(
     tri[2].x = _simd_add_ps(tri[2].x, offset);
     tri[2].y = _simd_add_ps(tri[2].y, offset);
 
-    // convert to fixed point
     simdscalari vXi[3], vYi[3];
-    vXi[0] = fpToFixedPointVertical(tri[0].x);
-    vYi[0] = fpToFixedPointVertical(tri[0].y);
-    vXi[1] = fpToFixedPointVertical(tri[1].x);
-    vYi[1] = fpToFixedPointVertical(tri[1].y);
-    vXi[2] = fpToFixedPointVertical(tri[2].x);
-    vYi[2] = fpToFixedPointVertical(tri[2].y);
+    // Set vXi, vYi to fixed point precision required for degenerate triangle check
+    FPToFixedPoint<typename CT::ZeroAreaPrecisionT>::Set(tri, vXi, vYi);
 
     // triangle setup
     simdscalari vAi[3], vBi[3];
@@ -1669,6 +1793,8 @@ void BinTriangles(
     simdscalari vDet[2];
     calcDeterminantIntVertical(vAi, vBi, vDet);
 
+    /// @todo handle degenerate triangles for conservative rasterization.
+
     // cull zero area
     int maskLo = _simd_movemask_pd(_simd_castsi_pd(_simd_cmpeq_epi64(vDet[0], _simd_setzero_si())));
     int maskHi = _simd_movemask_pd(_simd_castsi_pd(_simd_cmpeq_epi64(vDet[1], _simd_setzero_si())));
@@ -1713,6 +1839,7 @@ void BinTriangles(
         RDTSC_EVENT(FECullZeroAreaAndBackface, _mm_popcnt_u32(origTriMask ^ triMask), 0);
     }
 
+    /// Note: these variable initializations must stay above any 'goto endBinTriangles'
     // compute per tri backface
     uint32_t frontFaceMask = frontWindingTris;
 
@@ -1726,14 +1853,13 @@ void BinTriangles(
 
     // Calc bounding box of triangles
     simdBBox bbox;
-    calcBoundingBoxIntVertical(vXi, vYi, bbox);
+    calcBoundingBoxIntVertical<CT>(tri, vXi, vYi, bbox);
 
     // determine if triangle falls between pixel centers and discard
-    // only discard for non-MSAA case
+    // only discard for non-MSAA case and when conservative rast is disabled
     // (left + 127) & ~255
     // (right + 128) & ~255
-
-    if(rastState.sampleCount == SWR_MULTISAMPLE_1X)
+    if(rastState.sampleCount == SWR_MULTISAMPLE_1X && (!CT::IsConservativeT::value))
     {
         origTriMask = triMask;
 
@@ -1891,7 +2017,22 @@ endBinTriangles:
     RDTSC_STOP(FEBinTriangles, 1, 0);
 }
 
+struct FEBinTrianglesChooser  ///< terminator type passed to TemplateArgUnroller by GetBinTrianglesFunc
+{
+    typedef PFN_PROCESS_PRIMS FuncType;  ///< function pointer type returned by GetFunc
+
+    template <typename... ArgsB>
+    static FuncType GetFunc()  ///< returns BinTriangles instantiated on ConservativeRastFETraits<ArgsB...>
+    {
+        return BinTriangles<ConservativeRastFETraits<ArgsB...>>;
+    }
+};
 
+// Selects the BinTriangles instantiation matching IsConservative by unrolling it through TemplateArgUnroller
+PFN_PROCESS_PRIMS GetBinTrianglesFunc(bool IsConservative)
+{
+    return TemplateArgUnroller<FEBinTrianglesChooser>::GetFunc(IsConservative);
+}
 
 //////////////////////////////////////////////////////////////////////////
 /// @brief Bin SIMD points to the backend.  Only supports point size of 1
diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.h b/src/gallium/drivers/swr/rasterizer/core/frontend.h
index dfd3987..2de5d26 100644
--- a/src/gallium/drivers/swr/rasterizer/core/frontend.h
+++ b/src/gallium/drivers/swr/rasterizer/core/frontend.h
@@ -30,21 +30,6 @@
 #include "context.h"
 #include <type_traits>
 
-INLINE
-__m128i fpToFixedPoint(const __m128 vIn)
-{
-    __m128 vFixed = _mm_mul_ps(vIn, _mm_set1_ps(FIXED_POINT_SCALE));
-    return _mm_cvtps_epi32(vFixed);
-}
-
-INLINE
-simdscalari fpToFixedPointVertical(const simdscalar vIn)
-{
-    simdscalar vFixed = _simd_mul_ps(vIn, _simd_set1_ps(FIXED_POINT_SCALE));
-    return _simd_cvtps_epi32(vFixed);
-}
-
-
 // Calculates the A and B coefficients for the 3 edges of the triangle
 // 
 // maths for edge equations:
@@ -272,31 +257,6 @@ A = _mm_shuffle_ps(A, B, 1 0 1 0)
 }
 
 INLINE
-void calcBoundingBoxIntVertical(const simdscalari (&vX)[3], const simdscalari (&vY)[3], simdBBox &bbox)
-{
-    simdscalari vMinX = vX[0];
-    vMinX = _simd_min_epi32(vMinX, vX[1]);
-    vMinX = _simd_min_epi32(vMinX, vX[2]);
-
-    simdscalari vMaxX = vX[0];
-    vMaxX = _simd_max_epi32(vMaxX, vX[1]);
-    vMaxX = _simd_max_epi32(vMaxX, vX[2]);
-
-    simdscalari vMinY = vY[0];
-    vMinY = _simd_min_epi32(vMinY, vY[1]);
-    vMinY = _simd_min_epi32(vMinY, vY[2]);
-
-    simdscalari vMaxY = vY[0];
-    vMaxY = _simd_max_epi32(vMaxY, vY[1]);
-    vMaxY = _simd_max_epi32(vMaxY, vY[2]);
-
-    bbox.left = vMinX;
-    bbox.right = vMaxX;
-    bbox.top = vMinY;
-    bbox.bottom = vMaxY;
-}
-
-INLINE
 bool CanUseSimplePoints(DRAW_CONTEXT *pDC)
 {
     const API_STATE& state = GetApiState(pDC);
@@ -334,8 +294,9 @@ void ProcessDiscardInvalidateTiles(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uin
 void ProcessSync(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
 void ProcessQueryStats(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
 
+PFN_PROCESS_PRIMS GetBinTrianglesFunc(bool IsConservative);
+
 struct PA_STATE_BASE;  // forward decl
-void BinTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector tri[3], uint32_t primMask, simdscalari primID);
 void BinPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari primID);
 void BinLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari primID);
 
diff --git a/src/gallium/drivers/swr/rasterizer/core/rasterizer.h b/src/gallium/drivers/swr/rasterizer/core/rasterizer.h
index bcfeef4..d3faf2a 100644
--- a/src/gallium/drivers/swr/rasterizer/core/rasterizer.h
+++ b/src/gallium/drivers/swr/rasterizer/core/rasterizer.h
@@ -28,8 +28,16 @@
 #pragma once
 
 #include "context.h"
+#include <type_traits>
 
 extern PFN_WORK_FUNC gRasterizerTable[2][SWR_MULTISAMPLE_TYPE_MAX];
 void RasterizeLine(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData);
 void RasterizeSimplePoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData);
 void RasterizeTriPoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData);
+
+INLINE
+__m128i fpToFixedPoint(const __m128 vIn)  // SSE (4-wide) FP32 -> fixed point conversion
+{
+    __m128 vFixed = _mm_mul_ps(vIn, _mm_set1_ps(FIXED_POINT_SCALE));  // scale each float by FIXED_POINT_SCALE
+    return _mm_cvtps_epi32(vFixed);  // convert the scaled floats to packed int32
+}
\ No newline at end of file
diff --git a/src/gallium/drivers/swr/rasterizer/core/state.h b/src/gallium/drivers/swr/rasterizer/core/state.h
index 5156c6b..05735b3 100644
--- a/src/gallium/drivers/swr/rasterizer/core/state.h
+++ b/src/gallium/drivers/swr/rasterizer/core/state.h
@@ -909,6 +909,7 @@ struct SWR_RASTSTATE
     uint32_t forcedSampleCount      : 1;
     uint32_t pixelOffset            : 1;
     uint32_t depthBiasPreAdjusted   : 1;    ///< depth bias constant is in float units, not per-format Z units
+    uint32_t conservativeRast       : 1;
 
     float pointSize;
     float lineWidth;
@@ -989,6 +990,7 @@ enum SWR_INPUT_COVERAGE
 {
     SWR_INPUT_COVERAGE_NONE,
     SWR_INPUT_COVERAGE_NORMAL,
+    SWR_INPUT_COVERAGE_INNER_CONSERVATIVE,
     SWR_INPUT_COVERAGE_MAX,
 };
 
@@ -1016,7 +1018,7 @@ struct SWR_PS_STATE
 
     // dword 2
     uint32_t killsPixel         : 1;    // pixel shader can kill pixels
-    uint32_t inputCoverage      : 1;    // type of input coverage PS uses
+    uint32_t inputCoverage      : 1;    // ps uses input coverage
     uint32_t writesODepth       : 1;    // pixel shader writes to depth
     uint32_t usesSourceDepth    : 1;    // pixel shader reads depth
     uint32_t shadingRate        : 2;    // shading per pixel / sample / coarse pixel
diff --git a/src/gallium/drivers/swr/rasterizer/core/utils.h b/src/gallium/drivers/swr/rasterizer/core/utils.h
index 2853f98..f1b028e 100644
--- a/src/gallium/drivers/swr/rasterizer/core/utils.h
+++ b/src/gallium/drivers/swr/rasterizer/core/utils.h
@@ -849,6 +849,36 @@ struct TemplateArgUnroller
 
         return TemplateArgUnroller<TermT, ArgsB..., std::false_type>::GetFunc(remainingArgs...);
     }
+
+    // Last Arg Terminator (uint32_t): appends std::integral_constant<uint32_t, iArg> to ArgsB and asks TermT for the function
+    template <typename... TArgsT>
+    static typename TermT::FuncType GetFunc(uint32_t iArg)
+    {
+        switch(iArg)
+        {
+        case 0: return TermT::template GetFunc<ArgsB..., std::integral_constant<uint32_t, 0>>();
+        case 1: return TermT::template GetFunc<ArgsB..., std::integral_constant<uint32_t, 1>>();
+        case 2: return TermT::template GetFunc<ArgsB..., std::integral_constant<uint32_t, 2>>();
+        case 3: return TermT::template GetFunc<ArgsB..., std::integral_constant<uint32_t, 3>>();
+        case 4: return TermT::template GetFunc<ArgsB..., std::integral_constant<uint32_t, 4>>();
+        default: SWR_ASSUME(false); return nullptr;  // only iArg values 0..4 are handled
+        }
+    }
+
+    // Recursively parse args (uint32_t): appends std::integral_constant<uint32_t, iArg> to ArgsB, then unrolls remainingArgs
+    template <typename... TArgsT>
+    static typename TermT::FuncType GetFunc(uint32_t iArg, TArgsT... remainingArgs)
+    {
+        switch(iArg)
+        {
+        case 0: return TemplateArgUnroller<TermT, ArgsB..., std::integral_constant<uint32_t, 0>>::GetFunc(remainingArgs...);
+        case 1: return TemplateArgUnroller<TermT, ArgsB..., std::integral_constant<uint32_t, 1>>::GetFunc(remainingArgs...);
+        case 2: return TemplateArgUnroller<TermT, ArgsB..., std::integral_constant<uint32_t, 2>>::GetFunc(remainingArgs...);
+        case 3: return TemplateArgUnroller<TermT, ArgsB..., std::integral_constant<uint32_t, 3>>::GetFunc(remainingArgs...);
+        case 4: return TemplateArgUnroller<TermT, ArgsB..., std::integral_constant<uint32_t, 4>>::GetFunc(remainingArgs...);
+        default: SWR_ASSUME(false); return nullptr;  // only iArg values 0..4 are handled
+        }
+    }
 };
 
 
-- 
1.9.1



More information about the mesa-dev mailing list