[Mesa-dev] [PATCH v2 07/11] swr: [rasterizer] Interpolation utility functions

Tim Rowley timothy.o.rowley at intel.com
Fri Apr 22 00:37:45 UTC 2016


v2: use _mm_cmpunord_ps for vIsNaN
---
 .../drivers/swr/rasterizer/common/simdintrin.h     | 51 ++++++++++++++++++++--
 src/gallium/drivers/swr/rasterizer/core/frontend.h |  8 ++++
 src/gallium/drivers/swr/rasterizer/core/state.h    |  2 -
 3 files changed, 55 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/swr/rasterizer/common/simdintrin.h b/src/gallium/drivers/swr/rasterizer/common/simdintrin.h
index fa792b4..72fe15a 100644
--- a/src/gallium/drivers/swr/rasterizer/common/simdintrin.h
+++ b/src/gallium/drivers/swr/rasterizer/common/simdintrin.h
@@ -915,16 +915,25 @@ INLINE simdscalar vplaneps(simdscalar vA, simdscalar vB, simdscalar vC, simdscal
 }
 
 //////////////////////////////////////////////////////////////////////////
+/// @brief Compute plane equation vA * vX + vB * vY + vC per lane of a 128-bit (__m128) vector
+INLINE __m128 vplaneps128(__m128 vA, __m128 vB, __m128 vC, __m128 &vX, __m128 &vY)
+{
+    __m128 vOut = _simd128_fmadd_ps(vA, vX, vC);    // vA * vX + vC (assuming fmadd(a, b, c) yields a * b + c)
+    vOut = _simd128_fmadd_ps(vB, vY, vOut);         // add the vB * vY term onto the partial result
+    return vOut;
+}
+
+//////////////////////////////////////////////////////////////////////////
 /// @brief Interpolates a single component.
 /// @param vI - barycentric I
 /// @param vJ - barycentric J
 /// @param pInterpBuffer - pointer to attribute barycentric coeffs
-template<UINT Attrib, UINT Comp>
+template<UINT Attrib, UINT Comp, UINT numComponents = 4>
 static INLINE simdscalar InterpolateComponent(simdscalar vI, simdscalar vJ, const float *pInterpBuffer)
 {
-    const float *pInterpA = &pInterpBuffer[Attrib * 12 + 0 + Comp];
-    const float *pInterpB = &pInterpBuffer[Attrib * 12 + 4 + Comp];
-    const float *pInterpC = &pInterpBuffer[Attrib * 12 + 8 + Comp];
+    const float *pInterpA = &pInterpBuffer[Attrib * 3 * numComponents + 0 + Comp];
+    const float *pInterpB = &pInterpBuffer[Attrib * 3 * numComponents + numComponents + Comp];
+    const float *pInterpC = &pInterpBuffer[Attrib * 3 * numComponents + numComponents * 2 + Comp];
 
     simdscalar vA = _simd_broadcast_ss(pInterpA);
     simdscalar vB = _simd_broadcast_ss(pInterpB);
@@ -936,6 +945,40 @@ static INLINE simdscalar InterpolateComponent(simdscalar vI, simdscalar vJ, cons
     return vplaneps(vA, vB, vC, vI, vJ);
 }
 
+//////////////////////////////////////////////////////////////////////////
+/// @brief Interpolates a single component; __m128 overload of the simdscalar InterpolateComponent.
+/// @param vI - barycentric I
+/// @param vJ - barycentric J
+/// @param pInterpBuffer - pointer to attribute barycentric coeffs; indexed as 3 groups (A, B, C) of numComponents floats per attribute
+template<UINT Attrib, UINT Comp, UINT numComponents = 4>
+static INLINE __m128 InterpolateComponent(__m128 vI, __m128 vJ, const float *pInterpBuffer)
+{
+    const float *pInterpA = &pInterpBuffer[Attrib * 3 * numComponents + 0 + Comp];    // A group: offset 0 within the attribute
+    const float *pInterpB = &pInterpBuffer[Attrib * 3 * numComponents + numComponents + Comp];    // B group: offset numComponents
+    const float *pInterpC = &pInterpBuffer[Attrib * 3 * numComponents + numComponents * 2 + Comp];    // C group: offset 2 * numComponents
+
+    __m128 vA = _mm_broadcast_ss(pInterpA);    // replicate each scalar coefficient into all four lanes
+    __m128 vB = _mm_broadcast_ss(pInterpB);
+    __m128 vC = _mm_broadcast_ss(pInterpC);
+
+    __m128 vk = _mm_sub_ps(_mm_sub_ps(_mm_set1_ps(1.0f), vI), vJ);    // vk = 1 - vI - vJ; presumably the third barycentric weight
+    vC = _mm_mul_ps(vk, vC);    // pre-scale the C coefficient by vk so it can be passed as the constant term below
+
+    return vplaneps128(vA, vB, vC, vI, vJ);    // vA * vI + vB * vJ + vk * vC
+}
+
+static INLINE __m128 _simd128_abs_ps(__m128 a)    // per-lane absolute value of a 128-bit float vector
+{
+    __m128i ai = _mm_castps_si128(a);    // reinterpret the float bits as integers; no value conversion
+    return _mm_castsi128_ps(_mm_and_si128(ai, _mm_set1_epi32(0x7fffffff)));    // clear bit 31 (the IEEE-754 sign bit) in every 32-bit lane
+}
+
+static INLINE simdscalar _simd_abs_ps(simdscalar a)    // per-lane absolute value of a simdscalar float vector
+{
+    simdscalari ai = _simd_castps_si(a);    // presumably a bit-pattern reinterpretation to the integer SIMD type - confirm against the _simd_castps_si definition
+    return _simd_castsi_ps(_simd_and_si(ai, _simd_set1_epi32(0x7fffffff)));    // mask with 0x7fffffff to drop the sign bit of every 32-bit lane
+}
+
 INLINE
 UINT pdep_u32(UINT a, UINT mask)
 {
diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.h b/src/gallium/drivers/swr/rasterizer/core/frontend.h
index 8307c0b..b637785 100644
--- a/src/gallium/drivers/swr/rasterizer/core/frontend.h
+++ b/src/gallium/drivers/swr/rasterizer/core/frontend.h
@@ -307,6 +307,14 @@ bool CanUseSimplePoints(DRAW_CONTEXT *pDC)
             !state.rastState.pointSpriteEnable);
 }
 
+INLINE
+bool vIsNaN(const __m128& vec)    // returns true if any of the four float lanes of vec is NaN
+{
+    const __m128 result = _mm_cmpunord_ps(vec, vec);    // a lane compares unordered with itself only when it is NaN; such lanes become all-ones
+    const int32_t mask = _mm_movemask_ps(result);       // collect the top bit of each lane into bits 0..3
+    return (mask != 0);
+}
+
 uint32_t GetNumPrims(PRIMITIVE_TOPOLOGY mode, uint32_t numElements);
 uint32_t NumVertsPerPrim(PRIMITIVE_TOPOLOGY topology, bool includeAdjVerts);
 
diff --git a/src/gallium/drivers/swr/rasterizer/core/state.h b/src/gallium/drivers/swr/rasterizer/core/state.h
index 5036106..88ec4b0 100644
--- a/src/gallium/drivers/swr/rasterizer/core/state.h
+++ b/src/gallium/drivers/swr/rasterizer/core/state.h
@@ -197,8 +197,6 @@ enum SWR_OUTER_TESSFACTOR_ID
 #define VERTEX_CLIPCULL_DIST_LO_SLOT 35 // VS writes lower 4 clip/cull dist
 #define VERTEX_CLIPCULL_DIST_HI_SLOT 36 // VS writes upper 4 clip/cull dist
 #define VERTEX_POINT_SIZE_SLOT 37       // VS writes point size here
-static_assert(VERTEX_POINT_SIZE_SLOT < KNOB_NUM_ATTRIBUTES, "Mismatched attribute slot size");
-
 // SoAoSoA
 struct simdvertex
 {
-- 
1.9.1



More information about the mesa-dev mailing list