[Mesa-dev] [PATCH][RFC] mesa/main: Clamp rgba with streamed sse

Juha-Pekka Heikkila juhapekka.heikkila at gmail.com
Fri Oct 31 03:13:10 PDT 2014


Signed-off-by: Juha-Pekka Heikkila <juhapekka.heikkila at gmail.com>
---
 src/mesa/main/colormac.h      | 20 +++++++++++++++
 src/mesa/main/pixeltransfer.c | 59 ++++++++++++++++++++++++++++++++-----------
 2 files changed, 64 insertions(+), 15 deletions(-)

diff --git a/src/mesa/main/colormac.h b/src/mesa/main/colormac.h
index c8adca6..da5e094 100644
--- a/src/mesa/main/colormac.h
+++ b/src/mesa/main/colormac.h
@@ -51,6 +51,26 @@ _mesa_unclamped_float_rgba_to_ubyte(GLubyte dst[4], const GLfloat src[4])
 
 
 /**
+ * Clamp the four floats in src to [min,max] with SSE; write them to result.
+ */
+#if defined(__SSE2__) && defined(__GNUC__)
+static inline void
+_mesa_clamp_float_rgba(GLfloat src[4], GLfloat result[4], const float min,
+                       const float max)
+{
+    __m128  operand, minval, maxval;
+
+    operand = _mm_loadu_ps(src);            /* unaligned load of 4 floats */
+    minval = _mm_set1_ps(min);              /* min in all four lanes */
+    maxval = _mm_set1_ps(max);              /* max in all four lanes */
+    operand = _mm_max_ps(operand, minval);  /* raise lanes below min */
+    operand = _mm_min_ps(operand, maxval);  /* lower lanes above max */
+    _mm_storeu_ps(result, operand);         /* unaligned; result may be src */
+}
+#endif
+
+
+/**
  * \name Generic color packing macros.  All inputs should be GLubytes.
  *
  * \todo We may move these into texstore.h at some point.
diff --git a/src/mesa/main/pixeltransfer.c b/src/mesa/main/pixeltransfer.c
index 8bbeeb8..e16eb59 100644
--- a/src/mesa/main/pixeltransfer.c
+++ b/src/mesa/main/pixeltransfer.c
@@ -35,7 +35,7 @@
 #include "pixeltransfer.h"
 #include "imports.h"
 #include "mtypes.h"
-
+#include "x86/common_x86_asm.h"
 
 /*
  * Apply scale and bias factors to an array of RGBA pixels.
@@ -89,16 +89,34 @@ _mesa_map_rgba( const struct gl_context *ctx, GLuint n, GLfloat rgba[][4] )
    const GLfloat *bMap = ctx->PixelMaps.BtoB.Map;
    const GLfloat *aMap = ctx->PixelMaps.AtoA.Map;
    GLuint i;
-   for (i=0;i<n;i++) {
-      GLfloat r = CLAMP(rgba[i][RCOMP], 0.0F, 1.0F);
-      GLfloat g = CLAMP(rgba[i][GCOMP], 0.0F, 1.0F);
-      GLfloat b = CLAMP(rgba[i][BCOMP], 0.0F, 1.0F);
-      GLfloat a = CLAMP(rgba[i][ACOMP], 0.0F, 1.0F);
-      rgba[i][RCOMP] = rMap[F_TO_I(r * rscale)];
-      rgba[i][GCOMP] = gMap[F_TO_I(g * gscale)];
-      rgba[i][BCOMP] = bMap[F_TO_I(b * bscale)];
-      rgba[i][ACOMP] = aMap[F_TO_I(a * ascale)];
+
+#if defined(__SSE2__) && defined(__GNUC__)
+   if (cpu_has_xmm2) {
+      for (i=0;i<n;i++) {
+         GLfloat rgba_temp[4];
+         _mesa_clamp_float_rgba(rgba[i], rgba_temp, 0.0F, 1.0F);
+         rgba[i][RCOMP] = rMap[F_TO_I(rgba_temp[RCOMP] * rscale)];
+         rgba[i][GCOMP] = gMap[F_TO_I(rgba_temp[GCOMP] * gscale)];
+         rgba[i][BCOMP] = bMap[F_TO_I(rgba_temp[BCOMP] * bscale)];
+         rgba[i][ACOMP] = aMap[F_TO_I(rgba_temp[ACOMP] * ascale)];
+      }
+   }
+   else {
+#endif
+      for (i=0;i<n;i++) {
+         GLfloat rgba_temp[4];
+         rgba_temp[RCOMP] = CLAMP(rgba[i][RCOMP], 0.0F, 1.0F);
+         rgba_temp[GCOMP] = CLAMP(rgba[i][GCOMP], 0.0F, 1.0F);
+         rgba_temp[BCOMP] = CLAMP(rgba[i][BCOMP], 0.0F, 1.0F);
+         rgba_temp[ACOMP] = CLAMP(rgba[i][ACOMP], 0.0F, 1.0F);
+         rgba[i][RCOMP] = rMap[F_TO_I(rgba_temp[RCOMP] * rscale)];
+         rgba[i][GCOMP] = gMap[F_TO_I(rgba_temp[GCOMP] * gscale)];
+         rgba[i][BCOMP] = bMap[F_TO_I(rgba_temp[BCOMP] * bscale)];
+         rgba[i][ACOMP] = aMap[F_TO_I(rgba_temp[ACOMP] * ascale)];
+      }
+#if defined(__SSE2__) && defined(__GNUC__)
    }
+#endif
 }
 
 /*
@@ -179,12 +197,23 @@ _mesa_apply_rgba_transfer_ops(struct gl_context *ctx, GLbitfield transferOps,
    /* clamping to [0,1] */
    if (transferOps & IMAGE_CLAMP_BIT) {
       GLuint i;
-      for (i = 0; i < n; i++) {
-         rgba[i][RCOMP] = CLAMP(rgba[i][RCOMP], 0.0F, 1.0F);
-         rgba[i][GCOMP] = CLAMP(rgba[i][GCOMP], 0.0F, 1.0F);
-         rgba[i][BCOMP] = CLAMP(rgba[i][BCOMP], 0.0F, 1.0F);
-         rgba[i][ACOMP] = CLAMP(rgba[i][ACOMP], 0.0F, 1.0F);
+#if defined(__SSE2__) && defined(__GNUC__)
+      if (cpu_has_xmm2) {
+         for (i = 0; i < n; i++) {
+             _mesa_clamp_float_rgba(rgba[i], rgba[i], 0.0F, 1.0F);
+         }
+      }
+      else {
+#endif
+         for (i = 0; i < n; i++) {
+            rgba[i][RCOMP] = CLAMP(rgba[i][RCOMP], 0.0F, 1.0F);
+            rgba[i][GCOMP] = CLAMP(rgba[i][GCOMP], 0.0F, 1.0F);
+            rgba[i][BCOMP] = CLAMP(rgba[i][BCOMP], 0.0F, 1.0F);
+            rgba[i][ACOMP] = CLAMP(rgba[i][ACOMP], 0.0F, 1.0F);
+         }
+#if defined(__SSE2__) && defined(__GNUC__)
       }
+#endif
    }
 }
 
-- 
1.8.5.1



More information about the mesa-dev mailing list