Mesa (master): llvmpipe: combine linear mask calculation

Keith Whitwell keithw at kemper.freedesktop.org
Tue Aug 31 22:33:03 UTC 2010


Module: Mesa
Branch: master
Commit: 0aa3a09ced07e150901cd0f7a7917556a018c252
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=0aa3a09ced07e150901cd0f7a7917556a018c252

Author: Keith Whitwell <keithw at vmware.com>
Date:   Sun Aug 22 22:56:54 2010 +0100

llvmpipe: combine linear mask calculation

---

 src/gallium/drivers/llvmpipe/lp_rast_tri.c     |   73 ++++++++++++++++++++++--
 src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h |   26 +++++---
 2 files changed, 84 insertions(+), 15 deletions(-)

diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c
index 5b3ad6e..bdb8d13 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c
@@ -128,11 +128,71 @@ build_mask_linear(int c, int dcdx, int dcdy)
   
    return mask;
 }
+
+
+static INLINE void /* Accumulate two build_mask_linear() results: one at c, one at c + cdiff. */
+build_masks(int c, /* value handed to build_mask_linear() for *outmask */
+	    int cdiff, /* added to c to get the value used for *partmask */
+	    int dcdx, /* forwarded unchanged to build_mask_linear() */
+	    int dcdy, /* forwarded unchanged to build_mask_linear() */
+	    unsigned *outmask, /* in/out: the mask at c is OR-ed into it */
+	    unsigned *partmask) /* in/out: the mask at c + cdiff is OR-ed into it */
+{
+   *outmask |= build_mask_linear(c, dcdx, dcdy); /* existing bits in *outmask are kept */
+   *partmask |= build_mask_linear(c + cdiff, dcdx, dcdy); /* same steps, start offset by cdiff */
+}
+
 #else
 #include <emmintrin.h>
 #include "util/u_sse.h"
 
 
+static INLINE void /* SSE2 variant: gather the sign bits of a 4x4 grid of values at c and at c + cdiff. */
+build_masks(int c, /* value in lane 0 of the first row */
+	    int cdiff, /* added to every value before the second mask is taken */
+	    int dcdx, /* step between neighbouring lanes within a row */
+	    int dcdy, /* step between successive rows */
+	    unsigned *outmask, /* in/out: sign bits of the c-based grid are OR-ed in */
+	    unsigned *partmask) /* in/out: sign bits of the (c + cdiff)-based grid are OR-ed in */
+{
+   __m128i cstep0 = _mm_setr_epi32(c, c+dcdx, c+dcdx*2, c+dcdx*3); /* row 0, lanes 0..3 */
+   __m128i xdcdy = _mm_set1_epi32(dcdy); /* dcdy replicated into all four lanes */
+
+   /* Get values across the quad: each following row is the previous row
+    * with dcdy added to every lane, giving four rows of four values. */
+   __m128i cstep1 = _mm_add_epi32(cstep0, xdcdy); /* row 1 */
+   __m128i cstep2 = _mm_add_epi32(cstep1, xdcdy); /* row 2 */
+   __m128i cstep3 = _mm_add_epi32(cstep2, xdcdy); /* row 3 */
+
+   {
+      __m128i cstep01, cstep23, result;
+
+      cstep01 = _mm_packs_epi32(cstep0, cstep1); /* signed-saturating 32->16 pack keeps each sign */
+      cstep23 = _mm_packs_epi32(cstep2, cstep3); /* same for rows 2 and 3 */
+      result = _mm_packs_epi16(cstep01, cstep23); /* 16 signed bytes, rows 0..3 in order */
+
+      *outmask |= _mm_movemask_epi8(result); /* bit n = top bit of byte n, i.e. value n is negative */
+   }
+
+
+   {
+      __m128i cio4 = _mm_set1_epi32(cdiff); /* cdiff replicated into all four lanes */
+      __m128i cstep01, cstep23, result;
+
+      cstep0 = _mm_add_epi32(cstep0, cio4); /* reuse the rows computed above, offset by cdiff */
+      cstep1 = _mm_add_epi32(cstep1, cio4);
+      cstep2 = _mm_add_epi32(cstep2, cio4);
+      cstep3 = _mm_add_epi32(cstep3, cio4);
+
+      cstep01 = _mm_packs_epi32(cstep0, cstep1); /* same pack sequence as the first mask */
+      cstep23 = _mm_packs_epi32(cstep2, cstep3);
+      result = _mm_packs_epi16(cstep01, cstep23);
+
+      *partmask |= _mm_movemask_epi8(result); /* sign bits of the offset grid */
+   }
+}
+
+
 static INLINE unsigned
 build_mask_linear(int c, int dcdx, int dcdy)
 {
@@ -263,11 +323,14 @@ lp_rast_triangle_3_16(struct lp_rasterizer_task *task,
       {
 	 const int dcdx = -plane[j].dcdx * 4;
 	 const int dcdy = plane[j].dcdy * 4;
-	 const int cox = c[j] + plane[j].eo * 4;
-	 const int cio = c[j] + plane[j].ei * 4 - 1;
-
-	 outmask |= build_mask_linear(cox, dcdx, dcdy);
-	 partmask |= build_mask_linear(cio, dcdx, dcdy);
+	 const int cox = plane[j].eo * 4;
+	 const int cio = plane[j].ei * 4 - 1;
+
+	 build_masks(c[j] + cox,
+		     cio - cox,
+		     dcdx, dcdy, 
+		     &outmask,   /* sign bits from c[i][0..15] + cox */
+		     &partmask); /* sign bits from c[i][0..15] + cio */
       }
    }
 
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h
index 0def5f7..99a0bae 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h
@@ -81,11 +81,14 @@ TAG(do_block_16)(struct lp_rasterizer_task *task,
    for (j = 0; j < NR_PLANES; j++) {
       const int dcdx = -plane[j].dcdx * 4;
       const int dcdy = plane[j].dcdy * 4;
-      const int cox = c[j] + plane[j].eo * 4;
-      const int cio = c[j] + plane[j].ei * 4 - 1;
-
-      outmask |= build_mask_linear(cox, dcdx, dcdy);
-      partmask |= build_mask_linear(cio, dcdx, dcdy);
+      const int cox = plane[j].eo * 4;
+      const int cio = plane[j].ei * 4 - 1;
+
+      build_masks(c[j] + cox,
+		  cio - cox,
+		  dcdx, dcdy, 
+		  &outmask,   /* sign bits from c[i][0..15] + cox */
+		  &partmask); /* sign bits from c[i][0..15] + cio */
    }
 
    if (outmask == 0xffff)
@@ -171,11 +174,14 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task,
       {
 	 const int dcdx = -plane[j].dcdx * 16;
 	 const int dcdy = plane[j].dcdy * 16;
-	 const int cox = c[j] + plane[j].eo * 16;
-	 const int cio = c[j] + plane[j].ei * 16 - 1;
-
-	 outmask |= build_mask_linear(cox, dcdx, dcdy);
-	 partmask |= build_mask_linear(cio, dcdx, dcdy);
+	 const int cox = plane[j].eo * 16;
+	 const int cio = plane[j].ei * 16 - 1;
+
+	 build_masks(c[j] + cox,
+		     cio - cox,
+		     dcdx, dcdy, 
+		     &outmask,   /* sign bits from c[i][0..15] + cox */
+		     &partmask); /* sign bits from c[i][0..15] + cio */
       }
 
       j++;




More information about the mesa-commit mailing list