Mesa (master): llvmpipe: add rast_tri_4_16 for small lines and points

Keith Whitwell keithw at kemper.freedesktop.org
Fri Oct 8 16:32:05 UTC 2010


Module: Mesa
Branch: master
Commit: 0ff132e5a633170afaed0aea54d01438c895b8ab
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=0ff132e5a633170afaed0aea54d01438c895b8ab

Author: Keith Whitwell <keithw at vmware.com>
Date:   Fri Oct  8 17:21:03 2010 +0100

llvmpipe: add rast_tri_4_16 for small lines and points

---

 src/gallium/drivers/llvmpipe/lp_rast.c         |    1 +
 src/gallium/drivers/llvmpipe/lp_rast.h         |   11 +-
 src/gallium/drivers/llvmpipe/lp_rast_debug.c   |    1 +
 src/gallium/drivers/llvmpipe/lp_rast_priv.h    |    4 +
 src/gallium/drivers/llvmpipe/lp_rast_tri.c     |  152 ++----------------------
 src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h |  127 ++++++++++++++++++++
 src/gallium/drivers/llvmpipe/lp_setup_tri.c    |   13 ++-
 7 files changed, 161 insertions(+), 148 deletions(-)

diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c
index 790d88a..db9b2f9 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast.c
@@ -597,6 +597,7 @@ static lp_rast_cmd_func dispatch[LP_RAST_OP_MAX] =
    lp_rast_triangle_8,
    lp_rast_triangle_3_4,
    lp_rast_triangle_3_16,
+   lp_rast_triangle_4_16,
    lp_rast_shade_tile,
    lp_rast_shade_tile_opaque,
    lp_rast_begin_query,
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h
index 0f62377..df0bea0 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast.h
@@ -238,12 +238,13 @@ lp_rast_arg_null( void )
 #define LP_RAST_OP_TRIANGLE_8        0x9
 #define LP_RAST_OP_TRIANGLE_3_4      0xa
 #define LP_RAST_OP_TRIANGLE_3_16     0xb
-#define LP_RAST_OP_SHADE_TILE        0xc
-#define LP_RAST_OP_SHADE_TILE_OPAQUE 0xd
-#define LP_RAST_OP_BEGIN_QUERY       0xe
-#define LP_RAST_OP_END_QUERY         0xf
+#define LP_RAST_OP_TRIANGLE_4_16     0xc
+#define LP_RAST_OP_SHADE_TILE        0xd
+#define LP_RAST_OP_SHADE_TILE_OPAQUE 0xe
+#define LP_RAST_OP_BEGIN_QUERY       0xf
+#define LP_RAST_OP_END_QUERY         0x10
 
-#define LP_RAST_OP_MAX               0x10
+#define LP_RAST_OP_MAX               0x11
 #define LP_RAST_OP_MASK              0xff
 
 void
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_debug.c b/src/gallium/drivers/llvmpipe/lp_rast_debug.c
index 9fc7864..6f4ba1c 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_debug.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast_debug.c
@@ -42,6 +42,7 @@ static const char *cmd_names[LP_RAST_OP_MAX] =
    "triangle_8",
    "triangle_3_4",
    "triangle_3_16",
+   "triangle_4_16",
    "shade_tile",
    "shade_tile_opaque",
    "begin_query",
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h
index 7370119..104000a 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h
@@ -293,6 +293,10 @@ void lp_rast_triangle_3_4(struct lp_rasterizer_task *,
 
 void lp_rast_triangle_3_16( struct lp_rasterizer_task *, 
                             const union lp_rast_cmd_arg );
+
+void lp_rast_triangle_4_16( struct lp_rasterizer_task *, 
+                            const union lp_rast_cmd_arg );
+
 void
 lp_debug_bin( const struct cmd_bin *bin );
 
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c
index a1f309d..f870a18 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c
@@ -123,6 +123,16 @@ lp_rast_triangle_3_16(struct lp_rasterizer_task *task,
 }
 
 void
+lp_rast_triangle_4_16(struct lp_rasterizer_task *task,
+                      const union lp_rast_cmd_arg arg)
+{
+   union lp_rast_cmd_arg arg2;
+   arg2.triangle.tri = arg.triangle.tri;
+   arg2.triangle.plane_mask = (1<<4)-1;
+   lp_rast_triangle_3(task, arg2);
+}
+
+void
 lp_rast_triangle_3_4(struct lp_rasterizer_task *task,
                       const union lp_rast_cmd_arg arg)
 {
@@ -229,145 +239,6 @@ sign_bits4(const __m128i *cstep, int cdiff)
    return _mm_movemask_epi8(result);
 }
 
-
-/* Special case for 3 plane triangle which is contained entirely
- * within a 16x16 block.
- */
-void
-lp_rast_triangle_3_16(struct lp_rasterizer_task *task,
-                      const union lp_rast_cmd_arg arg)
-{
-   const struct lp_rast_triangle *tri = arg.triangle.tri;
-   const struct lp_rast_plane *plane = tri->plane;
-   unsigned mask = arg.triangle.plane_mask;
-   const int x = task->x + (mask & 0xff);
-   const int y = task->y + (mask >> 8);
-   unsigned outmask, inmask, partmask, partial_mask;
-   unsigned j;
-   __m128i cstep4[3][4];
-
-   outmask = 0;                 /* outside one or more trivial reject planes */
-   partmask = 0;                /* outside one or more trivial accept planes */
-
-   for (j = 0; j < 3; j++) {
-      const int dcdx = -plane[j].dcdx * 4;
-      const int dcdy = plane[j].dcdy * 4;
-      __m128i xdcdy = _mm_set1_epi32(dcdy);
-
-      cstep4[j][0] = _mm_setr_epi32(0, dcdx, dcdx*2, dcdx*3);
-      cstep4[j][1] = _mm_add_epi32(cstep4[j][0], xdcdy);
-      cstep4[j][2] = _mm_add_epi32(cstep4[j][1], xdcdy);
-      cstep4[j][3] = _mm_add_epi32(cstep4[j][2], xdcdy);
-
-      {
-	 const int c = plane[j].c + plane[j].dcdy * y - plane[j].dcdx * x;
-	 const int cox = plane[j].eo * 4;
-	 const int cio = plane[j].ei * 4 - 1;
-
-	 outmask |= sign_bits4(cstep4[j], c + cox);
-	 partmask |= sign_bits4(cstep4[j], c + cio);
-      }
-   }
-
-   if (outmask == 0xffff)
-      return;
-
-   /* Mask of sub-blocks which are inside all trivial accept planes:
-    */
-   inmask = ~partmask & 0xffff;
-
-   /* Mask of sub-blocks which are inside all trivial reject planes,
-    * but outside at least one trivial accept plane:
-    */
-   partial_mask = partmask & ~outmask;
-
-   assert((partial_mask & inmask) == 0);
-
-   /* Iterate over partials:
-    */
-   while (partial_mask) {
-      int i = ffs(partial_mask) - 1;
-      int ix = (i & 3) * 4;
-      int iy = (i >> 2) * 4;
-      int px = x + ix;
-      int py = y + iy; 
-      unsigned mask = 0xffff;
-
-      partial_mask &= ~(1 << i);
-
-      for (j = 0; j < 3; j++) {
-         const int cx = (plane[j].c 
-			 - plane[j].dcdx * px
-			 + plane[j].dcdy * py) * 4;
-
-	 mask &= ~sign_bits4(cstep4[j], cx);
-      }
-
-      if (mask)
-	 lp_rast_shade_quads_mask(task, &tri->inputs, px, py, mask);
-   }
-
-   /* Iterate over fulls: 
-    */
-   while (inmask) {
-      int i = ffs(inmask) - 1;
-      int ix = (i & 3) * 4;
-      int iy = (i >> 2) * 4;
-      int px = x + ix;
-      int py = y + iy; 
-
-      inmask &= ~(1 << i);
-
-      block_full_4(task, tri, px, py);
-   }
-}
-
-
-void
-lp_rast_triangle_3_4(struct lp_rasterizer_task *task,
-		     const union lp_rast_cmd_arg arg)
-{
-   const struct lp_rast_triangle *tri = arg.triangle.tri;
-   const struct lp_rast_plane *plane = tri->plane;
-   unsigned mask = arg.triangle.plane_mask;
-   const int x = task->x + (mask & 0xff);
-   const int y = task->y + (mask >> 8);
-   unsigned j;
-
-   /* Iterate over partials:
-    */
-   {
-      unsigned mask = 0xffff;
-
-      for (j = 0; j < 3; j++) {
-	 const int cx = (plane[j].c 
-			 - plane[j].dcdx * x
-			 + plane[j].dcdy * y);
-
-	 const int dcdx = -plane[j].dcdx;
-	 const int dcdy = plane[j].dcdy;
-	 __m128i xdcdy = _mm_set1_epi32(dcdy);
-
-	 __m128i cstep0 = _mm_setr_epi32(cx, cx + dcdx, cx + dcdx*2, cx + dcdx*3);
-	 __m128i cstep1 = _mm_add_epi32(cstep0, xdcdy);
-	 __m128i cstep2 = _mm_add_epi32(cstep1, xdcdy);
-	 __m128i cstep3 = _mm_add_epi32(cstep2, xdcdy);
-
-	 __m128i cstep01 = _mm_packs_epi32(cstep0, cstep1);
-	 __m128i cstep23 = _mm_packs_epi32(cstep2, cstep3);
-	 __m128i result = _mm_packs_epi16(cstep01, cstep23);
-
-	 /* Extract the sign bits
-	  */
-	 mask &= ~_mm_movemask_epi8(result);
-      }
-
-      if (mask)
-	 lp_rast_shade_quads_mask(task, &tri->inputs, x, y, mask);
-   }
-}
-
-
 #endif
 
 
@@ -383,10 +254,13 @@ lp_rast_triangle_3_4(struct lp_rasterizer_task *task,
 
 #define TAG(x) x##_3
 #define NR_PLANES 3
+#define TRI_4 lp_rast_triangle_3_4
+#define TRI_16 lp_rast_triangle_3_16
 #include "lp_rast_tri_tmp.h"
 
 #define TAG(x) x##_4
 #define NR_PLANES 4
+#define TRI_16 lp_rast_triangle_4_16
 #include "lp_rast_tri_tmp.h"
 
 #define TAG(x) x##_5
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h
index 9830a43..c8f9956 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h
@@ -245,6 +245,133 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task,
    }
 }
 
+#if defined(PIPE_ARCH_SSE) && defined(TRI_16)
+/* XXX: special case this when intersection is not required.
+ *      - tile completely within bbox,
+ *      - bbox completely within tile.
+ */
+void
+TRI_16(struct lp_rasterizer_task *task,
+       const union lp_rast_cmd_arg arg)
+{
+   const struct lp_rast_triangle *tri = arg.triangle.tri;
+   const struct lp_rast_plane *plane = tri->plane;
+   unsigned mask = arg.triangle.plane_mask;
+   unsigned outmask, partial_mask;
+   unsigned j;
+   __m128i cstep4[NR_PLANES][4];
+
+   int x = (mask & 0xff);
+   int y = (mask >> 8);
+
+   outmask = 0;                 /* outside one or more trivial reject planes */
+   
+   x += task->x;
+   y += task->y;
+
+   for (j = 0; j < NR_PLANES; j++) {
+      const int dcdx = -plane[j].dcdx * 4;
+      const int dcdy = plane[j].dcdy * 4;
+      __m128i xdcdy = _mm_set1_epi32(dcdy);
+
+      cstep4[j][0] = _mm_setr_epi32(0, dcdx, dcdx*2, dcdx*3);
+      cstep4[j][1] = _mm_add_epi32(cstep4[j][0], xdcdy);
+      cstep4[j][2] = _mm_add_epi32(cstep4[j][1], xdcdy);
+      cstep4[j][3] = _mm_add_epi32(cstep4[j][2], xdcdy);
+
+      {
+	 const int c = plane[j].c + plane[j].dcdy * y - plane[j].dcdx * x;
+	 const int cox = plane[j].eo * 4;
+
+	 outmask |= sign_bits4(cstep4[j], c + cox);
+      }
+   }
+
+   if (outmask == 0xffff)
+      return;
+
+
+   /* Mask of sub-blocks which are inside all trivial reject planes,
+    * but outside at least one trivial accept plane:
+    */
+   partial_mask = 0xffff & ~outmask;
+
+   /* Iterate over partials:
+    */
+   while (partial_mask) {
+      int i = ffs(partial_mask) - 1;
+      int ix = (i & 3) * 4;
+      int iy = (i >> 2) * 4;
+      int px = x + ix;
+      int py = y + iy; 
+      unsigned mask = 0xffff;
+
+      partial_mask &= ~(1 << i);
+
+      for (j = 0; j < NR_PLANES; j++) {
+         const int cx = (plane[j].c 
+			 - plane[j].dcdx * px
+			 + plane[j].dcdy * py) * 4;
+
+	 mask &= ~sign_bits4(cstep4[j], cx);
+      }
+
+      if (mask)
+	 lp_rast_shade_quads_mask(task, &tri->inputs, px, py, mask);
+   }
+}
+#endif
+
+#if defined(PIPE_ARCH_SSE) && defined(TRI_4)
+void
+TRI_4(struct lp_rasterizer_task *task,
+      const union lp_rast_cmd_arg arg)
+{
+   const struct lp_rast_triangle *tri = arg.triangle.tri;
+   const struct lp_rast_plane *plane = tri->plane;
+   unsigned mask = arg.triangle.plane_mask;
+   const int x = task->x + (mask & 0xff);
+   const int y = task->y + (mask >> 8);
+   unsigned j;
+
+   /* Iterate over partials:
+    */
+   {
+      unsigned mask = 0xffff;
+
+      for (j = 0; j < NR_PLANES; j++) {
+	 const int cx = (plane[j].c 
+			 - plane[j].dcdx * x
+			 + plane[j].dcdy * y);
+
+	 const int dcdx = -plane[j].dcdx;
+	 const int dcdy = plane[j].dcdy;
+	 __m128i xdcdy = _mm_set1_epi32(dcdy);
+
+	 __m128i cstep0 = _mm_setr_epi32(cx, cx + dcdx, cx + dcdx*2, cx + dcdx*3);
+	 __m128i cstep1 = _mm_add_epi32(cstep0, xdcdy);
+	 __m128i cstep2 = _mm_add_epi32(cstep1, xdcdy);
+	 __m128i cstep3 = _mm_add_epi32(cstep2, xdcdy);
+
+	 __m128i cstep01 = _mm_packs_epi32(cstep0, cstep1);
+	 __m128i cstep23 = _mm_packs_epi32(cstep2, cstep3);
+	 __m128i result = _mm_packs_epi16(cstep01, cstep23);
+
+	 /* Extract the sign bits
+	  */
+	 mask &= ~_mm_movemask_epi8(result);
+      }
+
+      if (mask)
+	 lp_rast_shade_quads_mask(task, &tri->inputs, x, y, mask);
+   }
+}
+#endif
+
+
+
 #undef TAG
+#undef TRI_4
+#undef TRI_16
 #undef NR_PLANES
 
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
index 9f87101..8fd0346 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
@@ -479,15 +479,14 @@ lp_setup_bin_triangle( struct lp_setup_context *setup,
    {
       int ix0 = bbox->x0 / TILE_SIZE;
       int iy0 = bbox->y0 / TILE_SIZE;
+      int px = bbox->x0 & 63 & ~3;
+      int py = bbox->y0 & 63 & ~3;
+      int mask = px | (py << 8);
 
       assert(iy0 == bbox->y1 / TILE_SIZE &&
 	     ix0 == bbox->x1 / TILE_SIZE);
 
       if (nr_planes == 3) {
-         int px = bbox->x0 & 63 & ~3;
-         int py = bbox->y0 & 63 & ~3;
-	 int mask = px | (py << 8);
-
          if (sz < 4)
          {
             /* Triangle is contained in a single 4x4 stamp:
@@ -507,6 +506,12 @@ lp_setup_bin_triangle( struct lp_setup_context *setup,
                                          lp_rast_arg_triangle(tri, mask) );
          }
       }
+      else if (nr_planes == 4 && sz < 16) 
+      {
+         return lp_scene_bin_command( scene, ix0, iy0,
+                                      LP_RAST_OP_TRIANGLE_4_16,
+                                      lp_rast_arg_triangle(tri, mask) );
+      }
 
 
       /* Triangle is contained in a single tile:




More information about the mesa-commit mailing list