mesa: Branch 'index-swtnl-0.1'

Fri Feb 16 19:04:46 UTC 2007

 src/mesa/drivers/dri/i915tex/intel_batchbuffer.h |    5 
 src/mesa/drivers/dri/i915tex/intel_idx_render.c  |   78 +++++--
 src/mesa/tnl/t_context.h                         |    6 
 src/mesa/tnl/t_vb_index.c                        |  228 +++++++++++++++--------
 4 files changed, 220 insertions(+), 97 deletions(-)

New commits:
diff-tree bd56abc3441dfb192b7825a8407411f89827abd1 (from 2ca4ca0068203699f73546de0db544a2a687eae7)
Author: Keith Whitwell <keith at tungstengraphics.com>
Date:   Fri Feb 16 18:35:12 2007 +0000

    Dynamically grow index storage.  Fix several bugs.

diff --git a/src/mesa/drivers/dri/i915tex/intel_batchbuffer.h b/src/mesa/drivers/dri/i915tex/intel_batchbuffer.h
index 59261f7..916bcae 100644
--- a/src/mesa/drivers/dri/i915tex/intel_batchbuffer.h
+++ b/src/mesa/drivers/dri/i915tex/intel_batchbuffer.h
@@ -6,7 +6,10 @@
 
 struct intel_context;
 
-#define BATCH_SZ 16384
+/* Must be able to hold at minimum VB->Size * 3 * 2 bytes for
+ * intel_idx_render.c indices, which is currently about 20k.
+ */
+#define BATCH_SZ (64*1024)
 #define BATCH_RESERVED 16
 
 #define MAX_RELOCS 400
diff --git a/src/mesa/drivers/dri/i915tex/intel_idx_render.c b/src/mesa/drivers/dri/i915tex/intel_idx_render.c
index e7e4cf9..decd5d9 100644
--- a/src/mesa/drivers/dri/i915tex/intel_idx_render.c
+++ b/src/mesa/drivers/dri/i915tex/intel_idx_render.c
@@ -57,6 +57,16 @@
 #define MAX_VBO 16		/* XXX: make dynamic */
 #define VBO_SIZE (128*1024)
 
+
+/* Basically limited to what is addressable by the 16-bit indices,
+ * and the number of indices we can fit in a batchbuffer after
+ * making room for state.
+ */
+#define HW_MAX_INDEXABLE_VERTS   ((1<<16)-1)
+#define HW_MAX_INDICES           ((BATCH_SZ - 1024) / 2)
+
+
+
 struct intel_vb {
    struct intel_context *intel;
 
@@ -78,7 +88,10 @@ struct intel_vb {
 
 
 static GLboolean check_idx_render(GLcontext *ctx, 
-				  struct vertex_buffer *VB)
+				  struct vertex_buffer *VB,
+				  GLuint *max_nr_verts,
+				  GLuint *max_nr_indices )
+
 {
    struct intel_context *intel = intel_context(ctx);
    GLuint i;
@@ -87,6 +100,12 @@ static GLboolean check_idx_render(GLcont
        intel->RenderIndex != 0)
       return GL_FALSE;
 
+   /* These are constant, but for some hardware they might vary
+    * depending on the state, eg. according to vertex size.
+    */
+   *max_nr_verts = HW_MAX_INDEXABLE_VERTS;
+   *max_nr_indices = HW_MAX_INDICES;
+
    /* Fix points with different dstorg bias state??  or use different
     * viewport transform in this case only (requires flush at level
     * above).
@@ -141,19 +160,25 @@ release_current_vbo( struct intel_vb *vb
 {
    GLcontext *ctx = &vb->intel->ctx;
 
-   ctx->Driver.UnmapBuffer( ctx, 
-			    GL_ARRAY_BUFFER_ARB,
-			    &vb->current_vbo->Base );
+   DBG("%s\n", __FUNCTION__);
+
+   if (vb->current_vbo_ptr)
+      ctx->Driver.UnmapBuffer( ctx, 
+			       GL_ARRAY_BUFFER_ARB,
+			       &vb->current_vbo->Base );
 
    vb->current_vbo = NULL;
    vb->current_vbo_ptr = NULL;
    vb->current_vbo_size = 0;
    vb->current_vbo_used = 0;
+   vb->dirty = 0;
 }
 
 static void 
 reset_vbo( struct intel_vb *vb )
 {
+   DBG("%s\n", __FUNCTION__);
+
    if (vb->current_vbo)
       release_current_vbo( vb );
 
@@ -168,6 +193,8 @@ get_next_vbo( struct intel_vb *vb, GLuin
 {
    GLcontext *ctx = &vb->intel->ctx;
 
+   DBG("%s\n", __FUNCTION__);
+
    /* XXX: just allocate more vbos here:
     */
    if (vb->nr_vbo == MAX_VBO) {
@@ -201,19 +228,16 @@ get_next_vbo( struct intel_vb *vb, GLuin
 			   GL_DYNAMIC_DRAW_ARB,
 			   &vb->current_vbo->Base );
 
-   /* Map the vbo now, will be unmapped in release_current_vbo, above.
-    */
-   vb->current_vbo_ptr = ctx->Driver.MapBuffer( ctx,
-						GL_ARRAY_BUFFER_ARB,
-						GL_WRITE_ONLY,
-						&vb->current_vbo->Base );
 }
       
 static void *get_space( struct intel_vb *vb, GLuint nr, GLuint vertex_size )
 {
+   GLcontext *ctx = &vb->intel->ctx;
    void *ptr;
    GLuint space = nr * vertex_size * 4;
 
+   DBG("%s %d %d\n", __FUNCTION__, nr, vertex_size);
+
    if (vb->current_vbo_used + space > vb->current_vbo_size)
       get_next_vbo( vb, space );
 
@@ -223,6 +247,16 @@ static void *get_space( struct intel_vb 
       vb->dirty = 1;
    }
 
+   if (!vb->current_vbo_ptr) {
+      /* Map the vbo now, will be unmapped in release_current_vbo, above.
+       */
+      vb->current_vbo_ptr = ctx->Driver.MapBuffer( ctx,
+						   GL_ARRAY_BUFFER_ARB,
+						   GL_WRITE_ONLY,
+						   &vb->current_vbo->Base );
+   }
+
+
    /* Hmm, could just re-emit the vertex buffer packet & avoid this:
     */
    vb->hw_vbo_delta = (vb->current_vbo_used - vb->hw_vbo_offset) / (vb->vertex_size * 4);
@@ -234,13 +268,6 @@ static void *get_space( struct intel_vb 
 }
 
 
-static GLuint get_max_vb_size( GLcontext *ctx )
-{
-   /* Basically limited to what is addressable by the 16-bit indices.
-    */
-   return (1<<16)-1;
-}
-
 static void
 build_and_emit_vertices(GLcontext * ctx, GLuint nr)
 {
@@ -248,6 +275,8 @@ build_and_emit_vertices(GLcontext * ctx,
    TNLcontext *tnl = TNL_CONTEXT(ctx);
    void *ptr = get_space(intel->vb, nr, intel->vertex_size );
 
+   DBG("%s %d\n", __FUNCTION__, nr);
+
    assert(tnl->clipspace.vertex_size == intel->vertex_size * 4);
 
    tnl->clipspace.new_inputs |= VERT_BIT_POS;
@@ -264,6 +293,8 @@ static void emit_built_vertices( GLconte
    struct intel_context *intel = intel_context(ctx);
    void *ptr = get_space(intel->vb, nr, intel->vertex_size );
 
+   DBG("%s %d\n", __FUNCTION__, nr);
+
    memcpy(ptr, _tnl_get_vertex(ctx, 0), 
 	  nr * intel->vertex_size * sizeof(GLuint));
 }
@@ -349,15 +380,17 @@ static void emit_prims( GLcontext *ctx,
  */
 void intel_idx_lost_hardware( struct intel_context *intel )
 {
+   GLcontext *ctx = &intel->ctx;
    struct intel_vb *vb = intel->vb;
 
    DBG("%s\n", __FUNCTION__);
 
-   if (vb->current_vbo) 
-      release_current_vbo( vb );
-
-
-/*    reset_vbo(intel->vb); */
+   if (vb->current_vbo_ptr) {
+      ctx->Driver.UnmapBuffer( ctx, 
+			       GL_ARRAY_BUFFER_ARB,
+			       &vb->current_vbo->Base );
+      vb->current_vbo_ptr = NULL;
+   }
 }
 
 void intel_idx_init( struct intel_context *intel )
@@ -367,7 +400,6 @@ void intel_idx_init( struct intel_contex
    GLuint i;
 
    tnl->Driver.Render.CheckIdxRender       = check_idx_render;
-   tnl->Driver.Render.GetMaxVBSize         = get_max_vb_size;
    tnl->Driver.Render.BuildAndEmitVertices = build_and_emit_vertices;
    tnl->Driver.Render.EmitBuiltVertices    = emit_built_vertices;
    tnl->Driver.Render.EmitPrims            = emit_prims;
diff --git a/src/mesa/tnl/t_context.h b/src/mesa/tnl/t_context.h
index 837507c..e67de7f 100644
--- a/src/mesa/tnl/t_context.h
+++ b/src/mesa/tnl/t_context.h
@@ -489,8 +489,10 @@ struct tnl_device_driver
        * This function is called only from _tnl_render_stage in tnl/t_render.c.
        */
       
-      GLboolean (*CheckIdxRender)( GLcontext *ctx, struct vertex_buffer *VB );
-      GLuint (*GetMaxVBSize)( GLcontext *ctx );
+      GLboolean (*CheckIdxRender)( GLcontext *ctx, 
+				   struct vertex_buffer *VB,
+				   GLuint *max_indexable_vertices,
+				   GLuint *max_indices );
       void (*BuildAndEmitVertices)( GLcontext *ctx, GLuint nr );
       void (*EmitBuiltVertices)( GLcontext *ctx, GLuint nr );
       void (*EmitPrims)( GLcontext *ctx,
diff --git a/src/mesa/tnl/t_vb_index.c b/src/mesa/tnl/t_vb_index.c
index 8520d54..ebaac99 100644
--- a/src/mesa/tnl/t_vb_index.c
+++ b/src/mesa/tnl/t_vb_index.c
@@ -45,7 +45,8 @@
 #include "t_pipeline.h"
 
 #define IDX_MAX_PRIM 64
-#define IDX_MAX_INDEX 2048	/* xxx: fix me! */
+#define INITIAL_INDEX_BUFSZ 2048
+
 
 struct idx_context {
    GLcontext *ctx;
@@ -58,12 +59,14 @@ struct idx_context {
    struct _mesa_prim prim[IDX_MAX_PRIM];	
 
    GLuint nr_indices;
-   GLuint indices[2048];
+   GLuint *indices;
    GLuint index_buffer_size;
 
-   GLuint vb_size;
    GLuint orig_VB_count;
 
+   GLuint hw_max_indexable_verts;
+   GLuint hw_max_indices;
+
    GLboolean flatshade;
 };
 
@@ -84,8 +87,8 @@ static GLboolean init_idx( GLcontext *ct
 
    stage->privatePtr = (void *)idx;
 
-   idx->index_buffer_size = IDX_MAX_INDEX;
-
+   idx->index_buffer_size = INITIAL_INDEX_BUFSZ;
+   idx->indices = _mesa_malloc(idx->index_buffer_size * sizeof(GLuint));
    idx->VB = VB;
    idx->tnl = tnl;
    idx->ctx = ctx;
@@ -106,7 +109,7 @@ static void free_idx( struct tnl_pipelin
 static void flush( struct idx_context *idx )
 {
    assert(idx->nr_prims <= IDX_MAX_PRIM);
-   assert(idx->nr_indices <= IDX_MAX_INDEX);
+   assert(idx->nr_indices <= idx->index_buffer_size);
 
    if (idx->VB->ClipOrMask) {
       idx->tnl->Driver.Render.EmitBuiltVertices( idx->ctx, idx->VB->Count );
@@ -128,15 +131,58 @@ static void flush( struct idx_context *i
    idx->current_prim = NULL;
 }
 
-static void check_flush( struct idx_context *idx )
+/* Double the index buffer size until it is at least 'indices', clamp
+ * to hw_max_indices, and reallocate only if the result is strictly
+ * larger than both the current size and 'indices'.
+ * NOTE(review): the _mesa_realloc() result is not checked for NULL.
+ */
+static void try_grow_indices( struct idx_context *idx, GLuint indices )
+{
+   const GLuint cur_size = idx->index_buffer_size;
+   GLuint wanted = cur_size * 2;
+
+   while (wanted < indices)
+      wanted *= 2;
+   if (wanted > idx->hw_max_indices)
+      wanted = idx->hw_max_indices;
+   if (wanted <= cur_size || wanted <= indices)
+      return;
+   idx->index_buffer_size = wanted;
+   idx->indices = _mesa_realloc(idx->indices, cur_size * sizeof(GLuint),
+                                wanted * sizeof(GLuint));
+}
+
+
+/* We clip into the pre-allocated vertex buffer (held by t_vertex.c).
+ * This may eventually fill up, so need to check after each clipped
+ * primitive how we are doing.  Similarly, we may run out of space for
+ * indices. For non-clipped prims, this is done once at the start of
+ * drawing.
+ */
+static void check_flush( struct idx_context *idx, 
+			 GLuint elts )
 {
-   if (idx->index_buffer_size - idx->nr_indices < MAX_CLIPPED_VERTICES ||
-       idx->vb_size - idx->VB->Count < MAX_CLIPPED_VERTICES) {
-      _mesa_printf("forced flush\n");
+   GLuint indices = elts + idx->nr_indices + MAX_CLIPPED_VERTICES * 3;
+
+   assert(idx->nr_prims <= IDX_MAX_PRIM);
+   assert(idx->nr_indices <= idx->index_buffer_size);
+
+   if (idx->hw_max_indexable_verts < idx->VB->Count + MAX_CLIPPED_VERTICES) {
       flush( idx );
    }
+
+   if (idx->index_buffer_size < indices) {
+      try_grow_indices( idx, indices );
+
+      if (idx->index_buffer_size < indices) 
+	 flush( idx );
+   }
+
+   assert (idx->index_buffer_size > indices);
+   assert (idx->hw_max_indexable_verts > idx->VB->Count + MAX_CLIPPED_VERTICES);
 }
 
+
 static void elt( struct idx_context *idx, GLuint i )
 {
    idx->indices[idx->nr_indices++] = i;
@@ -156,6 +202,36 @@ static GLenum reduce_mode( GLuint mode )
    }
 }
 
+/* Estimate of the number of indices needed for a primitive of 'mode'
+ * with 'count' vertices; run_index_render passes the result to
+ * check_flush() to reserve index space.  Unknown modes yield 0.
+ */
+static GLuint nr_elts( GLuint mode, GLuint count )
+{
+   switch (mode) {
+   case GL_POINTS:
+   case GL_LINES:
+   case GL_TRIANGLES:
+      return count;
+   case GL_LINE_LOOP:
+      return count * 2 + 2;
+   case GL_LINE_STRIP:
+      return count * 2;
+   case GL_TRIANGLE_STRIP:
+   case GL_TRIANGLE_FAN:
+   case GL_POLYGON:
+      return count * 3;
+   case GL_QUADS:
+      return (count / 4) * 6;
+   case GL_QUAD_STRIP:
+      return (count / 2) * 6;
+   default:
+      return 0;
+   }
+}
+
+
+
 static void set_mode( struct idx_context *idx, GLuint flags )
 {
    GLenum mode = reduce_mode(flags & PRIM_MODE_MASK);
@@ -184,6 +260,9 @@ static void points( struct idx_context *
    const GLubyte *mask = idx->VB->ClipMask;
    GLuint i;
 
+   if (idx->VB->ClipOrMask)
+      check_flush(idx, count);
+
    if (elts) {
       for (i = 0; i < count; i++) {
 	 GLuint e = elts[i];
@@ -197,6 +276,9 @@ static void points( struct idx_context *
 	    elt( idx, i );
       }
    }      
+
+   if (idx->VB->ClipOrMask)
+      check_flush(idx, 0);
 }
 
 static void line( struct idx_context *idx, GLuint a, GLuint b )
@@ -216,6 +298,11 @@ static void quad( struct idx_context *id
 		  GLuint a, GLuint b, 
 		  GLuint c, GLuint d )
 {
+   /* If smooth shading, draw like a trifan which gives better
+    * rasterization on some hardware.  Otherwise draw as two triangles
+    * with provoking vertex in third position as required for flat
+    * shading.
+    */
    if (idx->flatshade) {
       elt( idx, a );
       elt( idx, b );
@@ -228,32 +315,26 @@ static void quad( struct idx_context *id
    else {
       elt( idx, a );
       elt( idx, b );
-      elt( idx, d );
+      elt( idx, c );
       
-      elt( idx, b );
+      elt( idx, a );
       elt( idx, c );
       elt( idx, d );
    }
 }
 
-static void clipped_poly( struct idx_context *idx, 
-			  const GLuint *elts, GLuint nr )
+/* 
+ */
+static void polygon( struct idx_context *idx, 
+		     const GLuint *elts, GLuint nr )
 {
    GLuint i;
 
-   for (i = 0; i < nr; i++)
+   for (i = 2; i < nr; i++) {
+      elt( idx, elts[0] );
+      elt( idx, elts[i-1] );
       elt( idx, elts[i] );
-
-   check_flush( idx );
-}
-
-
-static void clipped_line( struct idx_context *idx, GLuint a, GLuint b )
-{
-   elt( idx, a );
-   elt( idx, b );
-
-   check_flush( idx );
+   }
 }
 
 
@@ -265,8 +346,8 @@ static void clipped_line( struct idx_con
 #define CTX_ARG struct idx_context *idx
 #define GET_REAL_CTX GLcontext *ctx = idx->ctx;
 
-#define CLIPPED_POLYGON( list, n ) clipped_poly( idx, list, n )
-#define CLIPPED_LINE( a, b ) clipped_line( idx, a, b )
+#define CLIPPED_POLYGON( list, n ) polygon( idx, list, n )
+#define CLIPPED_LINE( a, b ) line( idx, a, b )
 
 #define W(i) coord[i][3]
 #define Z(i) coord[i][2]
@@ -289,9 +370,6 @@ static void clipped_line( struct idx_con
  */
 #define CLIPMASK (CLIP_FRUSTUM_BITS | CLIP_CULL_BIT)
 
-
-/* Vertices, with the possibility of clipping.
- */
 #define RENDER_POINTS( start, count ) \
    points( idx, start, count )
 
@@ -303,6 +381,7 @@ do {						\
       line( idx, v1, v2 );			\
    else if (!(c1 & c2 & CLIPMASK))		\
       clip_line_4( idx, v1, v2, ormask );	\
+   CHECK_FLUSH( idx ); \
 } while (0)
 
 #define RENDER_TRI( v1, v2, v3 )			\
@@ -313,6 +392,7 @@ do {							\
       tri( idx, v1, v2, v3 );			\
    else if (!(c1 & c2 & c3 & CLIPMASK))			\
       clip_tri_4( idx, v1, v2, v3, ormask );		\
+   CHECK_FLUSH( idx ); \
 } while (0)
 
 #define RENDER_QUAD( v1, v2, v3, v4 )			\
@@ -324,6 +404,7 @@ do {							\
       quad( idx, v1, v2, v3, v4 );			\
    else if (!(c1 & c2 & c3 & c4 & CLIPMASK)) 		\
       clip_quad_4( idx, v1, v2, v3, v4, ormask );	\
+   CHECK_FLUSH( idx ); \
 } while (0)
 
 #define INIT(x) \
@@ -337,68 +418,54 @@ do {							\
    (void) elt; (void) mask;
 
 
+/* Verts, clipping.
+ */
+#define CHECK_FLUSH(idx) check_flush(idx, 6)
 #define MASK(x) mask[x]
 #define TAG(x) x##_verts_clip
 #define PRESERVE_VB_DEFS
 #include "t_vb_rendertmp.h"
 
-/* Elts, with the possibility of clipping.
+/* Elts, clipping.
  */
 #undef ELT
 #undef TAG
 #define ELT(x) elt[x]
 #define TAG(x) x##_elts_clip
+#define PRESERVE_VB_DEFS
 #include "t_vb_rendertmp.h"
 
 
-/**********************************************************************/
-/*                  Render whole begin/end objects                    */
-/**********************************************************************/
-
-#define NEED_EDGEFLAG_SETUP 0
-
-
-/* Vertices, no clipping.
+/* Elts, no clipping.
  */
-#define RENDER_POINTS( start, count ) \
-   points( idx, start, count )
-
-#define RENDER_LINE( v1, v2 ) \
-   line( idx, v1, v2 )
-
-#define RENDER_TRI( v1, v2, v3 ) \
-   tri( idx, v1, v2, v3 )
-
-#define RENDER_QUAD( v1, v2, v3, v4 ) \
-   quad( idx, v1, v2, v3, v4 )
-
-#define INIT(x) \
-   set_mode( idx, x )
-
-#define LOCAL_VARS						\
-   const GLuint * const elt = idx->VB->Elts;			\
-   (void) elt; 
-
-#define TAG(x) x##_verts
+#undef MASK
+#undef TAG
+#undef CHECK_FLUSH
+#define CHECK_FLUSH(idx) ((void)idx)
+#define MASK(x) 0
+#define TAG(x) x##_elts
 #define PRESERVE_VB_DEFS
 #include "t_vb_rendertmp.h"
 
 
-/* Elts, no clipping.
+/* Verts, no clipping
  */
 #undef ELT
-#define TAG(x) x##_elts
-#define ELT(x) elt[x]
+#undef TAG
+#define ELT(x) x
+#define TAG(x) x##_verts
 #include "t_vb_rendertmp.h"
 
 
+
+
 /**********************************************************************/
 /*              Clip and render whole vertex buffers                  */
 /**********************************************************************/
 
 
-static GLboolean run_render( GLcontext *ctx,
-			     struct tnl_pipeline_stage *stage )
+static GLboolean run_index_render( GLcontext *ctx,
+				   struct tnl_pipeline_stage *stage )
 {
    struct idx_context *idx = (struct idx_context *)stage->privatePtr;
    TNLcontext *tnl = TNL_CONTEXT(ctx);
@@ -407,12 +474,26 @@ static GLboolean run_render( GLcontext *
    GLuint i;
 
 
-   if (!tnl->Driver.Render.CheckIdxRender( ctx, VB ))
+   if (!tnl->Driver.Render.CheckIdxRender( ctx,
+					   VB,
+					   &idx->hw_max_indexable_verts,
+					   &idx->hw_max_indices))
       return GL_TRUE;
 
    idx->orig_VB_count = VB->Count;
    idx->nr_prims = 0;
    idx->nr_indices = 0;
+   idx->flatshade = (ctx->Light.ShadeModel == GL_FLAT);
+
+   if (idx->index_buffer_size > idx->hw_max_indices) {
+      idx->index_buffer_size = MIN2(INITIAL_INDEX_BUFSZ, 
+				    idx->hw_max_indices);
+
+      _mesa_free(idx->indices);
+
+      idx->indices = _mesa_malloc(idx->index_buffer_size * 
+				  sizeof(GLuint));
+   }
 
 
    /* Allow the drivers to lock before projected verts are built so
@@ -427,12 +508,13 @@ static GLboolean run_render( GLcontext *
    if (VB->ClipOrMask) {
       tab = VB->Elts ? render_tab_elts_clip : render_tab_verts_clip;
 
-      idx->vb_size = tnl->Driver.Render.GetMaxVBSize( ctx );
 
-      /* The driver must guarentee this, it is not our fault if this
-       * fails:
+      /* In this case, what do we do?  Split the primitives?  Just
+       * fail?  Currently make it the driver's responsibility to set
+       * VB->Size small enough that this never happens.  Then any
+       * splitting will be done earlier in t_draw.c.
        */
-      assert (idx->vb_size >= VB->Count * 1.2);
+      assert (idx->hw_max_indexable_verts >= VB->Count * 1.2);
 	 
 
       /* Have to build these before clipping.  Not ideal, but there
@@ -455,6 +537,10 @@ static GLboolean run_render( GLcontext *
    for (i = 0 ; i < VB->PrimitiveCount ; i++)
    {
       const struct _mesa_prim *prim = &VB->Primitive[i];
+
+      if (!VB->ClipOrMask)
+	 check_flush( idx, 
+		      nr_elts( prim->mode, prim->count ));
       
       if (prim->count)
 	 tab[prim->mode]( idx, 
@@ -486,7 +572,7 @@ const struct tnl_pipeline_stage _tnl_ind
    init_idx,			/* creator */
    free_idx,			/* destructor */
    NULL,			/* validate */
-   run_render			/* run */
+   run_index_render		/* run */
 };