mesa: Branch 'index-swtnl-0.1'
Keith Whitwell
keithw at kemper.freedesktop.org
Fri Feb 16 19:04:46 UTC 2007
src/mesa/drivers/dri/i915tex/intel_batchbuffer.h | 5
src/mesa/drivers/dri/i915tex/intel_idx_render.c | 78 +++++--
src/mesa/tnl/t_context.h | 6
src/mesa/tnl/t_vb_index.c | 228 +++++++++++++++--------
4 files changed, 220 insertions(+), 97 deletions(-)
New commits:
diff-tree bd56abc3441dfb192b7825a8407411f89827abd1 (from 2ca4ca0068203699f73546de0db544a2a687eae7)
Author: Keith Whitwell <keith at tungstengraphics.com>
Date: Fri Feb 16 18:35:12 2007 +0000
Dynamically grow index storage. Fix several bugs.
diff --git a/src/mesa/drivers/dri/i915tex/intel_batchbuffer.h b/src/mesa/drivers/dri/i915tex/intel_batchbuffer.h
index 59261f7..916bcae 100644
--- a/src/mesa/drivers/dri/i915tex/intel_batchbuffer.h
+++ b/src/mesa/drivers/dri/i915tex/intel_batchbuffer.h
@@ -6,7 +6,10 @@
struct intel_context;
-#define BATCH_SZ 16384
+/* Must be able to hold at minimum VB->Size * 3 * 2 bytes for
+ * intel_idx_render.c indices, which is currently about 20k.
+ */
+#define BATCH_SZ (64*1024)
#define BATCH_RESERVED 16
#define MAX_RELOCS 400
diff --git a/src/mesa/drivers/dri/i915tex/intel_idx_render.c b/src/mesa/drivers/dri/i915tex/intel_idx_render.c
index e7e4cf9..decd5d9 100644
--- a/src/mesa/drivers/dri/i915tex/intel_idx_render.c
+++ b/src/mesa/drivers/dri/i915tex/intel_idx_render.c
@@ -57,6 +57,16 @@
#define MAX_VBO 16 /* XXX: make dynamic */
#define VBO_SIZE (128*1024)
+
+/* Basically limited to what is addressable by the 16-bit indices,
+ * and the number of indices we can fit in a batchbuffer after
+ * making room for state.
+ */
+#define HW_MAX_INDEXABLE_VERTS ((1<<16)-1)
+#define HW_MAX_INDICES ((BATCH_SZ - 1024) / 2)
+
+
+
struct intel_vb {
struct intel_context *intel;
@@ -78,7 +88,10 @@ struct intel_vb {
static GLboolean check_idx_render(GLcontext *ctx,
- struct vertex_buffer *VB)
+ struct vertex_buffer *VB,
+ GLuint *max_nr_verts,
+ GLuint *max_nr_indices )
+
{
struct intel_context *intel = intel_context(ctx);
GLuint i;
@@ -87,6 +100,12 @@ static GLboolean check_idx_render(GLcont
intel->RenderIndex != 0)
return GL_FALSE;
+ /* These are constant, but for some hardware they might vary
+ * depending on the state, eg. according to vertex size.
+ */
+ *max_nr_verts = HW_MAX_INDEXABLE_VERTS;
+ *max_nr_indices = HW_MAX_INDICES;
+
/* Fix points with different dstorg bias state?? or use different
* viewport transform in this case only (requires flush at level
* above).
@@ -141,19 +160,25 @@ release_current_vbo( struct intel_vb *vb
{
GLcontext *ctx = &vb->intel->ctx;
- ctx->Driver.UnmapBuffer( ctx,
- GL_ARRAY_BUFFER_ARB,
- &vb->current_vbo->Base );
+ DBG("%s\n", __FUNCTION__);
+
+ if (vb->current_vbo_ptr)
+ ctx->Driver.UnmapBuffer( ctx,
+ GL_ARRAY_BUFFER_ARB,
+ &vb->current_vbo->Base );
vb->current_vbo = NULL;
vb->current_vbo_ptr = NULL;
vb->current_vbo_size = 0;
vb->current_vbo_used = 0;
+ vb->dirty = 0;
}
static void
reset_vbo( struct intel_vb *vb )
{
+ DBG("%s\n", __FUNCTION__);
+
if (vb->current_vbo)
release_current_vbo( vb );
@@ -168,6 +193,8 @@ get_next_vbo( struct intel_vb *vb, GLuin
{
GLcontext *ctx = &vb->intel->ctx;
+ DBG("%s\n", __FUNCTION__);
+
/* XXX: just allocate more vbos here:
*/
if (vb->nr_vbo == MAX_VBO) {
@@ -201,19 +228,16 @@ get_next_vbo( struct intel_vb *vb, GLuin
GL_DYNAMIC_DRAW_ARB,
&vb->current_vbo->Base );
- /* Map the vbo now, will be unmapped in release_current_vbo, above.
- */
- vb->current_vbo_ptr = ctx->Driver.MapBuffer( ctx,
- GL_ARRAY_BUFFER_ARB,
- GL_WRITE_ONLY,
- &vb->current_vbo->Base );
}
static void *get_space( struct intel_vb *vb, GLuint nr, GLuint vertex_size )
{
+ GLcontext *ctx = &vb->intel->ctx;
void *ptr;
GLuint space = nr * vertex_size * 4;
+ DBG("%s %d %d\n", __FUNCTION__, nr, vertex_size);
+
if (vb->current_vbo_used + space > vb->current_vbo_size)
get_next_vbo( vb, space );
@@ -223,6 +247,16 @@ static void *get_space( struct intel_vb
vb->dirty = 1;
}
+ if (!vb->current_vbo_ptr) {
+ /* Map the vbo now, will be unmapped in release_current_vbo, above.
+ */
+ vb->current_vbo_ptr = ctx->Driver.MapBuffer( ctx,
+ GL_ARRAY_BUFFER_ARB,
+ GL_WRITE_ONLY,
+ &vb->current_vbo->Base );
+ }
+
+
/* Hmm, could just re-emit the vertex buffer packet & avoid this:
*/
vb->hw_vbo_delta = (vb->current_vbo_used - vb->hw_vbo_offset) / (vb->vertex_size * 4);
@@ -234,13 +268,6 @@ static void *get_space( struct intel_vb
}
-static GLuint get_max_vb_size( GLcontext *ctx )
-{
- /* Basically limited to what is addressable by the 16-bit indices.
- */
- return (1<<16)-1;
-}
-
static void
build_and_emit_vertices(GLcontext * ctx, GLuint nr)
{
@@ -248,6 +275,8 @@ build_and_emit_vertices(GLcontext * ctx,
TNLcontext *tnl = TNL_CONTEXT(ctx);
void *ptr = get_space(intel->vb, nr, intel->vertex_size );
+ DBG("%s %d\n", __FUNCTION__, nr);
+
assert(tnl->clipspace.vertex_size == intel->vertex_size * 4);
tnl->clipspace.new_inputs |= VERT_BIT_POS;
@@ -264,6 +293,8 @@ static void emit_built_vertices( GLconte
struct intel_context *intel = intel_context(ctx);
void *ptr = get_space(intel->vb, nr, intel->vertex_size );
+ DBG("%s %d\n", __FUNCTION__, nr);
+
memcpy(ptr, _tnl_get_vertex(ctx, 0),
nr * intel->vertex_size * sizeof(GLuint));
}
@@ -349,15 +380,17 @@ static void emit_prims( GLcontext *ctx,
*/
void intel_idx_lost_hardware( struct intel_context *intel )
{
+ GLcontext *ctx = &intel->ctx;
struct intel_vb *vb = intel->vb;
DBG("%s\n", __FUNCTION__);
- if (vb->current_vbo)
- release_current_vbo( vb );
-
-
-/* reset_vbo(intel->vb); */
+ if (vb->current_vbo_ptr) {
+ ctx->Driver.UnmapBuffer( ctx,
+ GL_ARRAY_BUFFER_ARB,
+ &vb->current_vbo->Base );
+ vb->current_vbo_ptr = NULL;
+ }
}
void intel_idx_init( struct intel_context *intel )
@@ -367,7 +400,6 @@ void intel_idx_init( struct intel_contex
GLuint i;
tnl->Driver.Render.CheckIdxRender = check_idx_render;
- tnl->Driver.Render.GetMaxVBSize = get_max_vb_size;
tnl->Driver.Render.BuildAndEmitVertices = build_and_emit_vertices;
tnl->Driver.Render.EmitBuiltVertices = emit_built_vertices;
tnl->Driver.Render.EmitPrims = emit_prims;
diff --git a/src/mesa/tnl/t_context.h b/src/mesa/tnl/t_context.h
index 837507c..e67de7f 100644
--- a/src/mesa/tnl/t_context.h
+++ b/src/mesa/tnl/t_context.h
@@ -489,8 +489,10 @@ struct tnl_device_driver
* This function is called only from _tnl_render_stage in tnl/t_render.c.
*/
- GLboolean (*CheckIdxRender)( GLcontext *ctx, struct vertex_buffer *VB );
- GLuint (*GetMaxVBSize)( GLcontext *ctx );
+ GLboolean (*CheckIdxRender)( GLcontext *ctx,
+ struct vertex_buffer *VB,
+ GLuint *max_indexable_vertices,
+ GLuint *max_indices );
void (*BuildAndEmitVertices)( GLcontext *ctx, GLuint nr );
void (*EmitBuiltVertices)( GLcontext *ctx, GLuint nr );
void (*EmitPrims)( GLcontext *ctx,
diff --git a/src/mesa/tnl/t_vb_index.c b/src/mesa/tnl/t_vb_index.c
index 8520d54..ebaac99 100644
--- a/src/mesa/tnl/t_vb_index.c
+++ b/src/mesa/tnl/t_vb_index.c
@@ -45,7 +45,8 @@
#include "t_pipeline.h"
#define IDX_MAX_PRIM 64
-#define IDX_MAX_INDEX 2048 /* xxx: fix me! */
+#define INITIAL_INDEX_BUFSZ 2048
+
struct idx_context {
GLcontext *ctx;
@@ -58,12 +59,14 @@ struct idx_context {
struct _mesa_prim prim[IDX_MAX_PRIM];
GLuint nr_indices;
- GLuint indices[2048];
+ GLuint *indices;
GLuint index_buffer_size;
- GLuint vb_size;
GLuint orig_VB_count;
+ GLuint hw_max_indexable_verts;
+ GLuint hw_max_indices;
+
GLboolean flatshade;
};
@@ -84,8 +87,8 @@ static GLboolean init_idx( GLcontext *ct
stage->privatePtr = (void *)idx;
- idx->index_buffer_size = IDX_MAX_INDEX;
-
+ idx->index_buffer_size = INITIAL_INDEX_BUFSZ;
+ idx->indices = _mesa_malloc(idx->index_buffer_size * sizeof(GLuint));
idx->VB = VB;
idx->tnl = tnl;
idx->ctx = ctx;
@@ -106,7 +109,7 @@ static void free_idx( struct tnl_pipelin
static void flush( struct idx_context *idx )
{
assert(idx->nr_prims <= IDX_MAX_PRIM);
- assert(idx->nr_indices <= IDX_MAX_INDEX);
+ assert(idx->nr_indices <= idx->index_buffer_size);
if (idx->VB->ClipOrMask) {
idx->tnl->Driver.Render.EmitBuiltVertices( idx->ctx, idx->VB->Count );
@@ -128,15 +131,58 @@ static void flush( struct idx_context *i
idx->current_prim = NULL;
}
-static void check_flush( struct idx_context *idx )
+static void try_grow_indices( struct idx_context *idx, GLuint indices )
+{
+ GLuint new_size = idx->index_buffer_size * 2;
+
+ while (new_size < indices)
+ new_size *= 2;
+
+ if (new_size > idx->hw_max_indices)
+ new_size = idx->hw_max_indices;
+
+ if (new_size > idx->index_buffer_size &&
+ new_size > indices) {
+
+ GLuint old_size = idx->index_buffer_size;
+ idx->index_buffer_size = new_size;
+ idx->indices = _mesa_realloc(idx->indices,
+ old_size * sizeof(GLuint),
+ new_size * sizeof(GLuint));
+ }
+}
+
+
+/* We clip into the pre-allocated vertex buffer (held by t_vertex.c).
+ * This may eventually fill up, so need to check after each clipped
+ * primitive how we are doing. Similarly, we may run out of space for
+ * indices. For non-clipped prims, this is done once at the start of
+ * drawing.
+ */
+static void check_flush( struct idx_context *idx,
+ GLuint elts )
{
- if (idx->index_buffer_size - idx->nr_indices < MAX_CLIPPED_VERTICES ||
- idx->vb_size - idx->VB->Count < MAX_CLIPPED_VERTICES) {
- _mesa_printf("forced flush\n");
+ GLuint indices = elts + idx->nr_indices + MAX_CLIPPED_VERTICES * 3;
+
+ assert(idx->nr_prims <= IDX_MAX_PRIM);
+ assert(idx->nr_indices <= idx->index_buffer_size);
+
+ if (idx->hw_max_indexable_verts < idx->VB->Count + MAX_CLIPPED_VERTICES) {
flush( idx );
}
+
+ if (idx->index_buffer_size < indices) {
+ try_grow_indices( idx, indices );
+
+ if (idx->index_buffer_size < indices)
+ flush( idx );
+ }
+
+ assert (idx->index_buffer_size > indices);
+ assert (idx->hw_max_indexable_verts > idx->VB->Count + MAX_CLIPPED_VERTICES);
}
+
static void elt( struct idx_context *idx, GLuint i )
{
idx->indices[idx->nr_indices++] = i;
@@ -156,6 +202,36 @@ static GLenum reduce_mode( GLuint mode )
}
}
+static GLenum nr_elts( GLuint mode, GLuint count )
+{
+ switch (mode) {
+ case GL_POINTS:
+ return count;
+ case GL_LINES:
+ return count;
+ case GL_LINE_LOOP:
+ return count * 2 + 2;
+ case GL_LINE_STRIP:
+ return count * 2;
+ case GL_TRIANGLES:
+ return count;
+ case GL_TRIANGLE_STRIP:
+ return count * 3;
+ case GL_TRIANGLE_FAN:
+ return count * 3;
+ case GL_QUADS:
+ return (count / 4) * 6;
+ case GL_QUAD_STRIP:
+ return (count / 2) * 6;
+ case GL_POLYGON:
+ return count * 3;
+ default:
+ return 0;
+ }
+}
+
+
+
static void set_mode( struct idx_context *idx, GLuint flags )
{
GLenum mode = reduce_mode(flags & PRIM_MODE_MASK);
@@ -184,6 +260,9 @@ static void points( struct idx_context *
const GLubyte *mask = idx->VB->ClipMask;
GLuint i;
+ if (idx->VB->ClipOrMask)
+ check_flush(idx, count);
+
if (elts) {
for (i = 0; i < count; i++) {
GLuint e = elts[i];
@@ -197,6 +276,9 @@ static void points( struct idx_context *
elt( idx, i );
}
}
+
+ if (idx->VB->ClipOrMask)
+ check_flush(idx, 0);
}
static void line( struct idx_context *idx, GLuint a, GLuint b )
@@ -216,6 +298,11 @@ static void quad( struct idx_context *id
GLuint a, GLuint b,
GLuint c, GLuint d )
{
+ /* If smooth shading, draw like a trifan which gives better
+ * rasterization on some hardware. Otherwise draw as two triangles
+ * with provoking vertex in third position as required for flat
+ * shading.
+ */
if (idx->flatshade) {
elt( idx, a );
elt( idx, b );
@@ -228,32 +315,26 @@ static void quad( struct idx_context *id
else {
elt( idx, a );
elt( idx, b );
- elt( idx, d );
+ elt( idx, c );
- elt( idx, b );
+ elt( idx, a );
elt( idx, c );
elt( idx, d );
}
}
-static void clipped_poly( struct idx_context *idx,
- const GLuint *elts, GLuint nr )
+/*
+ */
+static void polygon( struct idx_context *idx,
+ const GLuint *elts, GLuint nr )
{
GLuint i;
- for (i = 0; i < nr; i++)
+ for (i = 2; i < nr; i++) {
+ elt( idx, elts[0] );
+ elt( idx, elts[i-1] );
elt( idx, elts[i] );
-
- check_flush( idx );
-}
-
-
-static void clipped_line( struct idx_context *idx, GLuint a, GLuint b )
-{
- elt( idx, a );
- elt( idx, b );
-
- check_flush( idx );
+ }
}
@@ -265,8 +346,8 @@ static void clipped_line( struct idx_con
#define CTX_ARG struct idx_context *idx
#define GET_REAL_CTX GLcontext *ctx = idx->ctx;
-#define CLIPPED_POLYGON( list, n ) clipped_poly( idx, list, n )
-#define CLIPPED_LINE( a, b ) clipped_line( idx, a, b )
+#define CLIPPED_POLYGON( list, n ) polygon( idx, list, n )
+#define CLIPPED_LINE( a, b ) line( idx, a, b )
#define W(i) coord[i][3]
#define Z(i) coord[i][2]
@@ -289,9 +370,6 @@ static void clipped_line( struct idx_con
*/
#define CLIPMASK (CLIP_FRUSTUM_BITS | CLIP_CULL_BIT)
-
-/* Vertices, with the possibility of clipping.
- */
#define RENDER_POINTS( start, count ) \
points( idx, start, count )
@@ -303,6 +381,7 @@ do { \
line( idx, v1, v2 ); \
else if (!(c1 & c2 & CLIPMASK)) \
clip_line_4( idx, v1, v2, ormask ); \
+ CHECK_FLUSH( idx ); \
} while (0)
#define RENDER_TRI( v1, v2, v3 ) \
@@ -313,6 +392,7 @@ do { \
tri( idx, v1, v2, v3 ); \
else if (!(c1 & c2 & c3 & CLIPMASK)) \
clip_tri_4( idx, v1, v2, v3, ormask ); \
+ CHECK_FLUSH( idx ); \
} while (0)
#define RENDER_QUAD( v1, v2, v3, v4 ) \
@@ -324,6 +404,7 @@ do { \
quad( idx, v1, v2, v3, v4 ); \
else if (!(c1 & c2 & c3 & c4 & CLIPMASK)) \
clip_quad_4( idx, v1, v2, v3, v4, ormask ); \
+ CHECK_FLUSH( idx ); \
} while (0)
#define INIT(x) \
@@ -337,68 +418,54 @@ do { \
(void) elt; (void) mask;
+/* Verts, clipping.
+ */
+#define CHECK_FLUSH(idx) check_flush(idx, 6)
#define MASK(x) mask[x]
#define TAG(x) x##_verts_clip
#define PRESERVE_VB_DEFS
#include "t_vb_rendertmp.h"
-/* Elts, with the possibility of clipping.
+/* Elts, clipping.
*/
#undef ELT
#undef TAG
#define ELT(x) elt[x]
#define TAG(x) x##_elts_clip
+#define PRESERVE_VB_DEFS
#include "t_vb_rendertmp.h"
-/**********************************************************************/
-/* Render whole begin/end objects */
-/**********************************************************************/
-
-#define NEED_EDGEFLAG_SETUP 0
-
-
-/* Vertices, no clipping.
+/* Elts, no clipping.
*/
-#define RENDER_POINTS( start, count ) \
- points( idx, start, count )
-
-#define RENDER_LINE( v1, v2 ) \
- line( idx, v1, v2 )
-
-#define RENDER_TRI( v1, v2, v3 ) \
- tri( idx, v1, v2, v3 )
-
-#define RENDER_QUAD( v1, v2, v3, v4 ) \
- quad( idx, v1, v2, v3, v4 )
-
-#define INIT(x) \
- set_mode( idx, x )
-
-#define LOCAL_VARS \
- const GLuint * const elt = idx->VB->Elts; \
- (void) elt;
-
-#define TAG(x) x##_verts
+#undef MASK
+#undef TAG
+#undef CHECK_FLUSH
+#define CHECK_FLUSH(idx) ((void)idx)
+#define MASK(x) 0
+#define TAG(x) x##_elts
#define PRESERVE_VB_DEFS
#include "t_vb_rendertmp.h"
-/* Elts, no clipping.
+/* Verts, no clipping
*/
#undef ELT
-#define TAG(x) x##_elts
-#define ELT(x) elt[x]
+#undef TAG
+#define ELT(x) x
+#define TAG(x) x##_verts
#include "t_vb_rendertmp.h"
+
+
/**********************************************************************/
/* Clip and render whole vertex buffers */
/**********************************************************************/
-static GLboolean run_render( GLcontext *ctx,
- struct tnl_pipeline_stage *stage )
+static GLboolean run_index_render( GLcontext *ctx,
+ struct tnl_pipeline_stage *stage )
{
struct idx_context *idx = (struct idx_context *)stage->privatePtr;
TNLcontext *tnl = TNL_CONTEXT(ctx);
@@ -407,12 +474,26 @@ static GLboolean run_render( GLcontext *
GLuint i;
- if (!tnl->Driver.Render.CheckIdxRender( ctx, VB ))
+ if (!tnl->Driver.Render.CheckIdxRender( ctx,
+ VB,
+ &idx->hw_max_indexable_verts,
+ &idx->hw_max_indices))
return GL_TRUE;
idx->orig_VB_count = VB->Count;
idx->nr_prims = 0;
idx->nr_indices = 0;
+ idx->flatshade = (ctx->Light.ShadeModel == GL_FLAT);
+
+ if (idx->index_buffer_size > idx->hw_max_indices) {
+ idx->index_buffer_size = MIN2(INITIAL_INDEX_BUFSZ,
+ idx->hw_max_indices);
+
+ _mesa_free(idx->indices);
+
+ idx->indices = _mesa_malloc(idx->index_buffer_size *
+ sizeof(GLuint));
+ }
/* Allow the drivers to lock before projected verts are built so
@@ -427,12 +508,13 @@ static GLboolean run_render( GLcontext *
if (VB->ClipOrMask) {
tab = VB->Elts ? render_tab_elts_clip : render_tab_verts_clip;
- idx->vb_size = tnl->Driver.Render.GetMaxVBSize( ctx );
- /* The driver must guarentee this, it is not our fault if this
- * fails:
+ /* In this case, what do we do? Split the primitives? Just
+ * fail? Currently make it the driver's responsibility to set
+ * VB->Size small enough that this never happens. Then any
+ * splitting will be done earlier in t_draw.c.
*/
- assert (idx->vb_size >= VB->Count * 1.2);
+ assert (idx->hw_max_indexable_verts >= VB->Count * 1.2);
/* Have to build these before clipping. Not ideal, but there
@@ -455,6 +537,10 @@ static GLboolean run_render( GLcontext *
for (i = 0 ; i < VB->PrimitiveCount ; i++)
{
const struct _mesa_prim *prim = &VB->Primitive[i];
+
+ if (!VB->ClipOrMask)
+ check_flush( idx,
+ nr_elts( prim->mode, prim->count ));
if (prim->count)
tab[prim->mode]( idx,
@@ -486,7 +572,7 @@ const struct tnl_pipeline_stage _tnl_ind
init_idx, /* creator */
free_idx, /* destructor */
NULL, /* validate */
- run_render /* run */
+ run_index_render /* run */
};
More information about the mesa-commit
mailing list